devops-ai-mcp 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: devops-ai-mcp
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Devops Ai automation via MCP. Includes docker compose generator, cicd pipeline builder, log analyzer. By MEOK AI Labs.
|
|
5
|
+
Project-URL: Homepage, https://meok.ai
|
|
6
|
+
Project-URL: Repository, https://github.com/CSOAI-ORG/devops-ai-mcp
|
|
7
|
+
Author-email: MEOK AI Labs <nicholas@meok.ai>
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2026 MEOK AI Labs (meok.ai)
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Keywords: ai,devops,mcp,meok
|
|
27
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
28
|
+
Classifier: Operating System :: OS Independent
|
|
29
|
+
Classifier: Programming Language :: Python :: 3
|
|
30
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
31
|
+
Requires-Python: >=3.10
|
|
32
|
+
Requires-Dist: mcp>=1.0.0
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
server.py,sha256=N26R-YF0sDslSblPtYt4mksMGb3O1ITZzTRrs0fDsOQ,29155
|
|
2
|
+
devops_ai_mcp-1.0.0.dist-info/METADATA,sha256=pI37UrzsJCuRxrz80XS9W5yM6pSUXhWG0zG8RsSSz-Q,1597
|
|
3
|
+
devops_ai_mcp-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
4
|
+
devops_ai_mcp-1.0.0.dist-info/entry_points.txt,sha256=2WjoUiGYHvEtY6QA1g8sDOlrbwy4GsQfar5kJM4uIsQ,46
|
|
5
|
+
devops_ai_mcp-1.0.0.dist-info/licenses/LICENSE,sha256=j3ubn5qaWJ2R1iHLwwnUIwaFCGnaPWGUP4rLLcmYL9k,820
|
|
6
|
+
devops_ai_mcp-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 MEOK AI Labs (meok.ai)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
server.py
ADDED
|
@@ -0,0 +1,692 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
DevOps AI MCP Server
|
|
4
|
+
=======================
|
|
5
|
+
Infrastructure and DevOps toolkit for AI agents: Docker Compose generation,
|
|
6
|
+
CI/CD pipeline building, log analysis, incident classification, and runbook generation.
|
|
7
|
+
|
|
8
|
+
By MEOK AI Labs | https://meok.ai
|
|
9
|
+
|
|
10
|
+
Install: pip install mcp
|
|
11
|
+
Run: python server.py
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
import sys, os
|
|
16
|
+
sys.path.insert(0, os.path.expanduser('~/clawd/meok-labs-engine/shared'))
|
|
17
|
+
from auth_middleware import check_access
|
|
18
|
+
|
|
19
|
+
import re
|
|
20
|
+
from collections import Counter, defaultdict
|
|
21
|
+
from datetime import datetime, timedelta
|
|
22
|
+
from typing import Any, Optional
|
|
23
|
+
from mcp.server.fastmcp import FastMCP
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Rate limiting
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
FREE_DAILY_LIMIT = 30
|
|
29
|
+
_usage: dict[str, list[datetime]] = defaultdict(list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _check_rate_limit(caller: str = "anonymous") -> Optional[str]:
|
|
33
|
+
now = datetime.now()
|
|
34
|
+
cutoff = now - timedelta(days=1)
|
|
35
|
+
_usage[caller] = [t for t in _usage[caller] if t > cutoff]
|
|
36
|
+
if len(_usage[caller]) >= FREE_DAILY_LIMIT:
|
|
37
|
+
return f"Free tier limit reached ({FREE_DAILY_LIMIT}/day). Upgrade: https://mcpize.com/devops-ai-mcp/pro"
|
|
38
|
+
_usage[caller].append(now)
|
|
39
|
+
return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Core operations
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
def _docker_compose(services: list[dict], network_name: str,
|
|
46
|
+
include_volumes: bool) -> dict:
|
|
47
|
+
"""Generate a Docker Compose configuration."""
|
|
48
|
+
if not services:
|
|
49
|
+
return {"error": "Provide at least one service"}
|
|
50
|
+
|
|
51
|
+
compose = {"version": "3.8", "services": {}, "networks": {network_name: {"driver": "bridge"}}}
|
|
52
|
+
if include_volumes:
|
|
53
|
+
compose["volumes"] = {}
|
|
54
|
+
|
|
55
|
+
for svc in services:
|
|
56
|
+
name = svc.get("name", "app")
|
|
57
|
+
image = svc.get("image", "")
|
|
58
|
+
build_context = svc.get("build", "")
|
|
59
|
+
ports = svc.get("ports", [])
|
|
60
|
+
env_vars = svc.get("environment", {})
|
|
61
|
+
depends = svc.get("depends_on", [])
|
|
62
|
+
replicas = svc.get("replicas", 1)
|
|
63
|
+
health_check = svc.get("health_check", "")
|
|
64
|
+
|
|
65
|
+
service_def = {"networks": [network_name], "restart": "unless-stopped"}
|
|
66
|
+
|
|
67
|
+
if build_context:
|
|
68
|
+
service_def["build"] = {"context": build_context, "dockerfile": "Dockerfile"}
|
|
69
|
+
elif image:
|
|
70
|
+
service_def["image"] = image
|
|
71
|
+
else:
|
|
72
|
+
service_def["image"] = f"{name}:latest"
|
|
73
|
+
|
|
74
|
+
if ports:
|
|
75
|
+
service_def["ports"] = [f"{p}" for p in ports]
|
|
76
|
+
|
|
77
|
+
if env_vars:
|
|
78
|
+
service_def["environment"] = env_vars
|
|
79
|
+
|
|
80
|
+
if depends:
|
|
81
|
+
service_def["depends_on"] = depends
|
|
82
|
+
|
|
83
|
+
if replicas > 1:
|
|
84
|
+
service_def["deploy"] = {"replicas": replicas, "restart_policy": {"condition": "on-failure"}}
|
|
85
|
+
|
|
86
|
+
if health_check:
|
|
87
|
+
service_def["healthcheck"] = {
|
|
88
|
+
"test": ["CMD-SHELL", health_check],
|
|
89
|
+
"interval": "30s",
|
|
90
|
+
"timeout": "10s",
|
|
91
|
+
"retries": 3,
|
|
92
|
+
"start_period": "40s",
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if include_volumes:
|
|
96
|
+
vol_name = f"{name}_data"
|
|
97
|
+
service_def["volumes"] = [f"{vol_name}:/data"]
|
|
98
|
+
compose["volumes"][vol_name] = {"driver": "local"}
|
|
99
|
+
|
|
100
|
+
compose["services"][name] = service_def
|
|
101
|
+
|
|
102
|
+
# Generate YAML-like output
|
|
103
|
+
yaml_lines = ["version: '3.8'", "", "services:"]
|
|
104
|
+
for svc_name, svc_def in compose["services"].items():
|
|
105
|
+
yaml_lines.append(f" {svc_name}:")
|
|
106
|
+
if "image" in svc_def:
|
|
107
|
+
yaml_lines.append(f" image: {svc_def['image']}")
|
|
108
|
+
if "build" in svc_def:
|
|
109
|
+
yaml_lines.append(f" build:")
|
|
110
|
+
yaml_lines.append(f" context: {svc_def['build']['context']}")
|
|
111
|
+
yaml_lines.append(f" dockerfile: {svc_def['build']['dockerfile']}")
|
|
112
|
+
if "ports" in svc_def:
|
|
113
|
+
yaml_lines.append(f" ports:")
|
|
114
|
+
for p in svc_def["ports"]:
|
|
115
|
+
yaml_lines.append(f" - \"{p}\"")
|
|
116
|
+
if "environment" in svc_def:
|
|
117
|
+
yaml_lines.append(f" environment:")
|
|
118
|
+
for k, v in svc_def["environment"].items():
|
|
119
|
+
yaml_lines.append(f" {k}: {v}")
|
|
120
|
+
if "depends_on" in svc_def:
|
|
121
|
+
yaml_lines.append(f" depends_on:")
|
|
122
|
+
for d in svc_def["depends_on"]:
|
|
123
|
+
yaml_lines.append(f" - {d}")
|
|
124
|
+
if "volumes" in svc_def:
|
|
125
|
+
yaml_lines.append(f" volumes:")
|
|
126
|
+
for v in svc_def["volumes"]:
|
|
127
|
+
yaml_lines.append(f" - {v}")
|
|
128
|
+
if "healthcheck" in svc_def:
|
|
129
|
+
hc = svc_def["healthcheck"]
|
|
130
|
+
yaml_lines.append(f" healthcheck:")
|
|
131
|
+
yaml_lines.append(f" test: {hc['test']}")
|
|
132
|
+
yaml_lines.append(f" interval: {hc['interval']}")
|
|
133
|
+
yaml_lines.append(f" timeout: {hc['timeout']}")
|
|
134
|
+
yaml_lines.append(f" retries: {hc['retries']}")
|
|
135
|
+
yaml_lines.append(f" networks:")
|
|
136
|
+
yaml_lines.append(f" - {network_name}")
|
|
137
|
+
yaml_lines.append(f" restart: unless-stopped")
|
|
138
|
+
yaml_lines.append("")
|
|
139
|
+
|
|
140
|
+
yaml_lines.append(f"networks:")
|
|
141
|
+
yaml_lines.append(f" {network_name}:")
|
|
142
|
+
yaml_lines.append(f" driver: bridge")
|
|
143
|
+
|
|
144
|
+
if include_volumes and compose.get("volumes"):
|
|
145
|
+
yaml_lines.append("")
|
|
146
|
+
yaml_lines.append("volumes:")
|
|
147
|
+
for vol in compose["volumes"]:
|
|
148
|
+
yaml_lines.append(f" {vol}:")
|
|
149
|
+
yaml_lines.append(f" driver: local")
|
|
150
|
+
|
|
151
|
+
return {
|
|
152
|
+
"service_count": len(compose["services"]),
|
|
153
|
+
"network": network_name,
|
|
154
|
+
"has_volumes": include_volumes,
|
|
155
|
+
"compose_yaml": "\n".join(yaml_lines),
|
|
156
|
+
"compose_json": compose,
|
|
157
|
+
"commands": {
|
|
158
|
+
"start": "docker compose up -d",
|
|
159
|
+
"stop": "docker compose down",
|
|
160
|
+
"logs": "docker compose logs -f",
|
|
161
|
+
"rebuild": "docker compose up -d --build",
|
|
162
|
+
"status": "docker compose ps",
|
|
163
|
+
},
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _cicd_pipeline(platform: str, language: str, stages: list[str],
|
|
168
|
+
deploy_target: str, branch: str) -> dict:
|
|
169
|
+
"""Generate CI/CD pipeline configuration."""
|
|
170
|
+
platforms = {
|
|
171
|
+
"github_actions": {"file": ".github/workflows/ci.yml", "format": "yaml"},
|
|
172
|
+
"gitlab_ci": {"file": ".gitlab-ci.yml", "format": "yaml"},
|
|
173
|
+
"jenkins": {"file": "Jenkinsfile", "format": "groovy"},
|
|
174
|
+
"circleci": {"file": ".circleci/config.yml", "format": "yaml"},
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
if platform not in platforms:
|
|
178
|
+
return {"error": f"Unknown platform. Use: {list(platforms.keys())}"}
|
|
179
|
+
|
|
180
|
+
lang_configs = {
|
|
181
|
+
"python": {"setup": "pip install -r requirements.txt", "test": "pytest --cov", "lint": "ruff check .", "build": "python -m build", "image": "python:3.12-slim"},
|
|
182
|
+
"node": {"setup": "npm ci", "test": "npm test", "lint": "npx eslint .", "build": "npm run build", "image": "node:20-alpine"},
|
|
183
|
+
"go": {"setup": "go mod download", "test": "go test ./...", "lint": "golangci-lint run", "build": "go build -o app .", "image": "golang:1.22-alpine"},
|
|
184
|
+
"rust": {"setup": "cargo fetch", "test": "cargo test", "lint": "cargo clippy", "build": "cargo build --release", "image": "rust:1.77-slim"},
|
|
185
|
+
"java": {"setup": "mvn install -DskipTests", "test": "mvn test", "lint": "mvn checkstyle:check", "build": "mvn package", "image": "maven:3.9-eclipse-temurin-21"},
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
lang = lang_configs.get(language, lang_configs["python"])
|
|
189
|
+
deploy_configs = {
|
|
190
|
+
"aws": {"cmd": "aws ecs update-service --cluster prod --service app --force-new-deployment", "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"]},
|
|
191
|
+
"gcp": {"cmd": "gcloud run deploy app --image gcr.io/$PROJECT_ID/app --region us-central1", "env_vars": ["GCP_PROJECT_ID", "GCP_SA_KEY"]},
|
|
192
|
+
"azure": {"cmd": "az webapp deployment source config-zip -g rg-prod -n app --src app.zip", "env_vars": ["AZURE_CREDENTIALS"]},
|
|
193
|
+
"kubernetes": {"cmd": "kubectl apply -f k8s/ && kubectl rollout status deployment/app", "env_vars": ["KUBECONFIG"]},
|
|
194
|
+
"docker": {"cmd": "docker push registry.example.com/app:latest", "env_vars": ["DOCKER_USERNAME", "DOCKER_PASSWORD"]},
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
deploy = deploy_configs.get(deploy_target, deploy_configs["docker"])
|
|
198
|
+
|
|
199
|
+
if platform == "github_actions":
|
|
200
|
+
pipeline = f"""name: CI/CD Pipeline
|
|
201
|
+
|
|
202
|
+
on:
|
|
203
|
+
push:
|
|
204
|
+
branches: [{branch}]
|
|
205
|
+
pull_request:
|
|
206
|
+
branches: [{branch}]
|
|
207
|
+
|
|
208
|
+
jobs:"""
|
|
209
|
+
for stage in stages:
|
|
210
|
+
if stage == "lint":
|
|
211
|
+
pipeline += f"""
|
|
212
|
+
lint:
|
|
213
|
+
runs-on: ubuntu-latest
|
|
214
|
+
steps:
|
|
215
|
+
- uses: actions/checkout@v4
|
|
216
|
+
- uses: actions/setup-python@v5
|
|
217
|
+
with:
|
|
218
|
+
python-version: '3.12'
|
|
219
|
+
- run: {lang['setup']}
|
|
220
|
+
- run: {lang['lint']}
|
|
221
|
+
"""
|
|
222
|
+
elif stage == "test":
|
|
223
|
+
pipeline += f"""
|
|
224
|
+
test:
|
|
225
|
+
runs-on: ubuntu-latest
|
|
226
|
+
needs: [lint]
|
|
227
|
+
steps:
|
|
228
|
+
- uses: actions/checkout@v4
|
|
229
|
+
- uses: actions/setup-python@v5
|
|
230
|
+
with:
|
|
231
|
+
python-version: '3.12'
|
|
232
|
+
- run: {lang['setup']}
|
|
233
|
+
- run: {lang['test']}
|
|
234
|
+
"""
|
|
235
|
+
elif stage == "build":
|
|
236
|
+
pipeline += f"""
|
|
237
|
+
build:
|
|
238
|
+
runs-on: ubuntu-latest
|
|
239
|
+
needs: [test]
|
|
240
|
+
steps:
|
|
241
|
+
- uses: actions/checkout@v4
|
|
242
|
+
- run: {lang['setup']}
|
|
243
|
+
- run: {lang['build']}
|
|
244
|
+
"""
|
|
245
|
+
elif stage == "deploy":
|
|
246
|
+
pipeline += f"""
|
|
247
|
+
deploy:
|
|
248
|
+
runs-on: ubuntu-latest
|
|
249
|
+
needs: [build]
|
|
250
|
+
if: github.ref == 'refs/heads/{branch}'
|
|
251
|
+
steps:
|
|
252
|
+
- uses: actions/checkout@v4
|
|
253
|
+
- run: {deploy['cmd']}
|
|
254
|
+
env:"""
|
|
255
|
+
for env in deploy["env_vars"]:
|
|
256
|
+
pipeline += f"""
|
|
257
|
+
{env}: ${{{{ secrets.{env} }}}}"""
|
|
258
|
+
pipeline += "\n"
|
|
259
|
+
else:
|
|
260
|
+
pipeline = f"# {platform} pipeline for {language} - configure based on platform docs"
|
|
261
|
+
|
|
262
|
+
return {
|
|
263
|
+
"platform": platform,
|
|
264
|
+
"language": language,
|
|
265
|
+
"stages": stages,
|
|
266
|
+
"deploy_target": deploy_target,
|
|
267
|
+
"branch": branch,
|
|
268
|
+
"config_file": platforms[platform]["file"],
|
|
269
|
+
"pipeline_config": pipeline,
|
|
270
|
+
"required_secrets": deploy.get("env_vars", []),
|
|
271
|
+
"base_image": lang["image"],
|
|
272
|
+
"commands": {
|
|
273
|
+
"setup": lang["setup"],
|
|
274
|
+
"test": lang["test"],
|
|
275
|
+
"lint": lang["lint"],
|
|
276
|
+
"build": lang["build"],
|
|
277
|
+
"deploy": deploy["cmd"],
|
|
278
|
+
},
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _log_analyzer(log_lines: list[str], time_window_minutes: int) -> dict:
|
|
283
|
+
"""Analyze log lines for patterns, errors, and anomalies."""
|
|
284
|
+
if not log_lines:
|
|
285
|
+
return {"error": "No log lines provided"}
|
|
286
|
+
|
|
287
|
+
levels = Counter()
|
|
288
|
+
error_messages = []
|
|
289
|
+
warning_messages = []
|
|
290
|
+
timestamps = []
|
|
291
|
+
ip_addresses = Counter()
|
|
292
|
+
status_codes = Counter()
|
|
293
|
+
paths = Counter()
|
|
294
|
+
|
|
295
|
+
level_patterns = {
|
|
296
|
+
"ERROR": r'\b(?:ERROR|ERR|FATAL|CRITICAL)\b',
|
|
297
|
+
"WARNING": r'\b(?:WARNING|WARN)\b',
|
|
298
|
+
"INFO": r'\b(?:INFO)\b',
|
|
299
|
+
"DEBUG": r'\b(?:DEBUG|TRACE)\b',
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
for line in log_lines:
|
|
303
|
+
# Detect log level
|
|
304
|
+
detected = "UNKNOWN"
|
|
305
|
+
for level, pattern in level_patterns.items():
|
|
306
|
+
if re.search(pattern, line, re.I):
|
|
307
|
+
detected = level
|
|
308
|
+
break
|
|
309
|
+
levels[detected] += 1
|
|
310
|
+
|
|
311
|
+
if detected == "ERROR":
|
|
312
|
+
error_messages.append(line[:200])
|
|
313
|
+
elif detected == "WARNING":
|
|
314
|
+
warning_messages.append(line[:200])
|
|
315
|
+
|
|
316
|
+
# Extract timestamps
|
|
317
|
+
ts_match = re.search(r'\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}', line)
|
|
318
|
+
if ts_match:
|
|
319
|
+
timestamps.append(ts_match.group())
|
|
320
|
+
|
|
321
|
+
# Extract IPs
|
|
322
|
+
ip_match = re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', line)
|
|
323
|
+
for ip in ip_match:
|
|
324
|
+
ip_addresses[ip] += 1
|
|
325
|
+
|
|
326
|
+
# Extract HTTP status codes
|
|
327
|
+
status_match = re.search(r'\b([1-5]\d{2})\b', line)
|
|
328
|
+
if status_match:
|
|
329
|
+
status_codes[status_match.group(1)] += 1
|
|
330
|
+
|
|
331
|
+
# Extract URL paths
|
|
332
|
+
path_match = re.search(r'(?:GET|POST|PUT|DELETE|PATCH)\s+(/\S+)', line)
|
|
333
|
+
if path_match:
|
|
334
|
+
paths[path_match.group(1)] += 1
|
|
335
|
+
|
|
336
|
+
total = len(log_lines)
|
|
337
|
+
error_rate = (levels.get("ERROR", 0) / max(total, 1)) * 100
|
|
338
|
+
|
|
339
|
+
# Anomaly detection
|
|
340
|
+
anomalies = []
|
|
341
|
+
if error_rate > 10:
|
|
342
|
+
anomalies.append({"type": "high_error_rate", "value": f"{error_rate:.1f}%", "severity": "HIGH"})
|
|
343
|
+
if any(c > total * 0.3 for c in ip_addresses.values()):
|
|
344
|
+
top_ip = ip_addresses.most_common(1)[0]
|
|
345
|
+
anomalies.append({"type": "ip_concentration", "value": f"{top_ip[0]}: {top_ip[1]} requests", "severity": "MEDIUM"})
|
|
346
|
+
if status_codes.get("500", 0) > total * 0.05:
|
|
347
|
+
anomalies.append({"type": "server_errors", "value": f"{status_codes['500']} 500 errors", "severity": "HIGH"})
|
|
348
|
+
if status_codes.get("429", 0) > 0:
|
|
349
|
+
anomalies.append({"type": "rate_limiting", "value": f"{status_codes['429']} 429 responses", "severity": "MEDIUM"})
|
|
350
|
+
|
|
351
|
+
# Error patterns
|
|
352
|
+
error_patterns = Counter()
|
|
353
|
+
for err in error_messages:
|
|
354
|
+
# Normalize error messages
|
|
355
|
+
normalized = re.sub(r'\d+', 'N', err)
|
|
356
|
+
normalized = re.sub(r'[0-9a-f]{8,}', 'HASH', normalized, flags=re.I)
|
|
357
|
+
error_patterns[normalized[:80]] += 1
|
|
358
|
+
|
|
359
|
+
return {
|
|
360
|
+
"total_lines": total,
|
|
361
|
+
"log_levels": dict(levels),
|
|
362
|
+
"error_rate_pct": round(error_rate, 2),
|
|
363
|
+
"time_window_minutes": time_window_minutes,
|
|
364
|
+
"anomalies": anomalies,
|
|
365
|
+
"top_errors": [{"pattern": p, "count": c} for p, c in error_patterns.most_common(5)],
|
|
366
|
+
"top_ips": dict(ip_addresses.most_common(10)),
|
|
367
|
+
"status_codes": dict(status_codes.most_common(10)),
|
|
368
|
+
"top_paths": dict(paths.most_common(10)),
|
|
369
|
+
"error_samples": error_messages[:5],
|
|
370
|
+
"warning_samples": warning_messages[:5],
|
|
371
|
+
"health": "CRITICAL" if error_rate > 20 else "DEGRADED" if error_rate > 5 else "HEALTHY",
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _incident_classifier(title: str, description: str,
|
|
376
|
+
affected_services: list[str],
|
|
377
|
+
error_count: int, user_reports: int) -> dict:
|
|
378
|
+
"""Classify an incident by severity and category."""
|
|
379
|
+
desc_lower = description.lower()
|
|
380
|
+
|
|
381
|
+
# Category detection
|
|
382
|
+
categories = {
|
|
383
|
+
"outage": ["down", "unavailable", "outage", "unreachable", "offline", "502", "503"],
|
|
384
|
+
"performance": ["slow", "latency", "timeout", "degraded", "high load", "response time"],
|
|
385
|
+
"data": ["data loss", "corruption", "inconsistent", "missing data", "wrong data"],
|
|
386
|
+
"security": ["breach", "unauthorized", "vulnerability", "exploit", "attack", "ddos"],
|
|
387
|
+
"deployment": ["deploy", "rollback", "release", "broken build", "regression"],
|
|
388
|
+
"infrastructure": ["disk", "memory", "cpu", "network", "dns", "ssl", "certificate"],
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
detected_category = "unknown"
|
|
392
|
+
category_confidence = 0
|
|
393
|
+
for cat, keywords in categories.items():
|
|
394
|
+
matches = sum(1 for k in keywords if k in desc_lower)
|
|
395
|
+
if matches > category_confidence:
|
|
396
|
+
category_confidence = matches
|
|
397
|
+
detected_category = cat
|
|
398
|
+
|
|
399
|
+
# Severity scoring
|
|
400
|
+
severity_score = 0
|
|
401
|
+
|
|
402
|
+
if error_count > 1000:
|
|
403
|
+
severity_score += 4
|
|
404
|
+
elif error_count > 100:
|
|
405
|
+
severity_score += 3
|
|
406
|
+
elif error_count > 10:
|
|
407
|
+
severity_score += 2
|
|
408
|
+
else:
|
|
409
|
+
severity_score += 1
|
|
410
|
+
|
|
411
|
+
if user_reports > 100:
|
|
412
|
+
severity_score += 4
|
|
413
|
+
elif user_reports > 10:
|
|
414
|
+
severity_score += 3
|
|
415
|
+
elif user_reports > 0:
|
|
416
|
+
severity_score += 2
|
|
417
|
+
|
|
418
|
+
severity_score += min(3, len(affected_services))
|
|
419
|
+
|
|
420
|
+
if detected_category in ["outage", "security", "data"]:
|
|
421
|
+
severity_score += 2
|
|
422
|
+
|
|
423
|
+
if severity_score >= 10:
|
|
424
|
+
severity = "P1 - Critical"
|
|
425
|
+
response_time = "15 minutes"
|
|
426
|
+
escalation = "VP Engineering + On-call lead + Comms team"
|
|
427
|
+
elif severity_score >= 7:
|
|
428
|
+
severity = "P2 - High"
|
|
429
|
+
response_time = "30 minutes"
|
|
430
|
+
escalation = "Engineering lead + On-call engineer"
|
|
431
|
+
elif severity_score >= 4:
|
|
432
|
+
severity = "P3 - Medium"
|
|
433
|
+
response_time = "2 hours"
|
|
434
|
+
escalation = "On-call engineer"
|
|
435
|
+
else:
|
|
436
|
+
severity = "P4 - Low"
|
|
437
|
+
response_time = "Next business day"
|
|
438
|
+
escalation = "Team backlog"
|
|
439
|
+
|
|
440
|
+
return {
|
|
441
|
+
"title": title,
|
|
442
|
+
"category": detected_category,
|
|
443
|
+
"severity": severity,
|
|
444
|
+
"severity_score": severity_score,
|
|
445
|
+
"response_time": response_time,
|
|
446
|
+
"escalation": escalation,
|
|
447
|
+
"affected_services": affected_services,
|
|
448
|
+
"impact": {
|
|
449
|
+
"error_count": error_count,
|
|
450
|
+
"user_reports": user_reports,
|
|
451
|
+
"services_affected": len(affected_services),
|
|
452
|
+
},
|
|
453
|
+
"immediate_actions": [
|
|
454
|
+
f"Acknowledge incident within {response_time}",
|
|
455
|
+
"Create incident channel and war room",
|
|
456
|
+
f"Assess impact on {', '.join(affected_services[:3])}",
|
|
457
|
+
"Begin root cause investigation",
|
|
458
|
+
f"{'Notify customers' if severity_score >= 7 else 'Monitor for escalation'}",
|
|
459
|
+
],
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _runbook_generator(service_name: str, incident_type: str,
|
|
464
|
+
tech_stack: list[str], alert_threshold: str) -> dict:
|
|
465
|
+
"""Generate an operational runbook for a service and incident type."""
|
|
466
|
+
runbook_templates = {
|
|
467
|
+
"high_cpu": {
|
|
468
|
+
"title": f"{service_name} - High CPU Usage Runbook",
|
|
469
|
+
"trigger": f"CPU usage exceeds {alert_threshold}",
|
|
470
|
+
"steps": [
|
|
471
|
+
{"step": 1, "action": "Check current CPU usage", "command": f"top -b -n 1 | head -20", "expected": "Identify top processes"},
|
|
472
|
+
{"step": 2, "action": "Check application metrics", "command": f"curl -s localhost:9090/metrics | grep cpu", "expected": "Application-level CPU metrics"},
|
|
473
|
+
{"step": 3, "action": "Check for runaway processes", "command": f"ps aux --sort=-%cpu | head -10", "expected": "Identify abnormal processes"},
|
|
474
|
+
{"step": 4, "action": "Check application logs", "command": f"journalctl -u {service_name} --since '30 min ago' | grep -i error", "expected": "Recent errors"},
|
|
475
|
+
{"step": 5, "action": "Scale if needed", "command": f"kubectl scale deployment {service_name} --replicas=3", "expected": "Pods scaling up"},
|
|
476
|
+
{"step": 6, "action": "Restart if unresolved", "command": f"systemctl restart {service_name}", "expected": "Service restarts, CPU drops"},
|
|
477
|
+
],
|
|
478
|
+
},
|
|
479
|
+
"high_memory": {
|
|
480
|
+
"title": f"{service_name} - High Memory Usage Runbook",
|
|
481
|
+
"trigger": f"Memory usage exceeds {alert_threshold}",
|
|
482
|
+
"steps": [
|
|
483
|
+
{"step": 1, "action": "Check memory usage", "command": "free -h && cat /proc/meminfo | head -5", "expected": "Current memory state"},
|
|
484
|
+
{"step": 2, "action": "Identify memory consumers", "command": "ps aux --sort=-%mem | head -10", "expected": "Top memory processes"},
|
|
485
|
+
{"step": 3, "action": "Check for memory leaks", "command": f"curl -s localhost:9090/metrics | grep memory", "expected": "Memory growth pattern"},
|
|
486
|
+
{"step": 4, "action": "Clear caches if safe", "command": "sync && echo 3 > /proc/sys/vm/drop_caches", "expected": "Cache memory freed"},
|
|
487
|
+
{"step": 5, "action": "Restart application", "command": f"systemctl restart {service_name}", "expected": "Memory usage drops"},
|
|
488
|
+
],
|
|
489
|
+
},
|
|
490
|
+
"service_down": {
|
|
491
|
+
"title": f"{service_name} - Service Down Runbook",
|
|
492
|
+
"trigger": f"Health check fails for {service_name}",
|
|
493
|
+
"steps": [
|
|
494
|
+
{"step": 1, "action": "Verify service status", "command": f"systemctl status {service_name}", "expected": "Check if active/failed"},
|
|
495
|
+
{"step": 2, "action": "Check logs", "command": f"journalctl -u {service_name} -n 50 --no-pager", "expected": "Identify crash reason"},
|
|
496
|
+
{"step": 3, "action": "Check dependencies", "command": "curl -s localhost:5432/health && curl -s localhost:6379/health", "expected": "DB and cache reachable"},
|
|
497
|
+
{"step": 4, "action": "Check disk space", "command": "df -h", "expected": "Adequate disk space"},
|
|
498
|
+
{"step": 5, "action": "Check network", "command": "netstat -tlnp | grep -E '(80|443|8080)'", "expected": "Ports available"},
|
|
499
|
+
{"step": 6, "action": "Restart service", "command": f"systemctl restart {service_name}", "expected": "Service comes back online"},
|
|
500
|
+
{"step": 7, "action": "Verify recovery", "command": f"curl -s localhost:8080/health", "expected": "200 OK response"},
|
|
501
|
+
],
|
|
502
|
+
},
|
|
503
|
+
"high_latency": {
|
|
504
|
+
"title": f"{service_name} - High Latency Runbook",
|
|
505
|
+
"trigger": f"P99 latency exceeds {alert_threshold}",
|
|
506
|
+
"steps": [
|
|
507
|
+
{"step": 1, "action": "Check current latency metrics", "command": f"curl -s localhost:9090/metrics | grep latency", "expected": "Current latency values"},
|
|
508
|
+
{"step": 2, "action": "Check database performance", "command": "PGPASSWORD=$DB_PASS psql -h localhost -U app -c 'SELECT * FROM pg_stat_activity WHERE state != \\'idle\\';'", "expected": "Active queries"},
|
|
509
|
+
{"step": 3, "action": "Check connection pools", "command": f"curl -s localhost:9090/metrics | grep pool", "expected": "Pool utilization"},
|
|
510
|
+
{"step": 4, "action": "Check downstream services", "command": "for svc in api-gateway auth-service; do curl -w '%{time_total}' -o /dev/null -s http://$svc:8080/health; echo \" $svc\"; done", "expected": "Response times"},
|
|
511
|
+
{"step": 5, "action": "Scale horizontally", "command": f"kubectl scale deployment {service_name} --replicas=5", "expected": "Load distributed"},
|
|
512
|
+
],
|
|
513
|
+
},
|
|
514
|
+
"disk_full": {
|
|
515
|
+
"title": f"{service_name} - Disk Full Runbook",
|
|
516
|
+
"trigger": f"Disk usage exceeds {alert_threshold}",
|
|
517
|
+
"steps": [
|
|
518
|
+
{"step": 1, "action": "Check disk usage", "command": "df -h", "expected": "Identify full partition"},
|
|
519
|
+
{"step": 2, "action": "Find large files", "command": "du -sh /var/log/* | sort -rh | head -10", "expected": "Largest files"},
|
|
520
|
+
{"step": 3, "action": "Rotate logs", "command": "logrotate -f /etc/logrotate.conf", "expected": "Logs rotated"},
|
|
521
|
+
{"step": 4, "action": "Clean old containers", "command": "docker system prune -f", "expected": "Docker space freed"},
|
|
522
|
+
{"step": 5, "action": "Clean package cache", "command": "apt clean && rm -rf /tmp/*", "expected": "Temp files removed"},
|
|
523
|
+
],
|
|
524
|
+
},
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
if incident_type not in runbook_templates:
|
|
528
|
+
incident_type = "service_down"
|
|
529
|
+
|
|
530
|
+
template = runbook_templates[incident_type]
|
|
531
|
+
|
|
532
|
+
return {
|
|
533
|
+
"runbook_title": template["title"],
|
|
534
|
+
"service": service_name,
|
|
535
|
+
"incident_type": incident_type,
|
|
536
|
+
"trigger": template["trigger"],
|
|
537
|
+
"tech_stack": tech_stack,
|
|
538
|
+
"alert_threshold": alert_threshold,
|
|
539
|
+
"steps": template["steps"],
|
|
540
|
+
"escalation_policy": {
|
|
541
|
+
"level_1": {"time": "15 min", "who": "On-call engineer", "action": "Follow runbook steps"},
|
|
542
|
+
"level_2": {"time": "30 min", "who": "Team lead", "action": "Assist with troubleshooting"},
|
|
543
|
+
"level_3": {"time": "60 min", "who": "Engineering manager", "action": "Coordinate response, customer comms"},
|
|
544
|
+
},
|
|
545
|
+
"post_incident": [
|
|
546
|
+
"Update incident timeline",
|
|
547
|
+
"Write root cause analysis",
|
|
548
|
+
"Create follow-up tickets for permanent fixes",
|
|
549
|
+
"Schedule post-mortem meeting within 48 hours",
|
|
550
|
+
"Update monitoring and alerting if gaps found",
|
|
551
|
+
],
|
|
552
|
+
"available_incident_types": list(runbook_templates.keys()),
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
# ---------------------------------------------------------------------------
|
|
557
|
+
# MCP Server
|
|
558
|
+
# ---------------------------------------------------------------------------
|
|
559
|
+
# Single module-level FastMCP server instance; the @mcp.tool() decorated
# functions below register themselves on it. The instructions string is the
# server description exposed to MCP clients.
mcp = FastMCP(
    "DevOps AI MCP",
    instructions="Infrastructure and DevOps toolkit: Docker Compose generation, CI/CD pipeline building, log analysis, incident classification, and runbook generation. By MEOK AI Labs.")
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
@mcp.tool()
def docker_compose_generator(services: list[dict], network_name: str = "app-network",
                             include_volumes: bool = True, api_key: str = "") -> dict:
    """Generate a Docker Compose configuration with networking, health checks,
    and volume management.

    Args:
        services: List of services as [{"name": "api", "image": "node:20", "ports": ["3000:3000"], "environment": {"NODE_ENV": "production"}, "depends_on": ["db"], "health_check": "curl -f http://localhost:3000/health"}]
        network_name: Docker network name
        include_volumes: Whether to create named volumes for services
    """
    # Gate on the external access check first, then the local free-tier quota.
    allowed, msg, tier = check_access(api_key)
    if not allowed:
        return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}

    limit_msg = _check_rate_limit()
    if limit_msg:
        return {"error": limit_msg}

    try:
        return _docker_compose(services, network_name, include_volumes)
    except Exception as exc:
        # Surface failures as structured errors instead of crashing the server.
        return {"error": str(exc)}
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
@mcp.tool()
def cicd_pipeline_builder(platform: str = "github_actions", language: str = "python",
                          stages: list[str] | None = None,
                          deploy_target: str = "docker",
                          branch: str = "main", api_key: str = "") -> dict:
    """Generate a CI/CD pipeline configuration for common platforms and languages.

    Args:
        platform: CI platform (github_actions, gitlab_ci, jenkins, circleci)
        language: Project language (python, node, go, rust, java)
        stages: Pipeline stages to include (lint, test, build, deploy)
        deploy_target: Deployment target (aws, gcp, azure, kubernetes, docker)
        branch: Branch that triggers deployment
    """
    # BUG FIX: the default was a mutable list literal shared across calls;
    # use a None sentinel instead. Behavior for callers is unchanged.
    if stages is None:
        stages = ["lint", "test", "build", "deploy"]

    # Gate on the external access check first, then the local free-tier quota.
    allowed, msg, tier = check_access(api_key)
    if not allowed:
        return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}

    err = _check_rate_limit()
    if err:
        return {"error": err}
    try:
        return _cicd_pipeline(platform, language, stages, deploy_target, branch)
    except Exception as e:
        # Surface failures as structured errors instead of crashing the server.
        return {"error": str(e)}
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
@mcp.tool()
def log_analyzer(log_lines: list[str], time_window_minutes: int = 60, api_key: str = "") -> dict:
    """Analyze log lines to extract error patterns, anomalies, status code
    distributions, and top IP addresses.

    Args:
        log_lines: List of raw log lines to analyze
        time_window_minutes: Time window the logs cover (for rate calculations)
        api_key: API key checked against the access/licensing tiers
    """
    # Access check before any analysis work.
    allowed, msg, _tier = check_access(api_key)
    if not allowed:
        return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}

    # Shared per-process rate limit.
    rate_err = _check_rate_limit()
    if rate_err:
        return {"error": rate_err}

    # Delegate to the analyzer; report failures as structured payloads
    # instead of letting exceptions escape the tool boundary.
    try:
        return _log_analyzer(log_lines, time_window_minutes)
    except Exception as exc:
        return {"error": str(exc)}
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
@mcp.tool()
def incident_classifier(title: str, description: str,
                        affected_services: list[str] | None = None,
                        error_count: int = 0,
                        user_reports: int = 0, api_key: str = "") -> dict:
    """Classify an incident by severity (P1-P4) and category with recommended
    response actions and escalation paths.

    Args:
        title: Incident title
        description: Detailed incident description
        affected_services: List of affected service names (defaults to empty)
        error_count: Number of errors observed
        user_reports: Number of user-reported issues
        api_key: API key checked against the access/licensing tiers
    """
    # Fix: the original used a mutable [] literal as the default argument,
    # which is a single shared object across calls; create a fresh list.
    if affected_services is None:
        affected_services = []

    # Licensing gate, then rate limit — same ordering as the sibling tools.
    allowed, msg, tier = check_access(api_key)
    if not allowed:
        return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}

    err = _check_rate_limit()
    if err:
        return {"error": err}

    # Surface failures as structured error payloads rather than raising.
    try:
        return _incident_classifier(title, description, affected_services, error_count, user_reports)
    except Exception as e:
        return {"error": str(e)}
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
@mcp.tool()
def runbook_generator(service_name: str, incident_type: str = "service_down",
                      tech_stack: list[str] | None = None,
                      alert_threshold: str = "90%", api_key: str = "") -> dict:
    """Generate an operational runbook with step-by-step commands, expected
    outcomes, and escalation policies.

    Args:
        service_name: Name of the service
        incident_type: Type of incident (high_cpu, high_memory, service_down, high_latency, disk_full)
        tech_stack: Technologies used (e.g. ["python", "postgres", "redis"]); defaults to empty
        alert_threshold: Alert threshold that triggered the runbook
        api_key: API key checked against the access/licensing tiers
    """
    # Fix: the original used a mutable [] literal as the default argument,
    # which is a single shared object across calls; create a fresh list.
    if tech_stack is None:
        tech_stack = []

    # Licensing gate, then rate limit — same ordering as the sibling tools.
    allowed, msg, tier = check_access(api_key)
    if not allowed:
        return {"error": msg, "upgrade_url": "https://meok.ai/pricing"}

    err = _check_rate_limit()
    if err:
        return {"error": err}

    # Surface failures as structured error payloads rather than raising.
    try:
        return _runbook_generator(service_name, incident_type, tech_stack, alert_threshold)
    except Exception as e:
        return {"error": str(e)}
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
# Script entry point: start the MCP server when executed directly
# (no-op on import, so the tools can be used as a library/module).
if __name__ == "__main__":
    mcp.run()
|