dockerbrain 1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- core/__init__.py +1 -0
- core/__main__.py +4 -0
- core/ai_advisor.py +345 -0
- core/cli.py +369 -0
- core/dockerizer.py +310 -0
- core/fixer/__init__.py +21 -0
- core/fixer/container.py +171 -0
- core/fixer/dockerfile.py +225 -0
- core/llm.py +212 -0
- core/monitor/__init__.py +33 -0
- core/monitor/collector.py +197 -0
- core/monitor/display.py +279 -0
- core/monitor/snapshot.py +57 -0
- core/optimizer/__init__.py +23 -0
- core/optimizer/engine.py +84 -0
- core/optimizer/rules.py +221 -0
- core/storage.py +161 -0
- core/templates.py +559 -0
- core/utils.py +38 -0
- dockerbrain-1.0.dist-info/METADATA +156 -0
- dockerbrain-1.0.dist-info/RECORD +25 -0
- dockerbrain-1.0.dist-info/WHEEL +5 -0
- dockerbrain-1.0.dist-info/entry_points.txt +2 -0
- dockerbrain-1.0.dist-info/licenses/LICENSE +201 -0
- dockerbrain-1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import threading
|
|
4
|
+
import time
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
|
|
9
|
+
import docker
|
|
10
|
+
from docker.errors import APIError, DockerException
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.live import Live
|
|
13
|
+
from rich.panel import Panel
|
|
14
|
+
|
|
15
|
+
from core.monitor.snapshot import (
|
|
16
|
+
ContainerSnapshot,
|
|
17
|
+
IDLE_CPU_THRESHOLD,
|
|
18
|
+
IDLE_CONSECUTIVE_POLLS,
|
|
19
|
+
)
|
|
20
|
+
from core.monitor.display import build_monitor_layout
|
|
21
|
+
from core.storage import store_snapshot
|
|
22
|
+
from core.utils import calc_cpu_percent, get_docker_offline_hint
|
|
23
|
+
|
|
24
|
+
console = Console()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ContainerMonitor:
    """Polls Docker container stats and tracks idle state.

    A container is flagged idle once its CPU percentage has been below
    ``IDLE_CPU_THRESHOLD`` for ``IDLE_CONSECUTIVE_POLLS`` consecutive polls.
    """

    def __init__(self, interval: int = 1) -> None:
        """Connect to Docker and set up the shared polling state.

        Args:
            interval: Seconds the background poll loop waits between polls.

        Raises:
            SystemExit: With code 3 when the Docker daemon is unreachable.
        """
        self.interval = interval
        self.client = self._connect()
        # Consecutive below-threshold polls, keyed by container name.
        self._idle_counter: dict[str, int] = defaultdict(int)
        # Guards _idle_counter and _latest_snapshots: they are touched by the
        # pool workers, the poll thread and the display loop.
        self._lock = threading.Lock()
        self._latest_snapshots: list[ContainerSnapshot] = []

    @staticmethod
    def _connect() -> docker.DockerClient:
        """Return a Docker client built from the environment.

        Raises:
            SystemExit: With code 3 (after printing a hint panel) when the
                client cannot be created.
        """
        try:
            return docker.from_env()
        except DockerException as exc:
            console.print(
                Panel(
                    "[red bold]Could not connect to Docker daemon.[/]\n\n"
                    f"{get_docker_offline_hint()}\n\n",
                    title="[bold red]Docker Unavailable[/]",
                    border_style="red",
                    expand=False,
                )
            )
            raise SystemExit(3) from exc

    @staticmethod
    def _parse_started_at(started_at: str) -> datetime | None:
        """Parse a container ``State.StartedAt`` string into an aware datetime.

        The fractional-second part is normalised to exactly six digits before
        parsing, because ``datetime.fromisoformat`` on Python < 3.11 rejects
        the nanosecond precision Docker reports.

        Returns:
            The timezone-aware start time, or ``None`` when the value is
            empty, unparseable, or carries no UTC offset.
        """
        if not started_at:
            return None
        try:
            text = started_at.replace("Z", "+00:00")
            head, dot, tail = text.partition(".")
            if dot:
                # Split "123456789+00:00" into its digits and trailing offset.
                n_digits = len(tail) - len(tail.lstrip("0123456789"))
                fraction, offset = tail[:n_digits], tail[n_digits:]
                text = f"{head}.{fraction[:6].ljust(6, '0')}{offset}"
            parsed = datetime.fromisoformat(text)
        except (ValueError, TypeError, AttributeError):
            return None
        # A naive value cannot be subtracted from an aware "now".
        return parsed if parsed.tzinfo is not None else None

    def _fetch_one(self, ctr: object) -> ContainerSnapshot | None:
        """Fetch stats for a single container.

        Args:
            ctr: Container object exposing ``stats``, ``reload``, ``attrs``,
                ``name``, ``status`` and ``image``.

        Returns:
            A populated snapshot, or ``None`` when the Docker API call fails
            (a "Skipping" line is printed in that case).
        """
        try:
            raw = ctr.stats(stream=False)
            ctr.reload()
        except (APIError, DockerException) as exc:
            console.print(f"[yellow] Skipping {ctr.name}: {exc}[/]", highlight=False)
            return None

        cpu = calc_cpu_percent(raw)

        mem_stats = raw.get("memory_stats", {})
        mem_raw = mem_stats.get("usage", 0)
        mem_limit = mem_stats.get("limit", 1)

        # Prefer "inactive_file"; fall back to "cache" when it is absent or 0.
        mem_detail = mem_stats.get("stats", {})
        cache = mem_detail.get("inactive_file", 0) or mem_detail.get("cache", 0)
        mem_usage = mem_raw - cache

        bytes_per_mb = 1024 * 1024
        mem_percent = (mem_usage / mem_limit) * 100.0 if mem_limit else 0.0

        interfaces = list(raw.get("networks", {}).values())

        def _net_total(counter: str) -> int:
            """Sum one per-interface counter across every network."""
            return sum(iface.get(counter, 0) for iface in interfaces)

        uptime_secs = 0.0
        start_dt = self._parse_started_at(
            ctr.attrs.get("State", {}).get("StartedAt", "")
        )
        if start_dt is not None:
            uptime_secs = (datetime.now(timezone.utc) - start_dt).total_seconds()

        restart_count = ctr.attrs.get("RestartCount", 0)
        image_tag = ctr.image.tags[0] if ctr.image.tags else ctr.image.short_id

        # Workers run concurrently, so the counter update is serialised.
        with self._lock:
            if cpu < IDLE_CPU_THRESHOLD:
                self._idle_counter[ctr.name] += 1
            else:
                self._idle_counter[ctr.name] = 0
            idle_polls = self._idle_counter[ctr.name]

        return ContainerSnapshot(
            name=ctr.name,
            status=ctr.status,
            cpu_percent=cpu,
            mem_usage_mb=mem_usage / bytes_per_mb,
            mem_limit_mb=mem_limit / bytes_per_mb,
            mem_percent=mem_percent,
            mem_cache_mb=cache / bytes_per_mb,
            net_rx_bytes=_net_total("rx_bytes"),
            net_tx_bytes=_net_total("tx_bytes"),
            net_rx_packets=_net_total("rx_packets"),
            net_tx_packets=_net_total("tx_packets"),
            net_rx_errors=_net_total("rx_errors"),
            net_tx_errors=_net_total("tx_errors"),
            net_rx_dropped=_net_total("rx_dropped"),
            net_tx_dropped=_net_total("tx_dropped"),
            is_idle=idle_polls >= IDLE_CONSECUTIVE_POLLS,
            idle_polls=idle_polls,
            uptime_seconds=uptime_secs,
            restart_count=restart_count,
            image_tag=image_tag,
        )

    def poll(self) -> list[ContainerSnapshot]:
        """Collect one snapshot for every running container in parallel.

        Uses a thread pool so N containers finish in ~1s (time of slowest
        single stats call), not N seconds. Every snapshot collected is also
        passed to ``store_snapshot``.

        Returns:
            Snapshots sorted by container name; containers whose stats call
            failed are omitted.
        """
        containers = self.client.containers.list()

        # Forget idle counts of containers that are no longer listed, so the
        # dict does not grow forever and a re-created name starts from zero.
        live_names = {ctr.name for ctr in containers}
        with self._lock:
            for stale in [name for name in self._idle_counter if name not in live_names]:
                del self._idle_counter[stale]

        if not containers:
            return []

        snapshots: list[ContainerSnapshot] = []

        with ThreadPoolExecutor(max_workers=min(len(containers), 20)) as pool:
            futures = [pool.submit(self._fetch_one, ctr) for ctr in containers]
            for fut in as_completed(futures):
                result = fut.result()
                if result is not None:
                    snapshots.append(result)
                    store_snapshot(result)

        snapshots.sort(key=lambda s: s.name)
        return snapshots

    def run(self, duration: int | None = None) -> None:
        """Poll containers in a background thread, refresh display every second.

        Polling runs in a daemon thread using a thread pool.
        The display loop redraws every second from the latest shared snapshot.

        Args:
            duration: Stop after roughly this many seconds; ``None`` (or 0)
                runs until interrupted with Ctrl-C.
        """
        start = time.monotonic()
        stop_event = threading.Event()

        def _poll_loop() -> None:
            last_error: str | None = None
            while not stop_event.is_set():
                try:
                    snaps = self.poll()
                except Exception as exc:  # thread boundary: keep polling alive
                    # Without this the thread would die on the first failure
                    # (e.g. daemon restart) and the display would freeze.
                    message = f"{type(exc).__name__}: {exc}"
                    if message != last_error:  # avoid repeating every interval
                        console.print(
                            f" Poll failed, will retry: {message}",
                            style="yellow",
                            markup=False,
                            highlight=False,
                        )
                        last_error = message
                else:
                    last_error = None
                    with self._lock:
                        self._latest_snapshots = snaps
                stop_event.wait(self.interval)

        poll_thread = threading.Thread(target=_poll_loop, daemon=True)
        poll_thread.start()

        try:
            with Live(console=console, refresh_per_second=4, screen=True) as live:
                while True:
                    elapsed = time.monotonic() - start
                    if duration and elapsed >= duration:
                        break

                    # Copy under the lock so rendering never sees a swap.
                    with self._lock:
                        snaps = list(self._latest_snapshots)

                    live.update(build_monitor_layout(snaps))
                    time.sleep(1)

        except KeyboardInterrupt:
            pass
        finally:
            stop_event.set()

        elapsed = time.monotonic() - start
        console.print(f"\n[yellow]Monitoring stopped after {elapsed:.0f}s.[/]")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def run_monitor(interval: int = 1, duration: int | None = None) -> None:
    """Build a ``ContainerMonitor`` and run it.

    Args:
        interval: Poll interval forwarded to ``ContainerMonitor``.
        duration: Run length forwarded to ``ContainerMonitor.run``.
    """
    ContainerMonitor(interval=interval).run(duration=duration)
|
core/monitor/display.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from rich.align import Align
|
|
4
|
+
from rich.layout import Layout
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
from rich.text import Text
|
|
8
|
+
|
|
9
|
+
from core.monitor.snapshot import (
|
|
10
|
+
ContainerSnapshot,
|
|
11
|
+
MEM_WARNING_PCT,
|
|
12
|
+
MEM_CRITICAL_PCT,
|
|
13
|
+
)
|
|
14
|
+
from core.utils import format_bytes
|
|
15
|
+
|
|
16
|
+
_BLOCKS = " ▏▎▍▌▋▊▉█"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _format_uptime(seconds: float) -> str:
|
|
20
|
+
"""Convert seconds to a human-readable uptime string."""
|
|
21
|
+
if seconds < 60:
|
|
22
|
+
return f"{seconds:.0f}s"
|
|
23
|
+
if seconds < 3600:
|
|
24
|
+
return f"{seconds / 60:.0f}m"
|
|
25
|
+
hours = seconds // 3600
|
|
26
|
+
mins = (seconds % 3600) // 60
|
|
27
|
+
if hours < 24:
|
|
28
|
+
return f"{hours:.0f}h {mins:.0f}m"
|
|
29
|
+
days = hours // 24
|
|
30
|
+
hours = hours % 24
|
|
31
|
+
return f"{days:.0f}d {hours:.0f}h"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _make_bar(value: float, max_value: float, width: int, color: str) -> Text:
    """Build a Unicode bar with sub-character precision.

    Uses eighth-block characters (▏▎▍▌▋▊▉█) so a 30-char bar
    effectively has 240 discrete steps of resolution.

    Args:
        value: Quantity to draw.
        max_value: Quantity that corresponds to a completely full bar.
        width: Bar length in characters.
        color: Rich style applied to the filled part.

    Returns:
        A ``Text`` of exactly ``width`` cells: filled blocks in ``color``
        followed by dim ``░`` padding. An all-dim bar is returned when
        ``max_value`` is not positive.
    """
    if max_value <= 0:
        return Text("░" * width, style="dim")

    # Clamp to [0, 1]. A negative value would otherwise give negative
    # floor-div/modulo results below and a bar longer than ``width``.
    ratio = min(max(value / max_value, 0.0), 1.0)
    full, frac = divmod(int(ratio * width * 8), 8)
    # A partial block occupies one cell of its own.
    empty = width - full - (1 if frac else 0)

    bar = Text()
    bar.append("█" * full, style=color)
    if frac:
        bar.append(_BLOCKS[frac], style=color)
    bar.append("░" * empty, style="dim")
    return bar
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _cpu_color(pct: float) -> str:
|
|
58
|
+
"""Return a Rich style for a CPU percentage."""
|
|
59
|
+
if pct > 80:
|
|
60
|
+
return "bold red"
|
|
61
|
+
if pct > 50:
|
|
62
|
+
return "yellow"
|
|
63
|
+
return "green"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _mem_color(pct: float) -> str:
    """Pick the Rich style used to draw a memory percentage.

    Returns ``"bold red"`` above ``MEM_CRITICAL_PCT``, ``"yellow"`` above
    ``MEM_WARNING_PCT``, otherwise ``"cyan"``.
    """
    # Ordered critical first; the first threshold exceeded wins.
    bands = ((MEM_CRITICAL_PCT, "bold red"), (MEM_WARNING_PCT, "yellow"))
    return next((style for limit, style in bands if pct > limit), "cyan")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _short_label(name: str) -> str:
    """Return *name* if it has at most 14 characters, else its first 11 plus "…"."""
    return name if len(name) <= 14 else name[:11] + "…"


def _build_header_panel() -> Panel:
    """Build the centred "DockerBrain Monitor" banner panel."""
    title = Text()
    title.append("DockerBrain", style="bold cyan")
    title.append(" Monitor", style="dim")
    return Panel(
        Align.center(title),
        border_style="bright_blue",
        style="on #1a1a2e",
    )


def _build_overview_panel(snapshots: list[ContainerSnapshot]) -> Panel:
    """Build the "System Overview" panel of aggregate counts and totals."""
    running = sum(1 for s in snapshots if s.status == "running")
    idle = sum(1 for s in snapshots if s.is_idle)
    healthy = sum(1 for s in snapshots if s.health_label == "HEALTHY")
    warning = sum(1 for s in snapshots if s.health_label == "WARNING")
    critical = sum(1 for s in snapshots if s.health_label == "CRITICAL")

    total_cpu = sum(s.cpu_percent for s in snapshots)
    total_mem = sum(s.mem_usage_mb for s in snapshots)
    # The denominator is the largest single limit, not the sum of limits.
    # NOTE(review): presumably approximates host memory for unlimited
    # containers; confirm.
    total_mem_limit = max((s.mem_limit_mb for s in snapshots), default=1)
    # A limit of 0 must not crash the whole redraw with ZeroDivisionError.
    overall_mem_pct = (total_mem / total_mem_limit) * 100 if total_mem_limit > 0 else 0
    total_cache = sum(s.mem_cache_mb for s in snapshots)

    stats = Text()
    stats.append(" Containers ", style="bold")
    stats.append(f"{len(snapshots)}", style="bold cyan")
    stats.append(" Running ", style="bold")
    stats.append(f"{running}", style="bold green")
    stats.append(" Idle ", style="bold")
    stats.append(f"{idle}", style="bold red" if idle else "dim")
    stats.append(" Healthy ", style="bold")
    stats.append(f"{healthy}", style="bold green")
    stats.append(" Warn ", style="bold")
    stats.append(f"{warning}", style="bold yellow" if warning else "dim")
    stats.append(" Crit ", style="bold")
    stats.append(f"{critical}\n", style="bold red" if critical else "dim")
    stats.append(" CPU ", style="bold")
    stats.append(f"{total_cpu:.1f}%", style=_cpu_color(total_cpu))
    stats.append(" Mem ", style="bold")
    stats.append(
        f"{total_mem:.0f}/{total_mem_limit:.0f} MB",
        style=_mem_color(overall_mem_pct),
    )
    stats.append(" Cache ", style="bold")
    stats.append(f"{total_cache:.0f} MB", style="dim")

    return Panel(
        stats,
        title="[bold cyan]System Overview[/]",
        border_style="cyan",
        padding=(0, 1),
    )


def _build_container_table_panel(snapshots: list[ContainerSnapshot]) -> Panel:
    """Build the "Containers" panel: name, image, status, uptime and health per row."""
    table = Table(expand=True, border_style="dim", show_lines=False, pad_edge=False)
    table.add_column("Container", style="bold cyan", no_wrap=True)
    table.add_column("Image", style="dim", no_wrap=True, max_width=28)
    table.add_column("Status", justify="center")
    table.add_column("Uptime", justify="right")
    table.add_column("Health", justify="center")

    for s in snapshots:
        status_text = Text()
        status_text.append("● ", style="green" if s.status == "running" else "red")
        status_text.append(s.status)

        table.add_row(
            s.name,
            s.image_tag,
            status_text,
            _format_uptime(s.uptime_seconds),
            Text(s.health_label, style=f"bold {s.health_style}"),
        )

    if not snapshots:
        table.add_row(
            Text("No running containers", style="yellow"),
            "", "", "", "",
        )

    return Panel(
        table,
        title="[bold cyan]Containers[/]",
        border_style="cyan",
        padding=(0, 1),
    )


def _build_cpu_panel(snapshots: list[ContainerSnapshot], bar_width: int) -> Panel:
    """Build the "CPU Usage" panel with one bar line per container."""
    content = Text()
    for i, s in enumerate(snapshots):
        if i:  # newline between lines, none after the last
            content.append("\n")
        color = _cpu_color(s.cpu_percent)
        content.append(f" {_short_label(s.name):<14} ", style="cyan")
        content.append_text(_make_bar(s.cpu_percent, 100, bar_width, color))
        content.append(f" {s.cpu_percent:>5.1f}%", style=f"bold {color}")
        if s.is_idle:
            content.append(" idle", style="bold red")

    if not snapshots:
        content.append(" Waiting for containers…", style="dim italic")

    return Panel(
        content,
        title="[bold green]CPU Usage[/]",
        border_style="green",
        padding=(0, 1),
    )


def _build_mem_panel(snapshots: list[ContainerSnapshot], bar_width: int) -> Panel:
    """Build the "Memory Usage" panel with one bar line per container."""
    content = Text()
    for i, s in enumerate(snapshots):
        if i:  # newline between lines, none after the last
            content.append("\n")
        color = _mem_color(s.mem_percent)
        content.append(f" {_short_label(s.name):<14} ", style="cyan")
        content.append_text(_make_bar(s.mem_percent, 100, bar_width, color))
        content.append(
            f" {s.mem_usage_mb:>6.1f}/{s.mem_limit_mb:>.0f}MB",
            style=color,
        )
        content.append(f" {s.mem_percent:.0f}%", style=f"bold {color}")

    if not snapshots:
        content.append(" Waiting for containers…", style="dim italic")

    return Panel(
        content,
        title="[bold blue]Memory Usage[/]",
        border_style="blue",
        padding=(0, 1),
    )


def _build_network_panel(snapshots: list[ContainerSnapshot]) -> Panel:
    """Build the "Network I/O" panel: traffic, packet, error and drop counts per row."""
    net_table = Table(
        expand=True,
        show_header=True,
        show_lines=False,
        border_style="dim",
        pad_edge=False,
    )
    net_table.add_column("Container", style="cyan", no_wrap=True)
    net_table.add_column("↓ Recv", justify="right", style="green")
    net_table.add_column("↑ Sent", justify="right", style="yellow")
    net_table.add_column("Rx Pkts", justify="right", style="blue")
    net_table.add_column("Tx Pkts", justify="right", style="blue")
    net_table.add_column("Err", justify="right")
    net_table.add_column("Drop", justify="right")

    for s in snapshots:
        # Receive and transmit sides are shown combined in one column each.
        errs = s.net_rx_errors + s.net_tx_errors
        drops = s.net_rx_dropped + s.net_tx_dropped

        net_table.add_row(
            s.name,
            format_bytes(s.net_rx_bytes),
            format_bytes(s.net_tx_bytes),
            f"{s.net_rx_packets:,}",
            f"{s.net_tx_packets:,}",
            Text(str(errs), style="bold red" if errs > 0 else "dim"),
            Text(str(drops), style="bold red" if drops > 0 else "dim"),
        )

    if not snapshots:
        net_table.add_row(
            Text("Waiting for containers…", style="dim italic"),
            "", "", "", "", "", "",
        )

    return Panel(
        net_table,
        title="[bold magenta]Network I/O[/]",
        border_style="magenta",
        padding=(0, 1),
    )


def build_monitor_layout(
    snapshots: list[ContainerSnapshot], bar_width: int = 30
) -> Layout:
    """Build a full-screen Rich Layout with bar graphs and panels.

    Args:
        snapshots: Latest snapshot per container; an empty list renders
            placeholder rows in every panel.
        bar_width: Character width of each CPU and memory bar.

    Returns:
        A ``Layout`` stacked top to bottom: header, overview, container
        table, a CPU/memory bar row, and the network table.
    """
    layout = Layout()

    layout.split_column(
        Layout(name="header", size=3),
        Layout(name="overview", size=4),
        Layout(name="table", ratio=2),
        Layout(name="bars_row", ratio=1),
        Layout(name="network", ratio=2),
    )

    layout["header"].update(_build_header_panel())
    layout["overview"].update(_build_overview_panel(snapshots))
    layout["table"].update(_build_container_table_panel(snapshots))
    layout["bars_row"].split_row(
        Layout(name="cpu", ratio=1),
        Layout(name="mem", ratio=1),
    )
    layout["cpu"].update(_build_cpu_panel(snapshots, bar_width))
    layout["mem"].update(_build_mem_panel(snapshots, bar_width))
    layout["network"].update(_build_network_panel(snapshots))

    return layout
|
core/monitor/snapshot.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
|
|
6
|
+
IDLE_CPU_THRESHOLD: float = 0.5
|
|
7
|
+
IDLE_CONSECUTIVE_POLLS: int = 10
|
|
8
|
+
MEM_WARNING_PCT: float = 70.0
|
|
9
|
+
MEM_CRITICAL_PCT: float = 85.0
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class ContainerSnapshot:
    """Resource usage of one container captured at a single moment."""

    # Identity and state.
    name: str
    status: str
    # CPU and memory figures.
    cpu_percent: float
    mem_usage_mb: float
    mem_limit_mb: float
    mem_percent: float
    # Network counters.
    net_rx_bytes: int
    net_tx_bytes: int
    net_rx_packets: int = 0
    net_tx_packets: int = 0
    net_rx_errors: int = 0
    net_tx_errors: int = 0
    net_rx_dropped: int = 0
    net_tx_dropped: int = 0
    mem_cache_mb: float = 0.0
    # Idle tracking.
    is_idle: bool = False
    idle_polls: int = 0
    uptime_seconds: float = 0.0
    restart_count: int = 0
    image_tag: str = ""
    # ISO-8601 UTC time at which this snapshot object was created.
    timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())

    @property
    def health_style(self) -> str:
        """Rich colour name for this snapshot's health label."""
        palette = {
            "IDLE": "red",
            "CRITICAL": "red",
            "WARNING": "yellow",
            "HEALTHY": "green",
        }
        return palette[self.health_label]

    @property
    def health_label(self) -> str:
        """Health classification: IDLE, CRITICAL, WARNING or HEALTHY.

        Idle wins over any memory condition. Otherwise memory usage above
        ``MEM_CRITICAL_PCT`` is CRITICAL and above ``MEM_WARNING_PCT`` is
        WARNING.
        """
        if self.is_idle:
            return "IDLE"
        # Highest threshold first; the first one exceeded decides the label.
        for limit, label in (
            (MEM_CRITICAL_PCT, "CRITICAL"),
            (MEM_WARNING_PCT, "WARNING"),
        ):
            if self.mem_percent > limit:
                return label
        return "HEALTHY"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from core.optimizer.rules import (
|
|
2
|
+
Severity,
|
|
3
|
+
Suggestion,
|
|
4
|
+
Rule,
|
|
5
|
+
IdleContainerRule,
|
|
6
|
+
MemoryHogRule,
|
|
7
|
+
NoMemoryLimitRule,
|
|
8
|
+
HighRestartRule,
|
|
9
|
+
StaleImageRule,
|
|
10
|
+
)
|
|
11
|
+
from core.optimizer.engine import RuleBasedOptimizer
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"Severity",
|
|
15
|
+
"Suggestion",
|
|
16
|
+
"Rule",
|
|
17
|
+
"IdleContainerRule",
|
|
18
|
+
"MemoryHogRule",
|
|
19
|
+
"NoMemoryLimitRule",
|
|
20
|
+
"HighRestartRule",
|
|
21
|
+
"StaleImageRule",
|
|
22
|
+
"RuleBasedOptimizer",
|
|
23
|
+
]
|
core/optimizer/engine.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
import docker
|
|
6
|
+
from docker.errors import APIError, DockerException
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.panel import Panel
|
|
9
|
+
|
|
10
|
+
from core.optimizer.rules import (
|
|
11
|
+
Suggestion,
|
|
12
|
+
IdleContainerRule,
|
|
13
|
+
MemoryHogRule,
|
|
14
|
+
NoMemoryLimitRule,
|
|
15
|
+
HighRestartRule,
|
|
16
|
+
StaleImageRule,
|
|
17
|
+
Rule,
|
|
18
|
+
)
|
|
19
|
+
from core.utils import get_docker_offline_hint
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from docker.models.containers import Container
|
|
23
|
+
|
|
24
|
+
console = Console()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RuleBasedOptimizer:
    """Runs the registered optimisation rules over running containers."""

    def __init__(self) -> None:
        """Connect to Docker and instantiate one of each built-in rule.

        Raises:
            SystemExit: With code 3 when the Docker daemon is unreachable.
        """
        self.client = self._connect()
        rule_classes = (
            IdleContainerRule,
            MemoryHogRule,
            NoMemoryLimitRule,
            HighRestartRule,
            StaleImageRule,
        )
        self.rules: list[Rule] = [rule_cls() for rule_cls in rule_classes]

    @staticmethod
    def _connect() -> docker.DockerClient:
        """Return a Docker client from the environment, or exit with code 3."""
        try:
            client = docker.from_env()
        except DockerException as exc:
            body = (
                "[red bold]Could not connect to Docker daemon.[/]\n\n"
                f"{get_docker_offline_hint()}\n\n"
            )
            console.print(
                Panel(
                    body,
                    title="[bold red]Docker Unavailable[/]",
                    border_style="red",
                    expand=False,
                )
            )
            raise SystemExit(3) from exc
        return client

    def analyze(self, container_name: str | None = None) -> list[Suggestion]:
        """Evaluate every rule against the target containers.

        Args:
            container_name: Analyse only this container; when omitted, all
                containers returned by ``containers.list()`` are analysed.

        Returns:
            All suggestions produced, ordered by ``severity.rank``.
            Containers whose reload or stats call fails are skipped with a
            printed notice.
        """
        containers: list[Container]
        if container_name:
            containers = [self.client.containers.get(container_name)]
        else:
            containers = self.client.containers.list()

        if not containers:
            return []

        collected: list[Suggestion] = []
        for ctr in containers:
            try:
                ctr.reload()
                raw_stats = ctr.stats(stream=False)
            except (APIError, DockerException) as exc:
                console.print(
                    f"[yellow] Skipping {ctr.name}: {exc}[/]",
                    highlight=False,
                )
                continue

            for rule in self.rules:
                collected.extend(rule.evaluate(ctr, raw_stats))

        return sorted(collected, key=lambda item: item.severity.rank)
|