raijin-server 0.2.6-py3-none-any.whl → 0.2.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raijin_server/__init__.py +1 -1
- raijin_server/cli.py +147 -2
- raijin_server/config.py +4 -4
- raijin_server/healthchecks.py +22 -0
- raijin_server/modules/calico.py +68 -3
- raijin_server/modules/cert_manager.py +33 -7
- raijin_server/modules/full_install.py +228 -1
- raijin_server/modules/kubernetes.py +18 -1
- raijin_server/modules/network.py +3 -3
- raijin_server/modules/prometheus.py +90 -5
- raijin_server/modules/sanitize.py +49 -1
- raijin_server/scripts/install.sh +3 -2
- raijin_server/scripts/log_size_metric.sh +17 -13
- raijin_server/scripts/pre-deploy-check.sh +2 -1
- raijin_server/utils.py +37 -1
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/METADATA +96 -79
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/RECORD +21 -21
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/WHEEL +0 -0
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/entry_points.txt +0 -0
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/top_level.txt +0 -0
raijin_server/modules/full_install.py
CHANGED

@@ -1,10 +1,13 @@
 """Instalacao completa e automatizada do ambiente produtivo."""
 
 import os
+import subprocess
+from typing import List
 
 import typer
 
 from raijin_server.utils import ExecutionContext, require_root
+from raijin_server.healthchecks import run_health_check
 from raijin_server.modules import (
     bootstrap,
     calico,
@@ -68,6 +71,196 @@ def _cert_manager_install_only(ctx: ExecutionContext) -> None:
     )
 
 
+def _confirm_colored(message: str, default: bool = True) -> bool:
+    """Confirmação com destaque visual."""
+    styled = typer.style(message, fg=typer.colors.YELLOW, bold=True)
+    return typer.confirm(styled, default=default)
+
+
+def _select_steps_interactively() -> List[str] | None:
+    typer.secho("Selecione passos (separados por vírgula) ou ENTER para todos:", fg=typer.colors.CYAN)
+    typer.echo("Exemplo: kubernetes,calico,cert_manager,traefik")
+    answer = typer.prompt("Passos", default="").strip()
+    if not answer:
+        return None
+    steps = [s.strip() for s in answer.split(",") if s.strip()]
+    return steps or None
+
+
+def _kube_snapshot(ctx: ExecutionContext, events: int = 100, namespace: str | None = None) -> None:
+    """Coleta snapshot rápido de cluster para debug (best-effort)."""
+    cmds = []
+    cmds.append(["kubectl", "get", "nodes", "-o", "wide"])
+
+    pods_cmd = ["kubectl", "get", "pods"]
+    if namespace:
+        pods_cmd += ["-n", namespace]
+    else:
+        pods_cmd.append("-A")
+    pods_cmd += ["-o", "wide"]
+    cmds.append(pods_cmd)
+
+    events_cmd = ["kubectl", "get", "events"]
+    if namespace:
+        events_cmd += ["-n", namespace]
+    else:
+        events_cmd.append("-A")
+    events_cmd += ["--sort-by=.lastTimestamp"]
+    cmds.append(events_cmd)
+
+    typer.secho("\n[DEBUG] Snapshot do cluster", fg=typer.colors.CYAN)
+    for cmd in cmds:
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+            typer.echo(f"$ {' '.join(cmd)}")
+            if result.stdout:
+                lines = result.stdout.strip().splitlines()
+                if cmd is events_cmd:
+                    lines = lines[-events:]
+                typer.echo("\n".join(lines))
+            elif result.stderr:
+                typer.echo(result.stderr.strip())
+        except Exception as exc:
+            typer.secho(f"(snapshot falhou: {exc})", fg=typer.colors.YELLOW)
+
+
+def _run_cmd(title: str, cmd: List[str], ctx: ExecutionContext, tail: int | None = None) -> None:
+    """Executa comando kubectl/helm best-effort para diagnosticos rapidos."""
+    typer.secho(f"\n[diagnose] {title}", fg=typer.colors.CYAN)
+    if ctx.dry_run:
+        typer.echo("[dry-run] comando nao executado")
+        return
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=40)
+        typer.echo(f"$ {' '.join(cmd)}")
+        output = result.stdout.strip() or result.stderr.strip()
+        if output:
+            lines = output.splitlines()
+            if tail:
+                lines = lines[-tail:]
+            typer.echo("\n".join(lines))
+        else:
+            typer.echo("(sem saida)")
+    except Exception as exc:
+        typer.secho(f"(falha ao executar: {exc})", fg=typer.colors.YELLOW)
+
+
+def _diag_namespace(ns: str, ctx: ExecutionContext, tail_events: int = 50) -> None:
+    _run_cmd(f"Pods em {ns}", ["kubectl", "get", "pods", "-n", ns, "-o", "wide"], ctx)
+    _run_cmd(f"Services em {ns}", ["kubectl", "get", "svc", "-n", ns], ctx)
+    _run_cmd(f"Deployments em {ns}", ["kubectl", "get", "deploy", "-n", ns], ctx)
+    _run_cmd(
+        f"Eventos em {ns}",
+        ["kubectl", "get", "events", "-n", ns, "--sort-by=.lastTimestamp"],
+        ctx,
+        tail=tail_events,
+    )
+
+
+def _diag_calico(ctx: ExecutionContext) -> None:
+    ns = "kube-system"
+    _run_cmd("Calico DaemonSets", ["kubectl", "get", "ds", "-n", ns, "-o", "wide"], ctx)
+    _run_cmd("Calico pods", ["kubectl", "get", "pods", "-n", ns, "-l", "k8s-app=calico-node", "-o", "wide"], ctx)
+    _run_cmd("Calico typha", ["kubectl", "get", "pods", "-n", ns, "-l", "k8s-app=calico-typha", "-o", "wide"], ctx)
+    _run_cmd("Calico events", ["kubectl", "get", "events", "-n", ns, "--sort-by=.lastTimestamp"], ctx, tail=50)
+
+
+def _diag_secrets(ctx: ExecutionContext) -> None:
+    _diag_namespace("kube-system", ctx)
+    _diag_namespace("external-secrets", ctx)
+
+
+def _diag_prometheus(ctx: ExecutionContext) -> None:
+    ns = "observability"
+    _run_cmd("Prometheus pods", ["kubectl", "get", "pods", "-n", ns, "-l", "app.kubernetes.io/name=prometheus"], ctx)
+    _diag_namespace(ns, ctx)
+
+
+def _diag_grafana(ctx: ExecutionContext) -> None:
+    ns = "observability"
+    _run_cmd("Grafana svc", ["kubectl", "get", "svc", "-n", ns, "-l", "app.kubernetes.io/name=grafana"], ctx)
+    _diag_namespace(ns, ctx)
+
+
+def _diag_loki(ctx: ExecutionContext) -> None:
+    ns = "observability"
+    _run_cmd("Loki statefulsets", ["kubectl", "get", "sts", "-n", ns, "-l", "app.kubernetes.io/name=loki"], ctx)
+    _diag_namespace(ns, ctx)
+
+
+def _diag_traefik(ctx: ExecutionContext) -> None:
+    ns = "traefik"
+    _run_cmd("Traefik ingress", ["kubectl", "get", "ingress", "-n", ns], ctx)
+    _diag_namespace(ns, ctx)
+
+
+def _diag_observability_ingress(ctx: ExecutionContext) -> None:
+    ns = "observability"
+    _run_cmd("Ingress objects", ["kubectl", "get", "ingress", "-n", ns], ctx)
+    _diag_namespace(ns, ctx)
+
+
+def _diag_observability_dashboards(ctx: ExecutionContext) -> None:
+    ns = "observability"
+    _run_cmd("ConfigMaps dashboards", ["kubectl", "get", "configmap", "-n", ns, "-l", "raijin/dashboards=true"], ctx)
+    _diag_namespace(ns, ctx)
+
+
+def _diag_minio(ctx: ExecutionContext) -> None:
+    ns = "minio"
+    _diag_namespace(ns, ctx)
+
+
+def _diag_kafka(ctx: ExecutionContext) -> None:
+    ns = "kafka"
+    _run_cmd("Kafka pods", ["kubectl", "get", "pods", "-n", ns, "-o", "wide"], ctx)
+    _diag_namespace(ns, ctx)
+
+
+def _diag_velero(ctx: ExecutionContext) -> None:
+    ns = "velero"
+    _diag_namespace(ns, ctx)
+
+
+def _diag_kong(ctx: ExecutionContext) -> None:
+    ns = "kong"
+    _diag_namespace(ns, ctx)
+
+
+DIAG_HANDLERS = {
+    "cert_manager": cert_manager.diagnose,
+    "calico": _diag_calico,
+    "secrets": _diag_secrets,
+    "prometheus": _diag_prometheus,
+    "grafana": _diag_grafana,
+    "loki": _diag_loki,
+    "traefik": _diag_traefik,
+    "observability_ingress": _diag_observability_ingress,
+    "observability_dashboards": _diag_observability_dashboards,
+    "minio": _diag_minio,
+    "kafka": _diag_kafka,
+    "velero": _diag_velero,
+    "kong": _diag_kong,
+}
+
+
+def _maybe_diagnose(name: str, ctx: ExecutionContext) -> None:
+    try:
+        if name in DIAG_HANDLERS:
+            DIAG_HANDLERS[name](ctx)
+            return
+
+        # fallback: health check se existir
+        ok = run_health_check(name, ctx)
+        if ok:
+            typer.secho(f"[diagnose] {name}: OK", fg=typer.colors.GREEN)
+        else:
+            typer.secho(f"[diagnose] {name}: falhou", fg=typer.colors.YELLOW)
+    except Exception as exc:
+        typer.secho(f"[diagnose] {name} falhou: {exc}", fg=typer.colors.YELLOW)
+
+
 # Ordem de execucao dos modulos para instalacao completa
 # Modulos marcados com skip_env podem ser pulados via variavel de ambiente
 INSTALL_SEQUENCE = [
@@ -108,12 +301,25 @@ def run(ctx: ExecutionContext) -> None:
         fg=typer.colors.CYAN,
     )
 
+    steps_override = ctx.selected_steps
+    if steps_override is None and ctx.interactive_steps:
+        steps_override = _select_steps_interactively()
+
+    # Debug/diagnose menu simples
+    if not ctx.debug_snapshots and not ctx.post_diagnose:
+        typer.secho("Ativar modo debug (snapshots + diagnose pos-modulo)?", fg=typer.colors.YELLOW)
+        if typer.confirm("Habilitar debug?", default=False):
+            ctx.debug_snapshots = True
+            ctx.post_diagnose = True
+
     # Mostra sequencia de instalacao
     typer.echo("Sequencia de instalacao:")
     for i, (name, _, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
         suffix = ""
         if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
             suffix = " [SKIP]"
+        if steps_override and name not in steps_override:
+            suffix = " [IGNORADO]"
         typer.echo(f" {i:2}. {name:25} - {desc}{suffix}")
 
     typer.echo("")
@@ -126,7 +332,7 @@ def run(ctx: ExecutionContext) -> None:
     typer.echo("")
 
     if not ctx.dry_run:
-        if not
+        if not _confirm_colored("Deseja continuar com a instalacao completa?", default=True):
             typer.echo("Instalacao cancelada.")
             raise typer.Exit(code=0)
 
@@ -135,13 +341,25 @@ def run(ctx: ExecutionContext) -> None:
     succeeded = []
     skipped = []
 
+    cluster_ready = False
+
     for i, (name, handler, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
+        if steps_override and name not in steps_override:
+            skipped.append(name)
+            typer.secho(f"⏭ {name} ignorado (fora da lista selecionada)", fg=typer.colors.YELLOW)
+            continue
+
         # Verifica se modulo deve ser pulado via env
         if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
             skipped.append(name)
             typer.secho(f"⏭ {name} pulado via {skip_env}=1", fg=typer.colors.YELLOW)
             continue
 
+        if ctx.confirm_each_step:
+            if not _confirm_colored(f"Executar modulo '{name}' agora?", default=True):
+                skipped.append(name)
+                continue
+
         typer.secho(
             f"\n{'='*60}",
             fg=typer.colors.CYAN,
@@ -160,6 +378,15 @@ def run(ctx: ExecutionContext) -> None:
             handler(ctx)
             succeeded.append(name)
             typer.secho(f"✓ {name} concluido com sucesso", fg=typer.colors.GREEN)
+
+            if name == "kubernetes":
+                cluster_ready = True
+
+            if ctx.post_diagnose and cluster_ready:
+                _maybe_diagnose(name, ctx)
+
+            if ctx.debug_snapshots and cluster_ready:
+                _kube_snapshot(ctx, events=80)
         except KeyboardInterrupt:
             typer.secho(f"\n⚠ Instalacao interrompida pelo usuario no modulo '{name}'", fg=typer.colors.YELLOW)
             raise typer.Exit(code=130)

raijin_server/modules/kubernetes.py
CHANGED
@@ -146,6 +146,11 @@ def run(ctx: ExecutionContext) -> None:
     enable_service("containerd", ctx)
     enable_service("kubelet", ctx)
 
+    # Garante swap off antes de prosseguir (requisito kubeadm)
+    typer.echo("Desabilitando swap (requisito Kubernetes)...")
+    run_cmd(["swapoff", "-a"], ctx, check=False)
+    run_cmd("sed -i '/swap/d' /etc/fstab", ctx, use_shell=True, check=False)
+
     # kubeadm exige ip_forward=1; sobrepoe ajuste de hardening para fase de cluster.
     # Desabilita IPv6 completamente para evitar erros de preflight e simplificar rede
     sysctl_k8s = """# Kubernetes network settings
@@ -164,7 +169,19 @@ net.ipv6.conf.lo.disable_ipv6=1
     pod_cidr = typer.prompt("Pod CIDR", default="10.244.0.0/16")
     service_cidr = typer.prompt("Service CIDR", default="10.96.0.0/12")
     cluster_name = typer.prompt("Nome do cluster", default="raijin")
-
+    default_adv = "192.168.1.81"
+    advertise_address = typer.prompt("API advertise address", default=default_adv)
+    if advertise_address != default_adv:
+        typer.secho(
+            f"⚠ Para ambiente atual use {default_adv} (IP LAN, evita NAT).", fg=typer.colors.YELLOW
+        )
+        if not typer.confirm(f"Deseja forcar {default_adv}?", default=True):
+            typer.secho(
+                f"Usando valor informado: {advertise_address}. Certifique-se que todos os nos alcancem esse IP.",
+                fg=typer.colors.YELLOW,
+            )
+        else:
+            advertise_address = default_adv
 
     kubeadm_config = f"""apiVersion: kubeadm.k8s.io/v1beta3
 kind: ClusterConfiguration
raijin_server/modules/network.py
CHANGED

@@ -124,9 +124,9 @@ def run(ctx: ExecutionContext) -> None:
     )
 
     iface = typer.prompt("Interface", default="ens18")
-    address = typer.prompt("Endereco CIDR", default="192.168.
-    gateway = typer.prompt("Gateway", default="192.168.
-    dns = typer.prompt("DNS (separe por virgula)", default="
+    address = typer.prompt("Endereco CIDR", default="192.168.1.81/24")
+    gateway = typer.prompt("Gateway", default="192.168.1.254")
+    dns = typer.prompt("DNS (separe por virgula)", default="177.128.80.44,177.128.80.45")
 
     dns_list = ",".join([item.strip() for item in dns.split(",") if item.strip()])
     netplan_content = f"""network:

raijin_server/modules/prometheus.py
CHANGED
@@ -1,30 +1,115 @@
-"""Configuracao do Prometheus Stack via Helm."""
+"""Configuracao do Prometheus Stack via Helm (robust, production-ready)."""
+
+from __future__ import annotations
 
 import typer
 
-from raijin_server.utils import
+from raijin_server.utils import (
+    ExecutionContext,
+    helm_upgrade_install,
+    kubectl_create_ns,
+    require_root,
+    run_cmd,
+)
+
+DEFAULT_NAMESPACE = "observability"
+
+
+def _get_default_storage_class(ctx: ExecutionContext) -> str:
+    if ctx.dry_run:
+        return ""
+    result = run_cmd(
+        [
+            "kubectl",
+            "get",
+            "storageclass",
+            "-o",
+            "jsonpath={.items[?(@.metadata.annotations['storageclass.kubernetes.io/is-default-class']=='true')].metadata.name}",
+        ],
+        ctx,
+        check=False,
+    )
+    return (result.stdout or "").strip()
+
+
+def _ensure_cluster_access(ctx: ExecutionContext) -> None:
+    if ctx.dry_run:
+        return
+    result = run_cmd(["kubectl", "cluster-info"], ctx, check=False)
+    if result.returncode != 0:
+        typer.secho("Cluster Kubernetes nao acessivel. Verifique kubeconfig/controle-plane.", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
 
 
 def run(ctx: ExecutionContext) -> None:
     require_root(ctx)
+    _ensure_cluster_access(ctx)
+
     typer.echo("Instalando kube-prometheus-stack via Helm...")
 
+    namespace = typer.prompt("Namespace destino", default=DEFAULT_NAMESPACE)
+    kubectl_create_ns(namespace, ctx)
+
+    default_sc = _get_default_storage_class(ctx)
+    enable_persistence = typer.confirm(
+        "Habilitar PVC para Prometheus e Alertmanager?", default=bool(default_sc)
+    )
+
     values = [
         "grafana.enabled=false",
         "prometheus.prometheusSpec.retention=15d",
         "prometheus.prometheusSpec.enableAdminAPI=true",
         "prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false",
-        "prometheus.prometheusSpec.
-        "alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.resources.requests.storage=10Gi",
+        "prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false",
         "defaultRules.create=true",
     ]
 
+    extra_args = ["--wait", "--timeout", "5m", "--atomic"]
+
+    chart_version = typer.prompt(
+        "Versao do chart (vazio para latest)",
+        default="",
+    ).strip()
+    if chart_version:
+        extra_args.extend(["--version", chart_version])
+
+    if enable_persistence:
+        storage_class = typer.prompt(
+            "StorageClass para PVC",
+            default=default_sc or "",
+        ).strip()
+        prom_size = typer.prompt("Tamanho PVC Prometheus", default="20Gi")
+        alert_size = typer.prompt("Tamanho PVC Alertmanager", default="10Gi")
+
+        if storage_class:
+            values.extend(
+                [
+                    f"prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.storageClassName={storage_class}",
+                    f"alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.storageClassName={storage_class}",
+                ]
+            )
+
+        values.extend(
+            [
+                f"prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.resources.requests.storage={prom_size}",
+                f"alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.resources.requests.storage={alert_size}",
+            ]
+        )
+    else:
+        typer.secho(
+            "PVC desativado: Prometheus/Alertmanager usarao volumes efemeros (sem retenção apos restart).",
+            fg=typer.colors.YELLOW,
+        )
+
     helm_upgrade_install(
         release="kube-prometheus-stack",
         chart="kube-prometheus-stack",
-        namespace=
+        namespace=namespace,
         repo="prometheus-community",
         repo_url="https://prometheus-community.github.io/helm-charts",
         ctx=ctx,
         values=values,
+        extra_args=extra_args,
     )
+
+    typer.secho("kube-prometheus-stack instalado com sucesso.", fg=typer.colors.GREEN)
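For reference, the --set overrides assembled in this hunk correspond roughly to the following values.yaml fragment (an illustrative rendering, not shipped in the package; the storageClassName and PVC sizes come from the interactive prompts and are only appended when persistence is enabled):

    grafana:
      enabled: false
    defaultRules:
      create: true
    prometheus:
      prometheusSpec:
        retention: 15d
        enableAdminAPI: true
        serviceMonitorSelectorNilUsesHelmValues: false
        podMonitorSelectorNilUsesHelmValues: false
        storageSpec:
          volumeClaimTemplate:
            spec:
              storageClassName: <prompted StorageClass>
              resources:
                requests:
                  storage: 20Gi
    alertmanager:
      alertmanagerSpec:
        storage:
          volumeClaimTemplate:
            spec:
              storageClassName: <prompted StorageClass>
              resources:
                requests:
                  storage: 10Gi

The new extra_args (--wait --timeout 5m --atomic) are presumably passed through the package's helm_upgrade_install wrapper to helm upgrade --install, so a failed release is rolled back instead of being left half-deployed.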

raijin_server/modules/sanitize.py
CHANGED
@@ -7,7 +7,14 @@ from pathlib import Path
 
 import typer
 
-from raijin_server.utils import ExecutionContext, require_root, run_cmd
+from raijin_server.utils import ExecutionContext, require_root, run_cmd, write_file
+
+# Defaults alinhados com configuracao de rede solicitada
+NETPLAN_IFACE = "ens18"
+NETPLAN_ADDRESS = "192.168.1.81/24"
+NETPLAN_GATEWAY = "192.168.1.254"
+NETPLAN_DNS = "177.128.80.44,177.128.80.45"
+NETPLAN_PATH = Path("/etc/netplan/01-raijin-static.yaml")
 
 SYSTEMD_SERVICES = [
     "kubelet",
@@ -48,6 +55,44 @@ APT_MARKERS = [
 ]
 
 
+def _ensure_netplan(ctx: ExecutionContext) -> None:
+    """Garante que o netplan esteja com IP fixo esperado; se ja estiver, mostra OK."""
+
+    desired = f"""network:
+  version: 2
+  renderer: networkd
+  ethernets:
+    {NETPLAN_IFACE}:
+      dhcp4: false
+      addresses: [{NETPLAN_ADDRESS}]
+      gateway4: {NETPLAN_GATEWAY}
+      nameservers:
+        addresses: [{NETPLAN_DNS}]
+"""
+
+    existing = None
+    if NETPLAN_PATH.exists():
+        try:
+            existing = NETPLAN_PATH.read_text()
+        except Exception:
+            existing = None
+
+    if existing and all(x in existing for x in (NETPLAN_ADDRESS, NETPLAN_GATEWAY, NETPLAN_DNS)):
+        typer.secho(
+            f"\n✓ Netplan ja configurado com {NETPLAN_ADDRESS} / gw {NETPLAN_GATEWAY} / dns {NETPLAN_DNS}",
+            fg=typer.colors.GREEN,
+        )
+        return
+
+    typer.echo("Aplicando netplan padrao antes da limpeza...")
+    write_file(NETPLAN_PATH, desired, ctx)
+    run_cmd(["netplan", "apply"], ctx, check=False)
+    typer.secho(
+        f"✓ Netplan ajustado para {NETPLAN_ADDRESS} (gw {NETPLAN_GATEWAY}, dns {NETPLAN_DNS})",
+        fg=typer.colors.GREEN,
+    )
+
+
 def _stop_services(ctx: ExecutionContext) -> None:
     typer.echo("Parando serviços relacionados (kubelet, containerd)...")
     for service in SYSTEMD_SERVICES:
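For reference, with the module constants above (and assuming the standard two-space netplan nesting reconstructed in this hunk), the file _ensure_netplan writes to /etc/netplan/01-raijin-static.yaml renders as:

    network:
      version: 2
      renderer: networkd
      ethernets:
        ens18:
          dhcp4: false
          addresses: [192.168.1.81/24]
          gateway4: 192.168.1.254
          nameservers:
            addresses: [177.128.80.44,177.128.80.45]

Note that recent netplan releases mark gateway4 as deprecated in favour of routes entries, although the key is still accepted.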
@@ -131,6 +176,9 @@ def run(ctx: ExecutionContext) -> None:
         typer.echo("Sanitizacao cancelada pelo usuario.")
         return
 
+    # Primeiro passo: garantir netplan consistente, sem quebrar ao limpar
+    _ensure_netplan(ctx)
+
     _stop_services(ctx)
     _kubeadm_reset(ctx)
     _flush_iptables(ctx)

raijin_server/scripts/install.sh
CHANGED

@@ -38,7 +38,7 @@ echo "Escolha o tipo de instalação:"
 echo " 1) Global (requer sudo, todos os usuários)"
 echo " 2) Virtual env (recomendado para desenvolvimento)"
 echo " 3) User install (apenas usuário atual)"
-read -p "Opção [2]: " INSTALL_TYPE
+read -r -p "Opção [2]: " INSTALL_TYPE
 INSTALL_TYPE=${INSTALL_TYPE:-2}
 
 echo ""
@@ -51,6 +51,7 @@ case $INSTALL_TYPE in
     2)
         echo -e "${YELLOW}Criando virtual environment...${NC}"
         python3 -m venv .venv
+        # shellcheck disable=SC1091
         source .venv/bin/activate
         pip install --upgrade pip
         pip install -e .
@@ -73,7 +74,7 @@ EOF
 
 # Adicionar ao PATH se necessário
 if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
-    echo
+    echo "export PATH=\"$HOME/.local/bin:$PATH\"" >> ~/.bashrc
     echo -e "${YELLOW}⚠${NC} Adicionado $HOME/.local/bin ao PATH"
     echo "Execute: source ~/.bashrc"
 fi

raijin_server/scripts/log_size_metric.sh
CHANGED
@@ -11,21 +11,25 @@ OUTPUT=${RAIJIN_METRIC_FILE:-/var/lib/node_exporter/textfile_collector/raijin_lo
 # Calcula soma de todos os logs (principal + rotações)
 TOTAL_BYTES=0
 shopt -s nullglob
+
+METRICS_TMP=$(mktemp)
+trap 'rm -f "$METRICS_TMP"' EXIT
+
 for f in "$LOG_DIR"/$LOG_PATTERN; do
   size=$(stat -c%s "$f" 2>/dev/null || echo 0)
   TOTAL_BYTES=$((TOTAL_BYTES + size))
   if [[ "$f" =~ raijin-server\.log(\.\d+)?$ ]]; then
-    printf "raijin_log_size_bytes{file=\"%s\"} %d\n" "$(basename "$f")" "$size"
+    printf "raijin_log_size_bytes{file=\"%s\"} %d\n" "$(basename "$f")" "$size" >> "$METRICS_TMP"
   fi
-done
-
-
-
-
-
-
-
-
-
-
-}
+done
+
+# Escreve métricas no arquivo final
+mkdir -p "$(dirname "$OUTPUT")"
+{
+  echo "# HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)"
+  echo "# TYPE raijin_log_size_bytes gauge"
+  cat "$METRICS_TMP"
+  echo "# HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)"
+  echo "# TYPE raijin_log_size_total_bytes gauge"
+  echo "raijin_log_size_total_bytes ${TOTAL_BYTES}"
+} > "$OUTPUT"
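With this rewrite the per-file metrics are staged in a temp file and the whole textfile-collector snippet is written to $OUTPUT in a single redirect; for a host with one 4096-byte log file the generated output would look roughly like this (byte values illustrative):

    # HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)
    # TYPE raijin_log_size_bytes gauge
    raijin_log_size_bytes{file="raijin-server.log"} 4096
    # HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)
    # TYPE raijin_log_size_total_bytes gauge
    raijin_log_size_total_bytes 4096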

raijin_server/scripts/pre-deploy-check.sh
CHANGED
@@ -49,6 +49,7 @@ fi
 echo ""
 echo "2. Verificando Sistema Operacional..."
 if [ -f /etc/os-release ]; then
+    # shellcheck disable=SC1091
     . /etc/os-release
     if [[ "$ID" == "ubuntu" ]]; then
         VERSION_NUM=$(echo "$VERSION_ID" | cut -d. -f1)
@@ -152,7 +153,7 @@ STATE_DIRS=("/var/lib/raijin-server/state" "$HOME/.local/share/raijin-server/sta
 FOUND_STATE=0
 for dir in "${STATE_DIRS[@]}"; do
     if [[ -d "$dir" ]]; then
-        MODULE_COUNT=$(
+        MODULE_COUNT=$(find "$dir" -maxdepth 1 -name '*.done' -type f 2>/dev/null | wc -l)
         if [[ $MODULE_COUNT -gt 0 ]]; then
             check_pass "$MODULE_COUNT modulos concluidos (em $dir)"
             FOUND_STATE=1

raijin_server/utils.py
CHANGED

@@ -29,9 +29,38 @@ BACKUP_COUNT = int(os.environ.get("RAIJIN_LOG_BACKUP_COUNT", 5))
 logger = logging.getLogger("raijin-server")
 logger.setLevel(logging.INFO)
 
-
+
+def _build_file_handler() -> RotatingFileHandler:
+    """Cria handler com fallback para $HOME quando /var/log exige root."""
+    try:
+        return RotatingFileHandler(LOG_FILE, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
+    except PermissionError:
+        fallback = Path.home() / ".raijin-server.log"
+        fallback.parent.mkdir(parents=True, exist_ok=True)
+        return RotatingFileHandler(fallback, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
+
+
+file_handler = _build_file_handler()
 stream_handler = logging.StreamHandler()
 
+
+def active_log_file() -> Path:
+    return Path(getattr(file_handler, "baseFilename", LOG_FILE))
+
+
+def available_log_files() -> list[Path]:
+    base = active_log_file()
+    pattern = base.name + "*"
+    return [p for p in sorted(base.parent.glob(pattern)) if p.is_file()]
+
+
+def page_text(content: str) -> None:
+    pager = shutil.which("less")
+    if pager:
+        subprocess.run([pager, "-R"], input=content, text=True, check=False)
+    else:
+        typer.echo(content)
+
 formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 file_handler.setFormatter(formatter)
 stream_handler.setFormatter(formatter)
@@ -57,6 +86,13 @@ class ExecutionContext:
     timeout: int = 600 # 10 min for slow connections
     errors: list = field(default_factory=list)
     warnings: list = field(default_factory=list)
+    # Controle interativo/diagnostico
+    selected_steps: list[str] | None = None
+    confirm_each_step: bool = False
+    debug_snapshots: bool = False
+    post_diagnose: bool = False
+    color_prompts: bool = True
+    interactive_steps: bool = False
 
 
 def resolve_script_path(script_name: str) -> Path: