raijin-server 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,13 @@
1
1
  """Instalacao completa e automatizada do ambiente produtivo."""
2
2
 
3
3
  import os
4
+ import subprocess
5
+ from typing import List
4
6
 
5
7
  import typer
6
8
 
7
9
  from raijin_server.utils import ExecutionContext, require_root
10
+ from raijin_server.healthchecks import run_health_check
8
11
  from raijin_server.modules import (
9
12
  bootstrap,
10
13
  calico,
@@ -68,6 +71,196 @@ def _cert_manager_install_only(ctx: ExecutionContext) -> None:
68
71
  )
69
72
 
70
73
 
74
def _confirm_colored(message: str, default: bool = True) -> bool:
    """Ask a yes/no question rendered in bold yellow.

    Args:
        message: Question text shown to the user.
        default: Answer assumed when the user just presses ENTER.

    Returns:
        Whatever ``typer.confirm`` returns for the styled prompt.
    """
    return typer.confirm(
        typer.style(message, fg=typer.colors.YELLOW, bold=True),
        default=default,
    )
78
+
79
+
80
def _select_steps_interactively() -> List[str] | None:
    """Prompt for a comma-separated list of step names.

    Returns:
        The non-empty, whitespace-trimmed names in the order typed, or
        ``None`` when nothing usable was entered.
    """
    typer.secho("Selecione passos (separados por vírgula) ou ENTER para todos:", fg=typer.colors.CYAN)
    typer.echo("Exemplo: kubernetes,calico,cert_manager,traefik")
    raw = typer.prompt("Passos", default="").strip()
    chosen = [part for part in map(str.strip, raw.split(",")) if part]
    # A blank answer (or only commas) yields an empty list, reported as None.
    return chosen if chosen else None
88
+
89
+
90
def _kube_snapshot(ctx: ExecutionContext, events: int = 100, namespace: str | None = None) -> None:
    """Print a best-effort snapshot of the cluster for debugging.

    Runs ``kubectl get nodes``, ``kubectl get pods`` and ``kubectl get events``
    and echoes their output. Failures are reported, never raised.

    Args:
        ctx: Execution context; nothing is executed when ``ctx.dry_run`` is set.
        events: Maximum number of trailing lines of the events listing to
            print. Values ``<= 0`` disable truncation.
        namespace: Restrict pods/events to this namespace; all namespaces are
            queried when it is ``None`` or empty.
    """
    typer.secho("\n[DEBUG] Snapshot do cluster", fg=typer.colors.CYAN)
    if ctx.dry_run:
        # Same dry-run contract as _run_cmd: never touch the cluster.
        typer.echo("[dry-run] comando nao executado")
        return

    scope = ["-n", namespace] if namespace else ["-A"]
    nodes_cmd = ["kubectl", "get", "nodes", "-o", "wide"]
    pods_cmd = ["kubectl", "get", "pods", *scope, "-o", "wide"]
    events_cmd = ["kubectl", "get", "events", *scope, "--sort-by=.lastTimestamp"]

    for cmd in (nodes_cmd, pods_cmd, events_cmd):
        # Echo before running so a timeout/failure can be tied to its command.
        typer.echo(f"$ {' '.join(cmd)}")
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
            if result.stdout:
                lines = result.stdout.strip().splitlines()
                # Only the events listing is tailed; guard against events <= 0,
                # where a negative slice start would drop leading lines instead.
                if cmd is events_cmd and events > 0:
                    lines = lines[-events:]
                typer.echo("\n".join(lines))
            elif result.stderr:
                typer.echo(result.stderr.strip())
        except Exception as exc:  # deliberate best-effort: diagnostics must not abort the install
            typer.secho(f"(snapshot falhou: {exc})", fg=typer.colors.YELLOW)
125
+
126
+
127
def _run_cmd(title: str, cmd: List[str], ctx: ExecutionContext, tail: int | None = None) -> None:
    """Run a diagnostic command best-effort and echo its output.

    Args:
        title: Heading printed before the command runs.
        cmd: Argument vector passed to ``subprocess.run``.
        ctx: Execution context; the command is skipped when ``ctx.dry_run``.
        tail: When truthy, only the last ``tail`` output lines are printed.
    """
    typer.secho(f"\n[diagnose] {title}", fg=typer.colors.CYAN)
    if ctx.dry_run:
        typer.echo("[dry-run] comando nao executado")
        return

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=40)
        typer.echo(f"$ {' '.join(cmd)}")
        # stdout wins; stderr is shown only when stdout is blank.
        text = proc.stdout.strip()
        if not text:
            text = proc.stderr.strip()
        if not text:
            typer.echo("(sem saida)")
        else:
            shown = text.splitlines()
            if tail:
                shown = shown[-tail:]
            typer.echo("\n".join(shown))
    except Exception as exc:
        # Any failure (missing binary, timeout, ...) is reported, not raised.
        typer.secho(f"(falha ao executar: {exc})", fg=typer.colors.YELLOW)
147
+
148
+
149
def _diag_namespace(ns: str, ctx: ExecutionContext, tail_events: int = 50) -> None:
    """List pods, services, deployments and recent events of one namespace.

    Args:
        ns: Namespace to inspect.
        ctx: Execution context forwarded to ``_run_cmd``.
        tail_events: Number of trailing event lines to show.
    """
    listings = (
        (f"Pods em {ns}", ["kubectl", "get", "pods", "-n", ns, "-o", "wide"]),
        (f"Services em {ns}", ["kubectl", "get", "svc", "-n", ns]),
        (f"Deployments em {ns}", ["kubectl", "get", "deploy", "-n", ns]),
    )
    for title, command in listings:
        _run_cmd(title, command, ctx)

    # Events are sorted by timestamp, so the tail holds the most recent ones.
    events_command = ["kubectl", "get", "events", "-n", ns, "--sort-by=.lastTimestamp"]
    _run_cmd(f"Eventos em {ns}", events_command, ctx, tail=tail_events)
159
+
160
+
161
def _diag_calico(ctx: ExecutionContext) -> None:
    """Show DaemonSets, calico-node/calico-typha pods and events in kube-system."""
    ns = "kube-system"
    in_ns = ["-n", ns]
    wide = ["-o", "wide"]

    def pods_with_label(label: str) -> List[str]:
        # Wide pod listing in kube-system filtered by a label selector.
        return ["kubectl", "get", "pods", *in_ns, "-l", label, *wide]

    _run_cmd("Calico DaemonSets", ["kubectl", "get", "ds", *in_ns, *wide], ctx)
    _run_cmd("Calico pods", pods_with_label("k8s-app=calico-node"), ctx)
    _run_cmd("Calico typha", pods_with_label("k8s-app=calico-typha"), ctx)
    _run_cmd(
        "Calico events",
        ["kubectl", "get", "events", *in_ns, "--sort-by=.lastTimestamp"],
        ctx,
        tail=50,
    )
167
+
168
+
169
def _diag_secrets(ctx: ExecutionContext) -> None:
    """Run the generic namespace diagnostics for kube-system and external-secrets."""
    for namespace in ("kube-system", "external-secrets"):
        _diag_namespace(namespace, ctx)
172
+
173
+
174
def _diag_prometheus(ctx: ExecutionContext) -> None:
    """List Prometheus-labelled pods, then diagnose the observability namespace."""
    namespace = "observability"
    selector = "app.kubernetes.io/name=prometheus"
    _run_cmd(
        "Prometheus pods",
        ["kubectl", "get", "pods", "-n", namespace, "-l", selector],
        ctx,
    )
    _diag_namespace(namespace, ctx)
178
+
179
+
180
def _diag_grafana(ctx: ExecutionContext) -> None:
    """List Grafana-labelled services, then diagnose the observability namespace."""
    namespace = "observability"
    selector = "app.kubernetes.io/name=grafana"
    _run_cmd(
        "Grafana svc",
        ["kubectl", "get", "svc", "-n", namespace, "-l", selector],
        ctx,
    )
    _diag_namespace(namespace, ctx)
184
+
185
+
186
def _diag_loki(ctx: ExecutionContext) -> None:
    """List Loki-labelled StatefulSets, then diagnose the observability namespace."""
    namespace = "observability"
    selector = "app.kubernetes.io/name=loki"
    _run_cmd(
        "Loki statefulsets",
        ["kubectl", "get", "sts", "-n", namespace, "-l", selector],
        ctx,
    )
    _diag_namespace(namespace, ctx)
190
+
191
+
192
def _diag_traefik(ctx: ExecutionContext) -> None:
    """List Ingress objects in the traefik namespace, then diagnose that namespace."""
    namespace = "traefik"
    ingress_cmd = ["kubectl", "get", "ingress", "-n", namespace]
    _run_cmd("Traefik ingress", ingress_cmd, ctx)
    _diag_namespace(namespace, ctx)
196
+
197
+
198
def _diag_observability_ingress(ctx: ExecutionContext) -> None:
    """List Ingress objects in the observability namespace, then diagnose it."""
    namespace = "observability"
    ingress_cmd = ["kubectl", "get", "ingress", "-n", namespace]
    _run_cmd("Ingress objects", ingress_cmd, ctx)
    _diag_namespace(namespace, ctx)
202
+
203
+
204
def _diag_observability_dashboards(ctx: ExecutionContext) -> None:
    """List ConfigMaps labelled ``raijin/dashboards=true``, then diagnose the namespace."""
    namespace = "observability"
    selector = "raijin/dashboards=true"
    _run_cmd(
        "ConfigMaps dashboards",
        ["kubectl", "get", "configmap", "-n", namespace, "-l", selector],
        ctx,
    )
    _diag_namespace(namespace, ctx)
208
+
209
+
210
def _diag_minio(ctx: ExecutionContext) -> None:
    """Run the generic namespace diagnostics for the ``minio`` namespace."""
    _diag_namespace("minio", ctx)
213
+
214
+
215
def _diag_kafka(ctx: ExecutionContext) -> None:
    """Run the generic namespace diagnostics for the ``kafka`` namespace.

    ``_diag_namespace`` already runs ``kubectl get pods -n kafka -o wide``,
    so no separate pod listing is issued here (it would execute the exact
    same command a second time).
    """
    _diag_namespace("kafka", ctx)
219
+
220
+
221
def _diag_velero(ctx: ExecutionContext) -> None:
    """Run the generic namespace diagnostics for the ``velero`` namespace."""
    _diag_namespace("velero", ctx)
224
+
225
+
226
def _diag_kong(ctx: ExecutionContext) -> None:
    """Run the generic namespace diagnostics for the ``kong`` namespace."""
    _diag_namespace("kong", ctx)
229
+
230
+
231
# Maps an install-step name to the callable that prints its diagnostics.
# Each handler is called with the ExecutionContext as its only argument.
DIAG_HANDLERS = {
    # Handler provided by the module itself
    "cert_manager": cert_manager.diagnose,
    # Handlers defined in this file
    "calico": _diag_calico,
    "secrets": _diag_secrets,
    "prometheus": _diag_prometheus,
    "grafana": _diag_grafana,
    "loki": _diag_loki,
    "traefik": _diag_traefik,
    "observability_ingress": _diag_observability_ingress,
    "observability_dashboards": _diag_observability_dashboards,
    "minio": _diag_minio,
    "kafka": _diag_kafka,
    "velero": _diag_velero,
    "kong": _diag_kong,
}
246
+
247
+
248
def _maybe_diagnose(name: str, ctx: ExecutionContext) -> None:
    """Run the diagnostics registered for ``name``, else fall back to its health check.

    Any exception raised along the way is printed as a warning and swallowed.

    Args:
        name: Install-step name, used as key into ``DIAG_HANDLERS``.
        ctx: Execution context forwarded to the handler / health check.
    """
    try:
        handler = DIAG_HANDLERS.get(name)
        if handler is not None:
            handler(ctx)
            return

        # No dedicated handler: report the result of the generic health check.
        if run_health_check(name, ctx):
            typer.secho(f"[diagnose] {name}: OK", fg=typer.colors.GREEN)
        else:
            typer.secho(f"[diagnose] {name}: falhou", fg=typer.colors.YELLOW)
    except Exception as exc:
        typer.secho(f"[diagnose] {name} falhou: {exc}", fg=typer.colors.YELLOW)
262
+
263
+
71
264
  # Ordem de execucao dos modulos para instalacao completa
72
265
  # Modulos marcados com skip_env podem ser pulados via variavel de ambiente
73
266
  INSTALL_SEQUENCE = [
@@ -108,12 +301,25 @@ def run(ctx: ExecutionContext) -> None:
108
301
  fg=typer.colors.CYAN,
109
302
  )
110
303
 
304
+ steps_override = ctx.selected_steps
305
+ if steps_override is None and ctx.interactive_steps:
306
+ steps_override = _select_steps_interactively()
307
+
308
+ # Debug/diagnose menu simples
309
+ if not ctx.debug_snapshots and not ctx.post_diagnose:
310
+ typer.secho("Ativar modo debug (snapshots + diagnose pos-modulo)?", fg=typer.colors.YELLOW)
311
+ if typer.confirm("Habilitar debug?", default=False):
312
+ ctx.debug_snapshots = True
313
+ ctx.post_diagnose = True
314
+
111
315
  # Mostra sequencia de instalacao
112
316
  typer.echo("Sequencia de instalacao:")
113
317
  for i, (name, _, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
114
318
  suffix = ""
115
319
  if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
116
320
  suffix = " [SKIP]"
321
+ if steps_override and name not in steps_override:
322
+ suffix = " [IGNORADO]"
117
323
  typer.echo(f" {i:2}. {name:25} - {desc}{suffix}")
118
324
 
119
325
  typer.echo("")
@@ -126,7 +332,7 @@ def run(ctx: ExecutionContext) -> None:
126
332
  typer.echo("")
127
333
 
128
334
  if not ctx.dry_run:
129
- if not typer.confirm("Deseja continuar com a instalacao completa?", default=True):
335
+ if not _confirm_colored("Deseja continuar com a instalacao completa?", default=True):
130
336
  typer.echo("Instalacao cancelada.")
131
337
  raise typer.Exit(code=0)
132
338
 
@@ -135,13 +341,25 @@ def run(ctx: ExecutionContext) -> None:
135
341
  succeeded = []
136
342
  skipped = []
137
343
 
344
+ cluster_ready = False
345
+
138
346
  for i, (name, handler, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
347
+ if steps_override and name not in steps_override:
348
+ skipped.append(name)
349
+ typer.secho(f"⏭ {name} ignorado (fora da lista selecionada)", fg=typer.colors.YELLOW)
350
+ continue
351
+
139
352
  # Verifica se modulo deve ser pulado via env
140
353
  if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
141
354
  skipped.append(name)
142
355
  typer.secho(f"⏭ {name} pulado via {skip_env}=1", fg=typer.colors.YELLOW)
143
356
  continue
144
357
 
358
+ if ctx.confirm_each_step:
359
+ if not _confirm_colored(f"Executar modulo '{name}' agora?", default=True):
360
+ skipped.append(name)
361
+ continue
362
+
145
363
  typer.secho(
146
364
  f"\n{'='*60}",
147
365
  fg=typer.colors.CYAN,
@@ -160,6 +378,15 @@ def run(ctx: ExecutionContext) -> None:
160
378
  handler(ctx)
161
379
  succeeded.append(name)
162
380
  typer.secho(f"✓ {name} concluido com sucesso", fg=typer.colors.GREEN)
381
+
382
+ if name == "kubernetes":
383
+ cluster_ready = True
384
+
385
+ if ctx.post_diagnose and cluster_ready:
386
+ _maybe_diagnose(name, ctx)
387
+
388
+ if ctx.debug_snapshots and cluster_ready:
389
+ _kube_snapshot(ctx, events=80)
163
390
  except KeyboardInterrupt:
164
391
  typer.secho(f"\n⚠ Instalacao interrompida pelo usuario no modulo '{name}'", fg=typer.colors.YELLOW)
165
392
  raise typer.Exit(code=130)
@@ -146,6 +146,11 @@ def run(ctx: ExecutionContext) -> None:
146
146
  enable_service("containerd", ctx)
147
147
  enable_service("kubelet", ctx)
148
148
 
149
+ # Garante swap off antes de prosseguir (requisito kubeadm)
150
+ typer.echo("Desabilitando swap (requisito Kubernetes)...")
151
+ run_cmd(["swapoff", "-a"], ctx, check=False)
152
+ run_cmd("sed -i '/swap/d' /etc/fstab", ctx, use_shell=True, check=False)
153
+
149
154
  # kubeadm exige ip_forward=1; sobrepoe ajuste de hardening para fase de cluster.
150
155
  # Desabilita IPv6 completamente para evitar erros de preflight e simplificar rede
151
156
  sysctl_k8s = """# Kubernetes network settings
@@ -164,7 +169,19 @@ net.ipv6.conf.lo.disable_ipv6=1
164
169
  pod_cidr = typer.prompt("Pod CIDR", default="10.244.0.0/16")
165
170
  service_cidr = typer.prompt("Service CIDR", default="10.96.0.0/12")
166
171
  cluster_name = typer.prompt("Nome do cluster", default="raijin")
167
- advertise_address = typer.prompt("API advertise address", default="0.0.0.0")
172
+ default_adv = "192.168.1.81"
173
+ advertise_address = typer.prompt("API advertise address", default=default_adv)
174
+ if advertise_address != default_adv:
175
+ typer.secho(
176
+ f"⚠ Para ambiente atual use {default_adv} (IP LAN, evita NAT).", fg=typer.colors.YELLOW
177
+ )
178
+ if not typer.confirm(f"Deseja forcar {default_adv}?", default=True):
179
+ typer.secho(
180
+ f"Usando valor informado: {advertise_address}. Certifique-se que todos os nos alcancem esse IP.",
181
+ fg=typer.colors.YELLOW,
182
+ )
183
+ else:
184
+ advertise_address = default_adv
168
185
 
169
186
  kubeadm_config = f"""apiVersion: kubeadm.k8s.io/v1beta3
170
187
  kind: ClusterConfiguration
@@ -124,9 +124,9 @@ def run(ctx: ExecutionContext) -> None:
124
124
  )
125
125
 
126
126
  iface = typer.prompt("Interface", default="ens18")
127
- address = typer.prompt("Endereco CIDR", default="192.168.0.10/24")
128
- gateway = typer.prompt("Gateway", default="192.168.0.1")
129
- dns = typer.prompt("DNS (separe por virgula)", default="1.1.1.1,8.8.8.8")
127
+ address = typer.prompt("Endereco CIDR", default="192.168.1.81/24")
128
+ gateway = typer.prompt("Gateway", default="192.168.1.254")
129
+ dns = typer.prompt("DNS (separe por virgula)", default="177.128.80.44,177.128.80.45")
130
130
 
131
131
  dns_list = ",".join([item.strip() for item in dns.split(",") if item.strip()])
132
132
  netplan_content = f"""network:
@@ -1,30 +1,115 @@
1
- """Configuracao do Prometheus Stack via Helm."""
1
+ """Configuracao do Prometheus Stack via Helm (robust, production-ready)."""
2
+
3
+ from __future__ import annotations
2
4
 
3
5
  import typer
4
6
 
5
- from raijin_server.utils import ExecutionContext, helm_upgrade_install, require_root
7
+ from raijin_server.utils import (
8
+ ExecutionContext,
9
+ helm_upgrade_install,
10
+ kubectl_create_ns,
11
+ require_root,
12
+ run_cmd,
13
+ )
14
+
15
+ DEFAULT_NAMESPACE = "observability"
16
+
17
+
18
def _get_default_storage_class(ctx: ExecutionContext) -> str:
    """Return the name of the cluster's default StorageClass, or ``""``.

    Queries ``kubectl get storageclass`` for classes annotated with
    ``storageclass.kubernetes.io/is-default-class=true``. In dry-run mode no
    command is executed and ``""`` is returned.

    Returns:
        The first matching StorageClass name, or an empty string when none
        is reported (or the command produced no output).
    """
    if ctx.dry_run:
        return ""

    jsonpath = (
        "{.items[?(@.metadata.annotations"
        "['storageclass.kubernetes.io/is-default-class']=='true')].metadata.name}"
    )
    result = run_cmd(
        ["kubectl", "get", "storageclass", "-o", "jsonpath=" + jsonpath],
        ctx,
        check=False,
    )
    # When several classes are flagged as default, all of their names come
    # back in one string; take the first so the value is a valid class name.
    names = (result.stdout or "").split()
    return names[0] if names else ""
33
+
34
+
35
def _ensure_cluster_access(ctx: ExecutionContext) -> None:
    """Abort with exit code 1 when ``kubectl cluster-info`` fails.

    Does nothing in dry-run mode.

    Raises:
        typer.Exit: With ``code=1`` when the command returns non-zero.
    """
    if ctx.dry_run:
        return
    probe = run_cmd(["kubectl", "cluster-info"], ctx, check=False)
    if probe.returncode != 0:
        typer.secho("Cluster Kubernetes nao acessivel. Verifique kubeconfig/controle-plane.", fg=typer.colors.RED)
        raise typer.Exit(code=1)


def _persistence_values(storage_class: str, prom_size: str, alert_size: str) -> list[str]:
    """Build the Helm ``--set`` entries for Prometheus/Alertmanager PVCs.

    The ``storageClassName`` entries are emitted only when ``storage_class``
    is non-empty; the size entries are always emitted.
    """
    prom_spec = "prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec"
    alert_spec = "alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec"
    entries: list[str] = []
    if storage_class:
        entries.append(f"{prom_spec}.storageClassName={storage_class}")
        entries.append(f"{alert_spec}.storageClassName={storage_class}")
    entries.append(f"{prom_spec}.resources.requests.storage={prom_size}")
    entries.append(f"{alert_spec}.resources.requests.storage={alert_size}")
    return entries


def run(ctx: ExecutionContext) -> None:
    """Install kube-prometheus-stack via Helm, prompting for the options.

    Prompts for the namespace, chart version and (optionally) PVC settings,
    then calls ``helm_upgrade_install`` with ``--wait --timeout 5m --atomic``.

    Args:
        ctx: Execution context forwarded to every helper.

    Raises:
        typer.Exit: Propagated from ``_ensure_cluster_access`` when the
            cluster is not reachable.
    """
    require_root(ctx)
    _ensure_cluster_access(ctx)

    typer.echo("Instalando kube-prometheus-stack via Helm...")

    # Strip like the other prompts do; a blank answer falls back to the default.
    namespace = typer.prompt("Namespace destino", default=DEFAULT_NAMESPACE).strip() or DEFAULT_NAMESPACE
    kubectl_create_ns(namespace, ctx)

    default_sc = _get_default_storage_class(ctx)
    enable_persistence = typer.confirm(
        "Habilitar PVC para Prometheus e Alertmanager?", default=bool(default_sc)
    )

    values = [
        "grafana.enabled=false",
        "prometheus.prometheusSpec.retention=15d",
        "prometheus.prometheusSpec.enableAdminAPI=true",
        "prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false",
        "prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false",
        "defaultRules.create=true",
    ]

    extra_args = ["--wait", "--timeout", "5m", "--atomic"]

    chart_version = typer.prompt(
        "Versao do chart (vazio para latest)",
        default="",
    ).strip()
    if chart_version:
        extra_args.extend(["--version", chart_version])

    if enable_persistence:
        storage_class = typer.prompt(
            "StorageClass para PVC",
            default=default_sc or "",
        ).strip()
        prom_size = typer.prompt("Tamanho PVC Prometheus", default="20Gi").strip() or "20Gi"
        alert_size = typer.prompt("Tamanho PVC Alertmanager", default="10Gi").strip() or "10Gi"

        if not storage_class:
            # Without an explicit class the claims rely on a cluster default;
            # with --wait/--atomic an unbound PVC ends in a rolled-back release.
            typer.secho(
                "Nenhuma StorageClass informada: os PVCs dependem de uma StorageClass default no cluster.",
                fg=typer.colors.YELLOW,
            )

        values.extend(_persistence_values(storage_class, prom_size, alert_size))
    else:
        typer.secho(
            "PVC desativado: Prometheus/Alertmanager usarao volumes efemeros (sem retenção apos restart).",
            fg=typer.colors.YELLOW,
        )

    helm_upgrade_install(
        release="kube-prometheus-stack",
        chart="kube-prometheus-stack",
        namespace=namespace,
        repo="prometheus-community",
        repo_url="https://prometheus-community.github.io/helm-charts",
        ctx=ctx,
        values=values,
        extra_args=extra_args,
    )

    typer.secho("kube-prometheus-stack instalado com sucesso.", fg=typer.colors.GREEN)
@@ -7,7 +7,14 @@ from pathlib import Path
7
7
 
8
8
  import typer
9
9
 
10
- from raijin_server.utils import ExecutionContext, require_root, run_cmd
10
+ from raijin_server.utils import ExecutionContext, require_root, run_cmd, write_file
11
+
12
+ # Defaults alinhados com configuracao de rede solicitada
13
+ NETPLAN_IFACE = "ens18"
14
+ NETPLAN_ADDRESS = "192.168.1.81/24"
15
+ NETPLAN_GATEWAY = "192.168.1.254"
16
+ NETPLAN_DNS = "177.128.80.44,177.128.80.45"
17
+ NETPLAN_PATH = Path("/etc/netplan/01-raijin-static.yaml")
11
18
 
12
19
  SYSTEMD_SERVICES = [
13
20
  "kubelet",
@@ -48,6 +55,44 @@ APT_MARKERS = [
48
55
  ]
49
56
 
50
57
 
58
def _ensure_netplan(ctx: ExecutionContext) -> None:
    """Make sure the static-IP netplan file is in place.

    If the file at ``NETPLAN_PATH`` already mentions the expected address,
    gateway and DNS string, only a confirmation is printed. Otherwise the
    file is (re)written and ``netplan apply`` is invoked.

    Args:
        ctx: Execution context forwarded to ``write_file`` and ``run_cmd``.
    """
    expected_markers = (NETPLAN_ADDRESS, NETPLAN_GATEWAY, NETPLAN_DNS)

    current = None
    if NETPLAN_PATH.exists():
        try:
            current = NETPLAN_PATH.read_text()
        except Exception:
            # An unreadable file is treated the same as a missing one.
            current = None

    # Plain substring check: the file is accepted when all three values appear.
    if current and all(marker in current for marker in expected_markers):
        typer.secho(
            f"\n✓ Netplan ja configurado com {NETPLAN_ADDRESS} / gw {NETPLAN_GATEWAY} / dns {NETPLAN_DNS}",
            fg=typer.colors.GREEN,
        )
        return

    desired = f"""network:
  version: 2
  renderer: networkd
  ethernets:
    {NETPLAN_IFACE}:
      dhcp4: false
      addresses: [{NETPLAN_ADDRESS}]
      gateway4: {NETPLAN_GATEWAY}
      nameservers:
        addresses: [{NETPLAN_DNS}]
"""

    typer.echo("Aplicando netplan padrao antes da limpeza...")
    write_file(NETPLAN_PATH, desired, ctx)
    run_cmd(["netplan", "apply"], ctx, check=False)
    typer.secho(
        f"✓ Netplan ajustado para {NETPLAN_ADDRESS} (gw {NETPLAN_GATEWAY}, dns {NETPLAN_DNS})",
        fg=typer.colors.GREEN,
    )
94
+
95
+
51
96
  def _stop_services(ctx: ExecutionContext) -> None:
52
97
  typer.echo("Parando serviços relacionados (kubelet, containerd)...")
53
98
  for service in SYSTEMD_SERVICES:
@@ -131,6 +176,9 @@ def run(ctx: ExecutionContext) -> None:
131
176
  typer.echo("Sanitizacao cancelada pelo usuario.")
132
177
  return
133
178
 
179
+ # Primeiro passo: garantir netplan consistente, sem quebrar ao limpar
180
+ _ensure_netplan(ctx)
181
+
134
182
  _stop_services(ctx)
135
183
  _kubeadm_reset(ctx)
136
184
  _flush_iptables(ctx)
@@ -38,7 +38,7 @@ echo "Escolha o tipo de instalação:"
38
38
  echo " 1) Global (requer sudo, todos os usuários)"
39
39
  echo " 2) Virtual env (recomendado para desenvolvimento)"
40
40
  echo " 3) User install (apenas usuário atual)"
41
- read -p "Opção [2]: " INSTALL_TYPE
41
+ read -r -p "Opção [2]: " INSTALL_TYPE
42
42
  INSTALL_TYPE=${INSTALL_TYPE:-2}
43
43
 
44
44
  echo ""
@@ -51,6 +51,7 @@ case $INSTALL_TYPE in
51
51
  2)
52
52
  echo -e "${YELLOW}Criando virtual environment...${NC}"
53
53
  python3 -m venv .venv
54
+ # shellcheck disable=SC1091
54
55
  source .venv/bin/activate
55
56
  pip install --upgrade pip
56
57
  pip install -e .
@@ -73,7 +74,7 @@ EOF
73
74
 
74
75
  # Adicionar ao PATH se necessário
75
76
  if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
76
- echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
77
+ echo "export PATH=\"$HOME/.local/bin:$PATH\"" >> ~/.bashrc
77
78
  echo -e "${YELLOW}⚠${NC} Adicionado $HOME/.local/bin ao PATH"
78
79
  echo "Execute: source ~/.bashrc"
79
80
  fi
@@ -11,21 +11,25 @@ OUTPUT=${RAIJIN_METRIC_FILE:-/var/lib/node_exporter/textfile_collector/raijin_lo
11
11
  # Calcula soma de todos os logs (principal + rotações)
12
12
  TOTAL_BYTES=0
13
13
  shopt -s nullglob
14
+
15
# Per-file metric lines are staged in a temp file that is removed on exit.
METRICS_TMP=$(mktemp)
trap 'rm -f "$METRICS_TMP"' EXIT

# [[ =~ ]] uses POSIX ERE, which has no \d class; [0-9] is required so the
# rotated files (raijin-server.log.1, .2, ...) get a per-file metric too.
LOG_NAME_RE='raijin-server\.log(\.[0-9]+)?$'

for f in "$LOG_DIR"/$LOG_PATTERN; do
  size=$(stat -c%s "$f" 2>/dev/null || echo 0)
  TOTAL_BYTES=$((TOTAL_BYTES + size))
  if [[ "$f" =~ $LOG_NAME_RE ]]; then
    printf "raijin_log_size_bytes{file=\"%s\"} %d\n" "$(basename "$f")" "$size" >> "$METRICS_TMP"
  fi
done

# Write the metrics to the final file
mkdir -p "$(dirname "$OUTPUT")"
{
  echo "# HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)"
  echo "# TYPE raijin_log_size_bytes gauge"
  cat "$METRICS_TMP"
  echo "# HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)"
  echo "# TYPE raijin_log_size_total_bytes gauge"
  echo "raijin_log_size_total_bytes ${TOTAL_BYTES}"
} > "$OUTPUT"
@@ -49,6 +49,7 @@ fi
49
49
  echo ""
50
50
  echo "2. Verificando Sistema Operacional..."
51
51
  if [ -f /etc/os-release ]; then
52
+ # shellcheck disable=SC1091
52
53
  . /etc/os-release
53
54
  if [[ "$ID" == "ubuntu" ]]; then
54
55
  VERSION_NUM=$(echo "$VERSION_ID" | cut -d. -f1)
@@ -152,7 +153,7 @@ STATE_DIRS=("/var/lib/raijin-server/state" "$HOME/.local/share/raijin-server/sta
152
153
  FOUND_STATE=0
153
154
  for dir in "${STATE_DIRS[@]}"; do
154
155
  if [[ -d "$dir" ]]; then
155
- MODULE_COUNT=$(ls -1 "$dir"/*.done 2>/dev/null | wc -l)
156
+ MODULE_COUNT=$(find "$dir" -maxdepth 1 -name '*.done' -type f 2>/dev/null | wc -l)
156
157
  if [[ $MODULE_COUNT -gt 0 ]]; then
157
158
  check_pass "$MODULE_COUNT modulos concluidos (em $dir)"
158
159
  FOUND_STATE=1
raijin_server/utils.py CHANGED
@@ -29,9 +29,38 @@ BACKUP_COUNT = int(os.environ.get("RAIJIN_LOG_BACKUP_COUNT", 5))
29
29
  logger = logging.getLogger("raijin-server")
30
30
  logger.setLevel(logging.INFO)
31
31
 
32
- file_handler = RotatingFileHandler(LOG_FILE, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
32
+
33
def _build_file_handler() -> RotatingFileHandler:
    """Create the rotating log handler, falling back to ``$HOME`` on failure.

    The primary target is ``LOG_FILE``. If it cannot be opened, the handler
    is created on ``~/.raijin-server.log`` instead.

    Returns:
        A ``RotatingFileHandler`` configured with ``MAX_LOG_BYTES`` and
        ``BACKUP_COUNT``.
    """
    try:
        return RotatingFileHandler(LOG_FILE, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
    except OSError:
        # OSError covers PermissionError (non-root user) as well as a missing
        # log directory or a read-only filesystem, all of which would
        # otherwise crash at import time.
        fallback = Path.home() / ".raijin-server.log"
        fallback.parent.mkdir(parents=True, exist_ok=True)
        return RotatingFileHandler(fallback, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
41
+
42
+
43
+ file_handler = _build_file_handler()
33
44
  stream_handler = logging.StreamHandler()
34
45
 
46
+
47
def active_log_file() -> Path:
    """Return the path the file handler writes to.

    Falls back to ``LOG_FILE`` when the handler exposes no ``baseFilename``.
    """
    try:
        target = file_handler.baseFilename
    except AttributeError:
        target = LOG_FILE
    return Path(target)
49
+
50
+
51
def available_log_files() -> list[Path]:
    """Return the active log file and its siblings sharing the same name prefix.

    Returns:
        Sorted paths in the log directory whose name starts with the active
        log file's name and that are regular files.
    """
    current = active_log_file()
    candidates = sorted(current.parent.glob(f"{current.name}*"))
    return [entry for entry in candidates if entry.is_file()]
55
+
56
+
57
def page_text(content: str) -> None:
    """Show ``content`` through ``less -R`` when available, else echo it.

    Args:
        content: Text to display.
    """
    less_path = shutil.which("less")
    if not less_path:
        # No pager on PATH: print everything at once.
        typer.echo(content)
        return
    subprocess.run([less_path, "-R"], input=content, text=True, check=False)
63
+
35
64
  formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
36
65
  file_handler.setFormatter(formatter)
37
66
  stream_handler.setFormatter(formatter)
@@ -57,6 +86,13 @@ class ExecutionContext:
57
86
  timeout: int = 600 # 10 min for slow connections
58
87
  errors: list = field(default_factory=list)
59
88
  warnings: list = field(default_factory=list)
89
+ # Controle interativo/diagnostico
90
+ selected_steps: list[str] | None = None
91
+ confirm_each_step: bool = False
92
+ debug_snapshots: bool = False
93
+ post_diagnose: bool = False
94
+ color_prompts: bool = True
95
+ interactive_steps: bool = False
60
96
 
61
97
 
62
98
  def resolve_script_path(script_name: str) -> Path: