raijin_server-0.2.6-py3-none-any.whl → raijin_server-0.2.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
raijin_server/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """Pacote principal do CLI Raijin Server."""
2
2
 
3
- __version__ = "0.2.4"
3
+ __version__ = "0.2.7"
4
4
 
5
5
  __all__ = ["__version__"]
raijin_server/cli.py CHANGED
@@ -6,6 +6,8 @@ import os
6
6
  from pathlib import Path
7
7
  from typing import Callable, Dict, Optional
8
8
 
9
+ import subprocess
10
+
9
11
  import typer
10
12
  from rich import box
11
13
  from rich.console import Console
@@ -42,7 +44,7 @@ from raijin_server.modules import (
42
44
  velero,
43
45
  vpn,
44
46
  )
45
- from raijin_server.utils import ExecutionContext, logger
47
+ from raijin_server.utils import ExecutionContext, logger, active_log_file, available_log_files, page_text, ensure_tool
46
48
  from raijin_server.validators import validate_system_requirements, check_module_dependencies
47
49
  from raijin_server.healthchecks import run_health_check
48
50
  from raijin_server.config import ConfigManager
@@ -131,6 +133,19 @@ MODULE_DESCRIPTIONS: Dict[str, str] = {
131
133
  }
132
134
 
133
135
 
136
+ def _capture_cmd(cmd: list[str], timeout: int = 30) -> str:
137
+ try:
138
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
139
+ if result.returncode == 0:
140
+ return result.stdout.strip() or "(sem saida)"
141
+ return (
142
+ f"✗ {' '.join(cmd)}\n"
143
+ f"{(result.stdout or '').strip()}\n{(result.stderr or '').strip()}".strip()
144
+ )
145
+ except Exception as exc:
146
+ return f"✗ {' '.join(cmd)} -> {exc}"
147
+
148
+
134
149
  def _run_module(ctx: typer.Context, name: str, skip_validation: bool = False) -> None:
135
150
  handler = MODULES.get(name)
136
151
  if handler is None:
@@ -542,6 +557,120 @@ def cert_list_issuers(ctx: typer.Context) -> None:
542
557
  pass
543
558
 
544
559
 
560
+ # ============================================================================
561
+ # Ferramentas de Depuração / Logs
562
+ # ============================================================================
563
+ debug_app = typer.Typer(help="Ferramentas de depuracao e investigacao de logs")
564
+ app.add_typer(debug_app, name="debug")
565
+
566
+
567
+ @debug_app.command(name="logs")
568
+ def debug_logs(
569
+ lines: int = typer.Option(200, "--lines", "-n", help="Quantidade de linhas ao ler"),
570
+ follow: bool = typer.Option(False, "--follow", "-f", help="Segue o log com tail -F"),
571
+ pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
572
+ ) -> None:
573
+ """Mostra logs do raijin-server com opcao de follow."""
574
+
575
+ logs = available_log_files()
576
+ if not logs:
577
+ typer.secho("Nenhum log encontrado", fg=typer.colors.YELLOW)
578
+ return
579
+
580
+ main_log = active_log_file()
581
+ typer.echo(f"Log ativo: {main_log}")
582
+
583
+ if follow:
584
+ subprocess.run(["tail", "-n", str(lines), "-F", str(main_log)])
585
+ return
586
+
587
+ chunks = []
588
+ for path in logs:
589
+ try:
590
+ data = path.read_text()
591
+ except Exception as exc:
592
+ data = f"[erro ao ler {path}: {exc}]"
593
+ chunks.append(f"===== {path} =====\n{data}")
594
+
595
+ output = "\n\n".join(chunks)
596
+ if pager:
597
+ page_text(output)
598
+ else:
599
+ typer.echo(output)
600
+
601
+
602
+ @debug_app.command(name="kube")
603
+ def debug_kube(
604
+ ctx: typer.Context,
605
+ events: int = typer.Option(200, "--events", "-e", help="Quantas linhas finais de eventos exibir"),
606
+ namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Filtra pods/eventos por namespace"),
607
+ pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
608
+ ) -> None:
609
+ """Snapshot rapido de nodes, pods e eventos do cluster."""
610
+
611
+ exec_ctx = ctx.obj or ExecutionContext()
612
+ ensure_tool("kubectl", exec_ctx)
613
+
614
+ sections = []
615
+ sections.append(("kubectl get nodes -o wide", _capture_cmd(["kubectl", "get", "nodes", "-o", "wide"])))
616
+
617
+ pods_cmd: list[str] = ["kubectl", "get", "pods"]
618
+ if namespace:
619
+ pods_cmd.extend(["-n", namespace])
620
+ else:
621
+ pods_cmd.append("-A")
622
+ pods_cmd.extend(["-o", "wide"])
623
+ sections.append(("kubectl get pods", _capture_cmd(pods_cmd)))
624
+
625
+ events_cmd: list[str] = ["kubectl", "get", "events"]
626
+ if namespace:
627
+ events_cmd.extend(["-n", namespace])
628
+ else:
629
+ events_cmd.append("-A")
630
+ events_cmd.extend(["--sort-by=.lastTimestamp"])
631
+ events_output = _capture_cmd(events_cmd)
632
+ if events_output and events > 0:
633
+ events_output = "\n".join(events_output.splitlines()[-events:])
634
+ sections.append(("kubectl get events", events_output))
635
+
636
+ combined = "\n\n".join([f"[{title}]\n{body}" for title, body in sections])
637
+ if pager:
638
+ page_text(combined)
639
+ else:
640
+ typer.echo(combined)
641
+
642
+
643
+ @debug_app.command(name="journal")
644
+ def debug_journal(
645
+ ctx: typer.Context,
646
+ service: str = typer.Option("kubelet", "--service", "-s", help="Unidade systemd para inspecionar"),
647
+ lines: int = typer.Option(200, "--lines", "-n", help="Linhas a exibir"),
648
+ follow: bool = typer.Option(False, "--follow", "-f", help="Segue o journal em tempo real"),
649
+ pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
650
+ ) -> None:
651
+ """Mostra logs de services (ex.: kubelet) via journalctl."""
652
+
653
+ exec_ctx = ctx.obj or ExecutionContext()
654
+ ensure_tool("journalctl", exec_ctx)
655
+
656
+ cmd = ["journalctl", "-u", service, "-n", str(lines)]
657
+ if follow:
658
+ cmd.append("-f")
659
+ subprocess.run(cmd)
660
+ return
661
+
662
+ cmd.append("--no-pager")
663
+ output = _capture_cmd(cmd, timeout=60)
664
+ if lines > 0:
665
+ output = "\n".join(output.splitlines()[-lines:])
666
+
667
+ text = f"[journalctl -u {service} -n {lines}]\n{output}"
668
+ if pager:
669
+ page_text(text)
670
+ else:
671
+ typer.echo(text)
672
+
673
+
545
674
  # ============================================================================
546
675
  # Comandos Existentes
547
676
  # ============================================================================
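The `_capture_cmd` helper added above is what the new `debug` subcommands use whenever they shell out: a zero exit code yields the trimmed stdout (or a placeholder), a non-zero exit code yields a `✗`-prefixed string carrying the command plus both output streams, and any exception (timeout, missing binary) is folded into the same string shape, so callers never need their own error handling. A minimal standalone sketch of that contract, mirroring the diff (the sample commands are illustrative only):

```python
import subprocess

def capture_cmd(cmd: list[str], timeout: int = 30) -> str:
    """Run a command and always return printable text instead of raising."""
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
        if result.returncode == 0:
            return result.stdout.strip() or "(no output)"
        # non-zero exit: prefix with the command and keep both streams
        return f"✗ {' '.join(cmd)}\n{(result.stdout or '').strip()}\n{(result.stderr or '').strip()}".strip()
    except Exception as exc:  # timeout, binary not found, ...
        return f"✗ {' '.join(cmd)} -> {exc}"

print(capture_cmd(["echo", "hello"]))             # "hello"
print(capture_cmd(["ls", "/definitely-missing"])) # "✗ ls /definitely-missing ..." (non-zero exit)
print(capture_cmd(["no-such-binary"]))            # "✗ no-such-binary -> [Errno 2] ..."
```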
@@ -554,8 +683,24 @@ def bootstrap_cmd(ctx: typer.Context) -> None:
554
683
 
555
684
 
556
685
  @app.command(name="full-install")
557
- def full_install_cmd(ctx: typer.Context) -> None:
686
+ def full_install_cmd(
687
+ ctx: typer.Context,
688
+ steps: Optional[str] = typer.Option(None, "--steps", help="Lista de modulos, separado por virgula"),
689
+ confirm_each: bool = typer.Option(False, "--confirm-each", help="Pedir confirmacao antes de cada modulo"),
690
+ debug_mode: bool = typer.Option(False, "--debug-mode", help="Habilita snapshots e diagnose pos-modulo"),
691
+ snapshots: bool = typer.Option(False, "--snapshots", help="Habilita snapshots de cluster apos cada modulo"),
692
+ post_diagnose: bool = typer.Option(False, "--post-diagnose", help="Executa diagnose pos-modulo quando disponivel"),
693
+ select_steps: bool = typer.Option(False, "--select-steps", help="Pergunta quais modulos executar antes de iniciar"),
694
+ ) -> None:
558
695
  """Executa instalacao completa e automatizada do ambiente de producao."""
696
+ exec_ctx = ctx.obj or ExecutionContext()
697
+ if steps:
698
+ exec_ctx.selected_steps = [s.strip() for s in steps.split(",") if s.strip()]
699
+ exec_ctx.interactive_steps = select_steps
700
+ exec_ctx.confirm_each_step = confirm_each
701
+ exec_ctx.debug_snapshots = debug_mode or snapshots or exec_ctx.debug_snapshots
702
+ exec_ctx.post_diagnose = debug_mode or post_diagnose or exec_ctx.post_diagnose
703
+ ctx.obj = exec_ctx
559
704
  _run_module(ctx, "full_install")
560
705
 
561
706
 
raijin_server/modules/calico.py CHANGED
@@ -1,7 +1,8 @@
1
1
  """Configuracao de Calico como CNI com CIDR customizado e policies opinativas."""
2
2
 
3
+ import json
3
4
  from pathlib import Path
4
- from typing import Iterable
5
+ from typing import Iterable, List
5
6
 
6
7
  import typer
7
8
 
@@ -16,6 +17,7 @@ from raijin_server.utils import (
16
17
 
17
18
  EGRESS_LABEL_KEY = "networking.raijin.dev/egress"
18
19
  EGRESS_LABEL_VALUE = "internet"
20
+ DEFAULT_WORKLOAD_NAMESPACE = "apps"
19
21
 
20
22
 
21
23
  def _apply_policy(content: str, ctx: ExecutionContext, suffix: str) -> None:
@@ -25,6 +27,23 @@ def _apply_policy(content: str, ctx: ExecutionContext, suffix: str) -> None:
25
27
  path.unlink(missing_ok=True)
26
28
 
27
29
 
30
+ def _ensure_namespace(namespace: str, ctx: ExecutionContext) -> None:
31
+ """Garante que um namespace de workloads exista com labels padrao."""
32
+ manifest = f"""apiVersion: v1
33
+ kind: Namespace
34
+ metadata:
35
+ name: {namespace}
36
+ labels:
37
+ raijin/workload-profile: production
38
+ networking.raijin.dev/default-egress: restricted
39
+ """
40
+
41
+ path = Path(f"/tmp/raijin-ns-{namespace}.yaml")
42
+ write_file(path, manifest, ctx)
43
+ kubectl_apply(str(path), ctx)
44
+ path.unlink(missing_ok=True)
45
+
46
+
28
47
  def _build_default_deny(namespace: str) -> str:
29
48
  return f"""apiVersion: networking.k8s.io/v1
30
49
  kind: NetworkPolicy
@@ -62,6 +81,43 @@ def _split_namespaces(raw_value: str) -> Iterable[str]:
62
81
  return [ns.strip() for ns in raw_value.split(",") if ns.strip()]
63
82
 
64
83
 
84
+ def _list_workloads_without_egress(namespaces: List[str], ctx: ExecutionContext) -> None:
85
+ """Lista workloads sem label de egress e apenas avisa se falhar."""
86
+ if ctx.dry_run:
87
+ typer.echo("[dry-run] Skip listagem de workloads para liberação de egress")
88
+ return
89
+
90
+ typer.secho("\nWorkloads sem liberação de egress (adicione label para liberar internet):", fg=typer.colors.CYAN)
91
+ for ns in namespaces:
92
+ result = run_cmd(
93
+ ["kubectl", "get", "deploy,statefulset,daemonset", "-n", ns, "-o", "json"],
94
+ ctx,
95
+ check=False,
96
+ )
97
+ if result.returncode != 0:
98
+ msg = (result.stderr or result.stdout or "erro desconhecido").strip()
99
+ typer.secho(f" Aviso: nao foi possivel listar workloads em '{ns}' ({msg})", fg=typer.colors.YELLOW)
100
+ continue
101
+
102
+ try:
103
+ data = json.loads(result.stdout or "{}")
104
+ items = data.get("items", [])
105
+ pending = []
106
+ for item in items:
107
+ meta = item.get("metadata", {})
108
+ labels = meta.get("labels", {}) or {}
109
+ if labels.get(EGRESS_LABEL_KEY) != EGRESS_LABEL_VALUE:
110
+ pending.append(f"{meta.get('namespace', ns)}/{meta.get('name', 'desconhecido')}")
111
+
112
+ if pending:
113
+ for name in pending:
114
+ typer.echo(f" - {name}")
115
+ else:
116
+ typer.echo(f" Nenhum workload pendente em '{ns}'")
117
+ except Exception as exc:
118
+ typer.secho(f" Aviso: falha ao processar workloads em '{ns}': {exc}", fg=typer.colors.YELLOW)
119
+
120
+
65
121
  def _check_cluster_available(ctx: ExecutionContext) -> bool:
66
122
  """Verifica se o cluster Kubernetes esta acessivel."""
67
123
  if ctx.dry_run:
@@ -99,13 +155,19 @@ def run(ctx: ExecutionContext) -> None:
99
155
  typer.echo("Aplicando Calico como CNI...")
100
156
  pod_cidr = typer.prompt("Pod CIDR (Calico)", default="10.244.0.0/16")
101
157
 
158
+ typer.secho(
159
+ f"Criando namespace padrao de workloads '{DEFAULT_WORKLOAD_NAMESPACE}' (production-ready)...",
160
+ fg=typer.colors.CYAN,
161
+ )
162
+ _ensure_namespace(DEFAULT_WORKLOAD_NAMESPACE, ctx)
163
+
102
164
  manifest_url = "https://raw.githubusercontent.com/projectcalico/calico/v3.27.2/manifests/calico.yaml"
103
165
  cmd = f"curl -s {manifest_url} | sed 's#192.168.0.0/16#{pod_cidr}#' | kubectl apply -f -"
104
166
  run_cmd(cmd, ctx, use_shell=True)
105
167
 
106
168
  deny_namespaces_raw = typer.prompt(
107
169
  "Namespaces para aplicar default-deny (CSV)",
108
- default="default",
170
+ default=DEFAULT_WORKLOAD_NAMESPACE,
109
171
  )
110
172
  for namespace in _split_namespaces(deny_namespaces_raw):
111
173
  typer.echo(f"Aplicando default-deny no namespace '{namespace}'...")
@@ -117,9 +179,12 @@ def run(ctx: ExecutionContext) -> None:
117
179
  ):
118
180
  allow_namespaces_raw = typer.prompt(
119
181
  "Namespaces com pods que precisam acessar APIs externas (CSV)",
120
- default="default",
182
+ default=DEFAULT_WORKLOAD_NAMESPACE,
121
183
  )
122
184
  cidr = typer.prompt("CIDR liberado (ex.: 0.0.0.0/0)", default="0.0.0.0/0")
185
+ namespaces = list(_split_namespaces(allow_namespaces_raw))
186
+ if namespaces:
187
+ _list_workloads_without_egress(namespaces, ctx)
123
188
  for namespace in _split_namespaces(allow_namespaces_raw):
124
189
  typer.echo(
125
190
  f"Criando policy allow-egress-internet em '{namespace}' para pods com "
raijin_server/modules/cert_manager.py CHANGED
@@ -18,6 +18,8 @@ from enum import Enum
18
18
  from pathlib import Path
19
19
  from typing import Callable, Optional, List
20
20
 
21
+ import os
22
+
21
23
  import typer
22
24
 
23
25
  from raijin_server.utils import (
@@ -34,11 +36,14 @@ CHART_REPO = "https://charts.jetstack.io"
34
36
  CHART_NAME = "cert-manager"
35
37
  NAMESPACE = "cert-manager"
36
38
  MANIFEST_PATH = Path("/tmp/raijin-cert-manager-issuer.yaml")
39
+ HELM_DATA_DIR = Path("/tmp/raijin-helm")
40
+ HELM_REPO_CONFIG = HELM_DATA_DIR / "repositories.yaml"
41
+ HELM_REPO_CACHE = HELM_DATA_DIR / "cache"
37
42
 
38
- # Timeouts mais generosos para ambientes lentos
39
- WEBHOOK_READY_TIMEOUT = 600 # 10 minutos
40
- POD_READY_TIMEOUT = 300 # 5 minutos
41
- CRD_READY_TIMEOUT = 180 # 3 minutos
43
+ # Timeouts enxutos (falha rápida em redes rápidas)
44
+ WEBHOOK_READY_TIMEOUT = 240 # 4 minutos
45
+ POD_READY_TIMEOUT = 180 # 3 minutos
46
+ CRD_READY_TIMEOUT = 120 # 2 minutos
42
47
 
43
48
 
44
49
  class DNSProvider(str, Enum):
@@ -82,6 +87,17 @@ def _get_acme_server(staging: bool) -> str:
82
87
  return "https://acme-v02.api.letsencrypt.org/directory"
83
88
 
84
89
 
90
+ def _helm_env() -> dict:
91
+ """Garante diretórios de cache/config do Helm isolados em /tmp para evitar erros de permissão."""
92
+ HELM_DATA_DIR.mkdir(parents=True, exist_ok=True)
93
+ HELM_REPO_CACHE.mkdir(parents=True, exist_ok=True)
94
+ return {
95
+ **os.environ,
96
+ "HELM_REPOSITORY_CONFIG": str(HELM_REPO_CONFIG),
97
+ "HELM_REPOSITORY_CACHE": str(HELM_REPO_CACHE),
98
+ }
99
+
100
+
85
101
  # =============================================================================
86
102
  # Builders de Manifests YAML
87
103
  # =============================================================================
@@ -519,6 +535,7 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
519
535
  capture_output=True,
520
536
  text=True,
521
537
  timeout=60,
538
+ env=_helm_env(),
522
539
  )
523
540
 
524
541
  if result.returncode != 0:
@@ -539,6 +556,7 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
539
556
  capture_output=True,
540
557
  text=True,
541
558
  timeout=120,
559
+ env=_helm_env(),
542
560
  )
543
561
 
544
562
  elapsed_update = time.time() - start
@@ -562,8 +580,8 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
562
580
  return False
563
581
 
564
582
 
565
- def _run_helm_install(ctx: ExecutionContext) -> bool:
566
- """Executa o helm upgrade --install."""
583
+ def _run_helm_install(ctx: ExecutionContext, attempt: int = 1) -> bool:
584
+ """Executa o helm upgrade --install, com uma tentativa de retry para repo/config."""
567
585
  if ctx.dry_run:
568
586
  typer.echo(" [4/5] [dry-run] Executando helm upgrade --install...")
569
587
  return True
@@ -574,6 +592,7 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
574
592
 
575
593
  cmd = [
576
594
  "helm", "upgrade", "--install", "cert-manager", "jetstack/cert-manager",
595
+ "--repo", CHART_REPO,
577
596
  "-n", NAMESPACE,
578
597
  "--create-namespace",
579
598
  "--set", "installCRDs=true",
@@ -598,6 +617,7 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
598
617
  stdout=subprocess.PIPE,
599
618
  stderr=subprocess.STDOUT,
600
619
  text=True,
620
+ env=_helm_env(),
601
621
  )
602
622
 
603
623
  output_lines = []
@@ -628,7 +648,13 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
628
648
  output = "".join(output_lines[-20:]) # Últimas 20 linhas
629
649
  logger.error(f"Helm install falhou (código {return_code}): {output}")
630
650
  typer.secho(f" ✗ Helm install falhou (código {return_code})", fg=typer.colors.RED)
631
-
651
+
652
+ needs_repo_retry = "repo jetstack not found" in output.lower() or "repositories.yaml" in output.lower()
653
+ if needs_repo_retry and attempt == 1:
654
+ typer.echo(" → Reconfigurando repositório Helm e tentando novamente...")
655
+ if _add_helm_repo(ctx):
656
+ return _run_helm_install(ctx, attempt=2)
657
+
632
658
  # Mostra as últimas linhas do erro
633
659
  typer.echo("\n Últimas linhas do log:")
634
660
  for line in output_lines[-10:]:
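Two related cert-manager changes work together here: every Helm call now receives an environment where `HELM_REPOSITORY_CONFIG` and `HELM_REPOSITORY_CACHE` point into `/tmp/raijin-helm` (avoiding permission clashes with per-user Helm state when running under sudo), and `_run_helm_install` retries exactly once, after re-adding the repo, when the failure output mentions `repo jetstack not found` or `repositories.yaml`. A condensed sketch of the environment-isolation half (the `helm repo list` call is only an example and is skipped when helm is not installed):

```python
import os
import shutil
import subprocess
from pathlib import Path

HELM_DATA_DIR = Path("/tmp/raijin-helm")

def helm_env() -> dict:
    """Isolated Helm repo config/cache, so root/sudo runs don't trip on permissions."""
    (HELM_DATA_DIR / "cache").mkdir(parents=True, exist_ok=True)
    return {
        **os.environ,
        "HELM_REPOSITORY_CONFIG": str(HELM_DATA_DIR / "repositories.yaml"),
        "HELM_REPOSITORY_CACHE": str(HELM_DATA_DIR / "cache"),
    }

# Every helm invocation in the module now passes env=helm_env(); for example:
if shutil.which("helm"):
    subprocess.run(["helm", "repo", "list"], env=helm_env(), capture_output=True, text=True)
```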
raijin_server/modules/full_install.py CHANGED
@@ -1,10 +1,13 @@
1
1
  """Instalacao completa e automatizada do ambiente produtivo."""
2
2
 
3
3
  import os
4
+ import subprocess
5
+ from typing import List
4
6
 
5
7
  import typer
6
8
 
7
9
  from raijin_server.utils import ExecutionContext, require_root
10
+ from raijin_server.healthchecks import run_health_check
8
11
  from raijin_server.modules import (
9
12
  bootstrap,
10
13
  calico,
@@ -68,6 +71,196 @@ def _cert_manager_install_only(ctx: ExecutionContext) -> None:
68
71
  )
69
72
 
70
73
 
74
+ def _confirm_colored(message: str, default: bool = True) -> bool:
75
+ """Confirmação com destaque visual."""
76
+ styled = typer.style(message, fg=typer.colors.YELLOW, bold=True)
77
+ return typer.confirm(styled, default=default)
78
+
79
+
80
+ def _select_steps_interactively() -> List[str] | None:
81
+ typer.secho("Selecione passos (separados por vírgula) ou ENTER para todos:", fg=typer.colors.CYAN)
82
+ typer.echo("Exemplo: kubernetes,calico,cert_manager,traefik")
83
+ answer = typer.prompt("Passos", default="").strip()
84
+ if not answer:
85
+ return None
86
+ steps = [s.strip() for s in answer.split(",") if s.strip()]
87
+ return steps or None
88
+
89
+
90
+ def _kube_snapshot(ctx: ExecutionContext, events: int = 100, namespace: str | None = None) -> None:
91
+ """Coleta snapshot rápido de cluster para debug (best-effort)."""
92
+ cmds = []
93
+ cmds.append(["kubectl", "get", "nodes", "-o", "wide"])
94
+
95
+ pods_cmd = ["kubectl", "get", "pods"]
96
+ if namespace:
97
+ pods_cmd += ["-n", namespace]
98
+ else:
99
+ pods_cmd.append("-A")
100
+ pods_cmd += ["-o", "wide"]
101
+ cmds.append(pods_cmd)
102
+
103
+ events_cmd = ["kubectl", "get", "events"]
104
+ if namespace:
105
+ events_cmd += ["-n", namespace]
106
+ else:
107
+ events_cmd.append("-A")
108
+ events_cmd += ["--sort-by=.lastTimestamp"]
109
+ cmds.append(events_cmd)
110
+
111
+ typer.secho("\n[DEBUG] Snapshot do cluster", fg=typer.colors.CYAN)
112
+ for cmd in cmds:
113
+ try:
114
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
115
+ typer.echo(f"$ {' '.join(cmd)}")
116
+ if result.stdout:
117
+ lines = result.stdout.strip().splitlines()
118
+ if cmd is events_cmd:
119
+ lines = lines[-events:]
120
+ typer.echo("\n".join(lines))
121
+ elif result.stderr:
122
+ typer.echo(result.stderr.strip())
123
+ except Exception as exc:
124
+ typer.secho(f"(snapshot falhou: {exc})", fg=typer.colors.YELLOW)
125
+
126
+
127
+ def _run_cmd(title: str, cmd: List[str], ctx: ExecutionContext, tail: int | None = None) -> None:
128
+ """Executa comando kubectl/helm best-effort para diagnosticos rapidos."""
129
+ typer.secho(f"\n[diagnose] {title}", fg=typer.colors.CYAN)
130
+ if ctx.dry_run:
131
+ typer.echo("[dry-run] comando nao executado")
132
+ return
133
+
134
+ try:
135
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=40)
136
+ typer.echo(f"$ {' '.join(cmd)}")
137
+ output = result.stdout.strip() or result.stderr.strip()
138
+ if output:
139
+ lines = output.splitlines()
140
+ if tail:
141
+ lines = lines[-tail:]
142
+ typer.echo("\n".join(lines))
143
+ else:
144
+ typer.echo("(sem saida)")
145
+ except Exception as exc:
146
+ typer.secho(f"(falha ao executar: {exc})", fg=typer.colors.YELLOW)
147
+
148
+
149
+ def _diag_namespace(ns: str, ctx: ExecutionContext, tail_events: int = 50) -> None:
150
+ _run_cmd(f"Pods em {ns}", ["kubectl", "get", "pods", "-n", ns, "-o", "wide"], ctx)
151
+ _run_cmd(f"Services em {ns}", ["kubectl", "get", "svc", "-n", ns], ctx)
152
+ _run_cmd(f"Deployments em {ns}", ["kubectl", "get", "deploy", "-n", ns], ctx)
153
+ _run_cmd(
154
+ f"Eventos em {ns}",
155
+ ["kubectl", "get", "events", "-n", ns, "--sort-by=.lastTimestamp"],
156
+ ctx,
157
+ tail=tail_events,
158
+ )
159
+
160
+
161
+ def _diag_calico(ctx: ExecutionContext) -> None:
162
+ ns = "kube-system"
163
+ _run_cmd("Calico DaemonSets", ["kubectl", "get", "ds", "-n", ns, "-o", "wide"], ctx)
164
+ _run_cmd("Calico pods", ["kubectl", "get", "pods", "-n", ns, "-l", "k8s-app=calico-node", "-o", "wide"], ctx)
165
+ _run_cmd("Calico typha", ["kubectl", "get", "pods", "-n", ns, "-l", "k8s-app=calico-typha", "-o", "wide"], ctx)
166
+ _run_cmd("Calico events", ["kubectl", "get", "events", "-n", ns, "--sort-by=.lastTimestamp"], ctx, tail=50)
167
+
168
+
169
+ def _diag_secrets(ctx: ExecutionContext) -> None:
170
+ _diag_namespace("kube-system", ctx)
171
+ _diag_namespace("external-secrets", ctx)
172
+
173
+
174
+ def _diag_prometheus(ctx: ExecutionContext) -> None:
175
+ ns = "observability"
176
+ _run_cmd("Prometheus pods", ["kubectl", "get", "pods", "-n", ns, "-l", "app.kubernetes.io/name=prometheus"], ctx)
177
+ _diag_namespace(ns, ctx)
178
+
179
+
180
+ def _diag_grafana(ctx: ExecutionContext) -> None:
181
+ ns = "observability"
182
+ _run_cmd("Grafana svc", ["kubectl", "get", "svc", "-n", ns, "-l", "app.kubernetes.io/name=grafana"], ctx)
183
+ _diag_namespace(ns, ctx)
184
+
185
+
186
+ def _diag_loki(ctx: ExecutionContext) -> None:
187
+ ns = "observability"
188
+ _run_cmd("Loki statefulsets", ["kubectl", "get", "sts", "-n", ns, "-l", "app.kubernetes.io/name=loki"], ctx)
189
+ _diag_namespace(ns, ctx)
190
+
191
+
192
+ def _diag_traefik(ctx: ExecutionContext) -> None:
193
+ ns = "traefik"
194
+ _run_cmd("Traefik ingress", ["kubectl", "get", "ingress", "-n", ns], ctx)
195
+ _diag_namespace(ns, ctx)
196
+
197
+
198
+ def _diag_observability_ingress(ctx: ExecutionContext) -> None:
199
+ ns = "observability"
200
+ _run_cmd("Ingress objects", ["kubectl", "get", "ingress", "-n", ns], ctx)
201
+ _diag_namespace(ns, ctx)
202
+
203
+
204
+ def _diag_observability_dashboards(ctx: ExecutionContext) -> None:
205
+ ns = "observability"
206
+ _run_cmd("ConfigMaps dashboards", ["kubectl", "get", "configmap", "-n", ns, "-l", "raijin/dashboards=true"], ctx)
207
+ _diag_namespace(ns, ctx)
208
+
209
+
210
+ def _diag_minio(ctx: ExecutionContext) -> None:
211
+ ns = "minio"
212
+ _diag_namespace(ns, ctx)
213
+
214
+
215
+ def _diag_kafka(ctx: ExecutionContext) -> None:
216
+ ns = "kafka"
217
+ _run_cmd("Kafka pods", ["kubectl", "get", "pods", "-n", ns, "-o", "wide"], ctx)
218
+ _diag_namespace(ns, ctx)
219
+
220
+
221
+ def _diag_velero(ctx: ExecutionContext) -> None:
222
+ ns = "velero"
223
+ _diag_namespace(ns, ctx)
224
+
225
+
226
+ def _diag_kong(ctx: ExecutionContext) -> None:
227
+ ns = "kong"
228
+ _diag_namespace(ns, ctx)
229
+
230
+
231
+ DIAG_HANDLERS = {
232
+ "cert_manager": cert_manager.diagnose,
233
+ "calico": _diag_calico,
234
+ "secrets": _diag_secrets,
235
+ "prometheus": _diag_prometheus,
236
+ "grafana": _diag_grafana,
237
+ "loki": _diag_loki,
238
+ "traefik": _diag_traefik,
239
+ "observability_ingress": _diag_observability_ingress,
240
+ "observability_dashboards": _diag_observability_dashboards,
241
+ "minio": _diag_minio,
242
+ "kafka": _diag_kafka,
243
+ "velero": _diag_velero,
244
+ "kong": _diag_kong,
245
+ }
246
+
247
+
248
+ def _maybe_diagnose(name: str, ctx: ExecutionContext) -> None:
249
+ try:
250
+ if name in DIAG_HANDLERS:
251
+ DIAG_HANDLERS[name](ctx)
252
+ return
253
+
254
+ # fallback: health check se existir
255
+ ok = run_health_check(name, ctx)
256
+ if ok:
257
+ typer.secho(f"[diagnose] {name}: OK", fg=typer.colors.GREEN)
258
+ else:
259
+ typer.secho(f"[diagnose] {name}: falhou", fg=typer.colors.YELLOW)
260
+ except Exception as exc:
261
+ typer.secho(f"[diagnose] {name} falhou: {exc}", fg=typer.colors.YELLOW)
262
+
263
+
71
264
  # Ordem de execucao dos modulos para instalacao completa
72
265
  # Modulos marcados com skip_env podem ser pulados via variavel de ambiente
73
266
  INSTALL_SEQUENCE = [
@@ -108,12 +301,25 @@ def run(ctx: ExecutionContext) -> None:
108
301
  fg=typer.colors.CYAN,
109
302
  )
110
303
 
304
+ steps_override = ctx.selected_steps
305
+ if steps_override is None and ctx.interactive_steps:
306
+ steps_override = _select_steps_interactively()
307
+
308
+ # Debug/diagnose menu simples
309
+ if not ctx.debug_snapshots and not ctx.post_diagnose:
310
+ typer.secho("Ativar modo debug (snapshots + diagnose pos-modulo)?", fg=typer.colors.YELLOW)
311
+ if typer.confirm("Habilitar debug?", default=False):
312
+ ctx.debug_snapshots = True
313
+ ctx.post_diagnose = True
314
+
111
315
  # Mostra sequencia de instalacao
112
316
  typer.echo("Sequencia de instalacao:")
113
317
  for i, (name, _, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
114
318
  suffix = ""
115
319
  if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
116
320
  suffix = " [SKIP]"
321
+ if steps_override and name not in steps_override:
322
+ suffix = " [IGNORADO]"
117
323
  typer.echo(f" {i:2}. {name:25} - {desc}{suffix}")
118
324
 
119
325
  typer.echo("")
@@ -126,7 +332,7 @@ def run(ctx: ExecutionContext) -> None:
126
332
  typer.echo("")
127
333
 
128
334
  if not ctx.dry_run:
129
- if not typer.confirm("Deseja continuar com a instalacao completa?", default=True):
335
+ if not _confirm_colored("Deseja continuar com a instalacao completa?", default=True):
130
336
  typer.echo("Instalacao cancelada.")
131
337
  raise typer.Exit(code=0)
132
338
 
@@ -135,13 +341,25 @@ def run(ctx: ExecutionContext) -> None:
135
341
  succeeded = []
136
342
  skipped = []
137
343
 
344
+ cluster_ready = False
345
+
138
346
  for i, (name, handler, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
347
+ if steps_override and name not in steps_override:
348
+ skipped.append(name)
349
+ typer.secho(f"⏭ {name} ignorado (fora da lista selecionada)", fg=typer.colors.YELLOW)
350
+ continue
351
+
139
352
  # Verifica se modulo deve ser pulado via env
140
353
  if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
141
354
  skipped.append(name)
142
355
  typer.secho(f"⏭ {name} pulado via {skip_env}=1", fg=typer.colors.YELLOW)
143
356
  continue
144
357
 
358
+ if ctx.confirm_each_step:
359
+ if not _confirm_colored(f"Executar modulo '{name}' agora?", default=True):
360
+ skipped.append(name)
361
+ continue
362
+
145
363
  typer.secho(
146
364
  f"\n{'='*60}",
147
365
  fg=typer.colors.CYAN,
@@ -160,6 +378,15 @@ def run(ctx: ExecutionContext) -> None:
160
378
  handler(ctx)
161
379
  succeeded.append(name)
162
380
  typer.secho(f"✓ {name} concluido com sucesso", fg=typer.colors.GREEN)
381
+
382
+ if name == "kubernetes":
383
+ cluster_ready = True
384
+
385
+ if ctx.post_diagnose and cluster_ready:
386
+ _maybe_diagnose(name, ctx)
387
+
388
+ if ctx.debug_snapshots and cluster_ready:
389
+ _kube_snapshot(ctx, events=80)
163
390
  except KeyboardInterrupt:
164
391
  typer.secho(f"\n⚠ Instalacao interrompida pelo usuario no modulo '{name}'", fg=typer.colors.YELLOW)
165
392
  raise typer.Exit(code=130)
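The diagnostic plumbing in full_install.py is a simple dispatch table: `DIAG_HANDLERS` maps module names to best-effort `_diag_*` functions, `_maybe_diagnose` falls back to `run_health_check` when no dedicated handler exists, and every exception is swallowed so diagnostics can never abort the install loop. A compact sketch of that dispatch-with-fallback pattern, with placeholder handlers standing in for the real `_diag_*` helpers and health checks:

```python
from typing import Callable, Dict

def diag_calico(ctx=None) -> None:  # placeholder for one of the _diag_* helpers
    print("[diagnose] inspecting calico daemonsets/pods/events ...")

DIAG_HANDLERS: Dict[str, Callable] = {"calico": diag_calico}

def run_health_check(name: str, ctx=None) -> bool:  # stand-in for raijin_server.healthchecks
    return name == "kubernetes"

def maybe_diagnose(name: str, ctx=None) -> None:
    try:
        if name in DIAG_HANDLERS:
            DIAG_HANDLERS[name](ctx)
            return
        # fallback: generic health check when no dedicated handler exists
        status = "OK" if run_health_check(name, ctx) else "falhou"
        print(f"[diagnose] {name}: {status}")
    except Exception as exc:
        # diagnostics are best-effort and must never abort the install loop
        print(f"[diagnose] {name} falhou: {exc}")

maybe_diagnose("calico")      # dedicated handler
maybe_diagnose("kubernetes")  # falls back to the health check
```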
raijin_server/modules/prometheus.py CHANGED
@@ -1,30 +1,115 @@
1
- """Configuracao do Prometheus Stack via Helm."""
1
+ """Configuracao do Prometheus Stack via Helm (robust, production-ready)."""
2
+
3
+ from __future__ import annotations
2
4
 
3
5
  import typer
4
6
 
5
- from raijin_server.utils import ExecutionContext, helm_upgrade_install, require_root
7
+ from raijin_server.utils import (
8
+ ExecutionContext,
9
+ helm_upgrade_install,
10
+ kubectl_create_ns,
11
+ require_root,
12
+ run_cmd,
13
+ )
14
+
15
+ DEFAULT_NAMESPACE = "observability"
16
+
17
+
18
+ def _get_default_storage_class(ctx: ExecutionContext) -> str:
19
+ if ctx.dry_run:
20
+ return ""
21
+ result = run_cmd(
22
+ [
23
+ "kubectl",
24
+ "get",
25
+ "storageclass",
26
+ "-o",
27
+ "jsonpath={.items[?(@.metadata.annotations['storageclass.kubernetes.io/is-default-class']=='true')].metadata.name}",
28
+ ],
29
+ ctx,
30
+ check=False,
31
+ )
32
+ return (result.stdout or "").strip()
33
+
34
+
35
+ def _ensure_cluster_access(ctx: ExecutionContext) -> None:
36
+ if ctx.dry_run:
37
+ return
38
+ result = run_cmd(["kubectl", "cluster-info"], ctx, check=False)
39
+ if result.returncode != 0:
40
+ typer.secho("Cluster Kubernetes nao acessivel. Verifique kubeconfig/controle-plane.", fg=typer.colors.RED)
41
+ raise typer.Exit(code=1)
6
42
 
7
43
 
8
44
  def run(ctx: ExecutionContext) -> None:
9
45
  require_root(ctx)
46
+ _ensure_cluster_access(ctx)
47
+
10
48
  typer.echo("Instalando kube-prometheus-stack via Helm...")
11
49
 
50
+ namespace = typer.prompt("Namespace destino", default=DEFAULT_NAMESPACE)
51
+ kubectl_create_ns(namespace, ctx)
52
+
53
+ default_sc = _get_default_storage_class(ctx)
54
+ enable_persistence = typer.confirm(
55
+ "Habilitar PVC para Prometheus e Alertmanager?", default=bool(default_sc)
56
+ )
57
+
12
58
  values = [
13
59
  "grafana.enabled=false",
14
60
  "prometheus.prometheusSpec.retention=15d",
15
61
  "prometheus.prometheusSpec.enableAdminAPI=true",
16
62
  "prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false",
17
- "prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.resources.requests.storage=20Gi",
18
- "alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.resources.requests.storage=10Gi",
63
+ "prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false",
19
64
  "defaultRules.create=true",
20
65
  ]
21
66
 
67
+ extra_args = ["--wait", "--timeout", "5m", "--atomic"]
68
+
69
+ chart_version = typer.prompt(
70
+ "Versao do chart (vazio para latest)",
71
+ default="",
72
+ ).strip()
73
+ if chart_version:
74
+ extra_args.extend(["--version", chart_version])
75
+
76
+ if enable_persistence:
77
+ storage_class = typer.prompt(
78
+ "StorageClass para PVC",
79
+ default=default_sc or "",
80
+ ).strip()
81
+ prom_size = typer.prompt("Tamanho PVC Prometheus", default="20Gi")
82
+ alert_size = typer.prompt("Tamanho PVC Alertmanager", default="10Gi")
83
+
84
+ if storage_class:
85
+ values.extend(
86
+ [
87
+ f"prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.storageClassName={storage_class}",
88
+ f"alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.storageClassName={storage_class}",
89
+ ]
90
+ )
91
+
92
+ values.extend(
93
+ [
94
+ f"prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.resources.requests.storage={prom_size}",
95
+ f"alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.resources.requests.storage={alert_size}",
96
+ ]
97
+ )
98
+ else:
99
+ typer.secho(
100
+ "PVC desativado: Prometheus/Alertmanager usarao volumes efemeros (sem retenção apos restart).",
101
+ fg=typer.colors.YELLOW,
102
+ )
103
+
22
104
  helm_upgrade_install(
23
105
  release="kube-prometheus-stack",
24
106
  chart="kube-prometheus-stack",
25
- namespace="observability",
107
+ namespace=namespace,
26
108
  repo="prometheus-community",
27
109
  repo_url="https://prometheus-community.github.io/helm-charts",
28
110
  ctx=ctx,
29
111
  values=values,
112
+ extra_args=extra_args,
30
113
  )
114
+
115
+ typer.secho("kube-prometheus-stack instalado com sucesso.", fg=typer.colors.GREEN)
raijin_server/scripts/install.sh CHANGED
@@ -38,7 +38,7 @@ echo "Escolha o tipo de instalação:"
38
38
  echo " 1) Global (requer sudo, todos os usuários)"
39
39
  echo " 2) Virtual env (recomendado para desenvolvimento)"
40
40
  echo " 3) User install (apenas usuário atual)"
41
- read -p "Opção [2]: " INSTALL_TYPE
41
+ read -r -p "Opção [2]: " INSTALL_TYPE
42
42
  INSTALL_TYPE=${INSTALL_TYPE:-2}
43
43
 
44
44
  echo ""
@@ -51,6 +51,7 @@ case $INSTALL_TYPE in
51
51
  2)
52
52
  echo -e "${YELLOW}Criando virtual environment...${NC}"
53
53
  python3 -m venv .venv
54
+ # shellcheck disable=SC1091
54
55
  source .venv/bin/activate
55
56
  pip install --upgrade pip
56
57
  pip install -e .
@@ -73,7 +74,7 @@ EOF
73
74
 
74
75
  # Adicionar ao PATH se necessário
75
76
  if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
76
- echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
77
+ echo "export PATH=\"$HOME/.local/bin:$PATH\"" >> ~/.bashrc
77
78
  echo -e "${YELLOW}⚠${NC} Adicionado $HOME/.local/bin ao PATH"
78
79
  echo "Execute: source ~/.bashrc"
79
80
  fi
raijin_server/scripts/log_size_metric.sh CHANGED
@@ -11,21 +11,25 @@ OUTPUT=${RAIJIN_METRIC_FILE:-/var/lib/node_exporter/textfile_collector/raijin_lo
11
11
  # Calcula soma de todos os logs (principal + rotações)
12
12
  TOTAL_BYTES=0
13
13
  shopt -s nullglob
14
+
15
+ METRICS_TMP=$(mktemp)
16
+ trap 'rm -f "$METRICS_TMP"' EXIT
17
+
14
18
  for f in "$LOG_DIR"/$LOG_PATTERN; do
15
19
  size=$(stat -c%s "$f" 2>/dev/null || echo 0)
16
20
  TOTAL_BYTES=$((TOTAL_BYTES + size))
17
21
  if [[ "$f" =~ raijin-server\.log(\.\d+)?$ ]]; then
18
- printf "raijin_log_size_bytes{file=\"%s\"} %d\n" "$(basename "$f")" "$size"
22
+ printf "raijin_log_size_bytes{file=\"%s\"} %d\n" "$(basename "$f")" "$size" >> "$METRICS_TMP"
19
23
  fi
20
- done | {
21
- # Escreve métricas no arquivo final
22
- mkdir -p "$(dirname "$OUTPUT")"
23
- {
24
- echo "# HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)"
25
- echo "# TYPE raijin_log_size_bytes gauge"
26
- cat
27
- echo "# HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)"
28
- echo "# TYPE raijin_log_size_total_bytes gauge"
29
- echo "raijin_log_size_total_bytes ${TOTAL_BYTES}"
30
- } > "$OUTPUT"
31
- }
24
+ done
25
+
26
+ # Escreve métricas no arquivo final
27
+ mkdir -p "$(dirname "$OUTPUT")"
28
+ {
29
+ echo "# HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)"
30
+ echo "# TYPE raijin_log_size_bytes gauge"
31
+ cat "$METRICS_TMP"
32
+ echo "# HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)"
33
+ echo "# TYPE raijin_log_size_total_bytes gauge"
34
+ echo "raijin_log_size_total_bytes ${TOTAL_BYTES}"
35
+ } > "$OUTPUT"
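The previous version of this script piped the per-file `printf` lines into a block; because each side of a shell pipeline runs in its own subshell, the `TOTAL_BYTES` accumulated inside the loop never reached the final `echo`, so the total gauge was effectively written from its initial value. The rewrite buffers the per-file gauges in a `mktemp` file (removed via `trap ... EXIT`) and assembles `$OUTPUT` once at the end. For reference, a Python sketch that emits the same node_exporter textfile-collector payload (the output path and byte counts are illustrative):

```python
from pathlib import Path

def write_textfile_metrics(sizes: dict, output: Path) -> None:
    """Emit the same gauges the shell script writes, in Prometheus textfile format."""
    lines = [
        "# HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)",
        "# TYPE raijin_log_size_bytes gauge",
    ]
    lines += [f'raijin_log_size_bytes{{file="{name}"}} {size}' for name, size in sizes.items()]
    lines += [
        "# HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)",
        "# TYPE raijin_log_size_total_bytes gauge",
        f"raijin_log_size_total_bytes {sum(sizes.values())}",
    ]
    output.parent.mkdir(parents=True, exist_ok=True)
    output.write_text("\n".join(lines) + "\n")

write_textfile_metrics({"raijin-server.log": 1048576, "raijin-server.log.1": 524288},
                       Path("/tmp/raijin_log_size.prom"))
```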
raijin_server/scripts/pre-deploy-check.sh CHANGED
@@ -49,6 +49,7 @@ fi
49
49
  echo ""
50
50
  echo "2. Verificando Sistema Operacional..."
51
51
  if [ -f /etc/os-release ]; then
52
+ # shellcheck disable=SC1091
52
53
  . /etc/os-release
53
54
  if [[ "$ID" == "ubuntu" ]]; then
54
55
  VERSION_NUM=$(echo "$VERSION_ID" | cut -d. -f1)
@@ -152,7 +153,7 @@ STATE_DIRS=("/var/lib/raijin-server/state" "$HOME/.local/share/raijin-server/sta
152
153
  FOUND_STATE=0
153
154
  for dir in "${STATE_DIRS[@]}"; do
154
155
  if [[ -d "$dir" ]]; then
155
- MODULE_COUNT=$(ls -1 "$dir"/*.done 2>/dev/null | wc -l)
156
+ MODULE_COUNT=$(find "$dir" -maxdepth 1 -name '*.done' -type f 2>/dev/null | wc -l)
156
157
  if [[ $MODULE_COUNT -gt 0 ]]; then
157
158
  check_pass "$MODULE_COUNT modulos concluidos (em $dir)"
158
159
  FOUND_STATE=1
raijin_server/utils.py CHANGED
@@ -29,9 +29,38 @@ BACKUP_COUNT = int(os.environ.get("RAIJIN_LOG_BACKUP_COUNT", 5))
29
29
  logger = logging.getLogger("raijin-server")
30
30
  logger.setLevel(logging.INFO)
31
31
 
32
- file_handler = RotatingFileHandler(LOG_FILE, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
32
+
33
+ def _build_file_handler() -> RotatingFileHandler:
34
+ """Cria handler com fallback para $HOME quando /var/log exige root."""
35
+ try:
36
+ return RotatingFileHandler(LOG_FILE, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
37
+ except PermissionError:
38
+ fallback = Path.home() / ".raijin-server.log"
39
+ fallback.parent.mkdir(parents=True, exist_ok=True)
40
+ return RotatingFileHandler(fallback, maxBytes=MAX_LOG_BYTES, backupCount=BACKUP_COUNT)
41
+
42
+
43
+ file_handler = _build_file_handler()
33
44
  stream_handler = logging.StreamHandler()
34
45
 
46
+
47
+ def active_log_file() -> Path:
48
+ return Path(getattr(file_handler, "baseFilename", LOG_FILE))
49
+
50
+
51
+ def available_log_files() -> list[Path]:
52
+ base = active_log_file()
53
+ pattern = base.name + "*"
54
+ return [p for p in sorted(base.parent.glob(pattern)) if p.is_file()]
55
+
56
+
57
+ def page_text(content: str) -> None:
58
+ pager = shutil.which("less")
59
+ if pager:
60
+ subprocess.run([pager, "-R"], input=content, text=True, check=False)
61
+ else:
62
+ typer.echo(content)
63
+
35
64
  formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
36
65
  file_handler.setFormatter(formatter)
37
66
  stream_handler.setFormatter(formatter)
@@ -57,6 +86,13 @@ class ExecutionContext:
57
86
  timeout: int = 600 # 10 min for slow connections
58
87
  errors: list = field(default_factory=list)
59
88
  warnings: list = field(default_factory=list)
89
+ # Controle interativo/diagnostico
90
+ selected_steps: list[str] | None = None
91
+ confirm_each_step: bool = False
92
+ debug_snapshots: bool = False
93
+ post_diagnose: bool = False
94
+ color_prompts: bool = True
95
+ interactive_steps: bool = False
60
96
 
61
97
 
62
98
  def resolve_script_path(script_name: str) -> Path:
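On the utils.py side, `_build_file_handler` keeps logging usable without root by falling back to `~/.raijin-server.log` when opening the `/var/log` location raises `PermissionError`, while `active_log_file` and `available_log_files` let `debug logs` discover the handler's real path plus its rotated siblings via a simple glob. A small sketch of the discovery half against a throwaway directory (the `/tmp/raijin-demo` path is only for the example):

```python
from pathlib import Path

def available_log_files(base: Path) -> list:
    """Active log plus its RotatingFileHandler backups (<name>.1, <name>.2, ...)."""
    return [p for p in sorted(base.parent.glob(base.name + "*")) if p.is_file()]

# Example: create a fake log plus one rotated backup and list both.
base = Path("/tmp/raijin-demo/raijin-server.log")
base.parent.mkdir(parents=True, exist_ok=True)
base.write_text("current\n")
base.with_name(base.name + ".1").write_text("rotated\n")
print(available_log_files(base))  # [.../raijin-server.log, .../raijin-server.log.1]
```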
raijin_server-{0.2.6 → 0.2.7}.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: raijin-server
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: CLI para automacao de setup e hardening de servidores Ubuntu Server.
5
5
  Home-page: https://example.com/raijin-server
6
6
  Author: Equipe Raijin
@@ -61,52 +61,29 @@ CLI em Python (Typer) para automatizar setup e hardening de servidores Ubuntu Se
61
61
 
62
62
  ## Requisitos
63
63
 
64
- - Python >= 3.9
65
- - Ubuntu Server 20.04+ (testado em 24.04)
66
- - Permissões root/sudo
67
- - Conectividade com internet
68
- - Mínimo 4GB RAM, 20GB disco livre
69
- - Ferramentas: `curl`, `apt-get`, `systemctl`
64
+ ## Instalação (sempre em venv midgard)
70
65
 
71
- Ferramentas adicionais (instaladas pelos módulos quando necessário):
72
- - `helm` (>=3.8 para OCI)
73
- - `kubectl`, `kubeadm`
74
- - `velero`, `istioctl`
75
-
76
- ## Instalacao
77
-
78
- Sem venv (global):
66
+ Use apenas o venv `~/.venvs/midgard` para padronizar ambiente e logs.
79
67
 
80
68
  ```bash
81
- python -m pip install .
82
- ```
83
-
84
- Com venv (recomendado para desenvolvimento):
85
-
86
- ```bash
87
- python -m venv .venv
88
- source .venv/bin/activate
89
- python -m pip install -e .
90
- ```
69
+ # 1) Criar/reativar venv midgard
70
+ python3 -m venv ~/.venvs/midgard
71
+ source ~/.venvs/midgard/bin/activate
72
+ pip install -U pip setuptools
91
73
 
92
- ### Instalação em Produção (Recomendado)
74
+ # 2) Instalar a partir do source (dev)
75
+ pip install -U raijin-server
93
76
 
94
- Para servidores em produção, use um venv isolado e execute com sudo preservando o ambiente:
77
+ # 3) Uso com sudo preservando o venv
78
+ sudo -E ~/.venvs/midgard/bin/raijin-server --version
79
+ sudo -E ~/.venvs/midgard/bin/raijin-server validate
80
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install
95
81
 
96
- ```bash
97
- # 1. Sair do venv atual (se estiver ativo)
82
+ # 4) Quando terminar
98
83
  deactivate
84
+ ```
99
85
 
100
- # 2. (Opcional) Remover venv antigo
101
- rm -rf ~/.venvs/raijin
102
-
103
- # 3. Criar venv novo
104
- python3 -m venv ~/.venvs/raijin
105
- source ~/.venvs/raijin/bin/activate
106
- pip install -U pip setuptools
107
-
108
- # 4. Instalar a versão mais recente
109
- pip install -U raijin-server
86
+ > Dica: se precisar reinstalar, remova o venv (`rm -rf ~/.venvs/midgard`), recrie e repita o passo 2. O `-E` no sudo mantém o venv ativo para o Python.
110
87
 
111
88
  # 5. Rodar usando root preservando o venv
112
89
  sudo -E ~/.venvs/raijin/bin/raijin-server --version
@@ -124,30 +101,70 @@ deactivate
124
101
  ### Validar Sistema
125
102
  ```bash
126
103
  # Verifica se o sistema atende pré-requisitos
127
- sudo raijin-server validate
104
+ sudo -E ~/.venvs/midgard/bin/raijin-server validate
128
105
  ```
129
106
 
130
107
  ### Menu Interativo
131
108
  ```bash
132
- # Menu visual com stautils.py`: Funções utilitárias com retry, timeout e logging.
133
- - `src/raijin_server/validators.py`: Validações de pré-requisitos e dependências.
134
- - `src/raijin_server/healthchecks.py`: Health checks pós-instalação.
135
- - `src/raijin_server/config.py`: Gerenciamento de configuração via arquivo.
136
- - `src/raijin_server/modules/`: Automações por tópico (hardening, network, essentials, firewall, kubernetes, calico, istio, traefik, kong, minio, prometheus, grafana, loki, harness, velero, kafka).
137
- - `src/raijin_server/scripts/`: Shells empacotados usados pelos módulos e scripts auxiliares.
138
- - `ARCHITECTURE.md`: Visão do desenho técnico.
139
- - `AUDIT.md`: Relatório completo de auditoria e melhorias.
140
- - `SECURITY.md`: Como reportar vulnerabilidades
109
+ # Menu visual com atalho para módulos
110
+ sudo -E ~/.venvs/midgard/bin/raijin-server menu
111
+ ```
112
+
141
113
  ### Execução Direta de Módulos
142
114
  ```bash
143
115
  # Executar módulo específico
144
- sudo raijin-server kubernetes
116
+ sudo -E ~/.venvs/midgard/bin/raijin-server kubernetes
145
117
 
146
118
  # Dry-run (simula sem aplicar)
147
- sudo raijin-server --dry-run kubernetes
119
+ sudo -E ~/.venvs/midgard/bin/raijin-server --dry-run kubernetes
148
120
 
149
121
  # Pular validações (não recomendado)
150
- sudo raijin-server --skip-validation kubernetes
122
+ sudo -E ~/.venvs/midgard/bin/raijin-server --skip-validation kubernetes
123
+ ```
124
+
125
+ ### Instalação Completa com seleção de passos
126
+ ```bash
127
+ # Rodar tudo (padrão)
128
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install
129
+
130
+ # Escolher passos antes de rodar
131
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install --select-steps
132
+
133
+ # Definir lista fixa (ordem original preservada)
134
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install --steps "kubernetes,calico,cert_manager,traefik"
135
+
136
+ # Pedir confirmação a cada módulo
137
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install --confirm-each
138
+
139
+ # Modo debug: snapshots + diagnose pós-módulo
140
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install --debug-mode
141
+
142
+ # Apenas snapshots após cada módulo (pós-kubernetes)
143
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install --snapshots
144
+
145
+ # Apenas diagnose pós-módulo (ex.: cert-manager)
146
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install --post-diagnose
147
+ ```
148
+
149
+ ### Depuração e Logs (pós-Kubernetes)
150
+ ```bash
151
+ # Ver todos os logs do CLI com pager (less)
152
+ sudo -E ~/.venvs/midgard/bin/raijin-server debug logs --lines 400
153
+
154
+ # Seguir logs em tempo real
155
+ sudo -E ~/.venvs/midgard/bin/raijin-server debug logs --follow
156
+
157
+ # Snapshot do cluster: nodes, pods e eventos (últimos 200)
158
+ sudo -E ~/.venvs/midgard/bin/raijin-server debug kube --events 200
159
+
160
+ # Focar em um namespace (ex.: cert-manager)
161
+ sudo -E ~/.venvs/midgard/bin/raijin-server debug kube --namespace cert-manager --events 150
162
+
163
+ # Consultar logs do kubelet via journalctl
164
+ sudo -E ~/.venvs/midgard/bin/raijin-server debug journal --service kubelet --lines 300
165
+
166
+ # Consultar outro serviço systemd (ex.: containerd)
167
+ sudo -E ~/.venvs/midgard/bin/raijin-server debug journal --service containerd --lines 200
151
168
  ```
152
169
 
153
170
  ### Automação via Arquivo de Configuração
@@ -161,41 +178,41 @@ sudo raijin-server --skip-validation kubernetes
161
178
 
162
179
  ```bash
163
180
  # 1. Validar sistema
164
- sudo raijin-server validate
181
+ sudo -E ~/.venvs/midgard/bin/raijin-server validate
165
182
 
166
183
  # 2. Base do sistema
167
- sudo raijin-server essentials
168
- sudo raijin-server hardening
169
- sudo raijin-server network # OPCIONAL: pule se IP já configurado via provedor ISP
170
- sudo raijin-server firewall
184
+ sudo -E ~/.venvs/midgard/bin/raijin-server essentials
185
+ sudo -E ~/.venvs/midgard/bin/raijin-server hardening
186
+ sudo -E ~/.venvs/midgard/bin/raijin-server network # OPCIONAL: pule se IP já configurado via provedor ISP
187
+ sudo -E ~/.venvs/midgard/bin/raijin-server firewall
171
188
 
172
189
  # 3. Kubernetes
173
- sudo raijin-server kubernetes
174
- sudo raijin-server calico
175
- sudo raijin-server secrets
176
- sudo raijin-server cert-manager
190
+ sudo -E ~/.venvs/midgard/bin/raijin-server kubernetes
191
+ sudo -E ~/.venvs/midgard/bin/raijin-server calico
192
+ sudo -E ~/.venvs/midgard/bin/raijin-server secrets
193
+ sudo -E ~/.venvs/midgard/bin/raijin-server cert-manager
177
194
 
178
195
  # 4. Ingress (escolha um)
179
- sudo raijin-server traefik
196
+ sudo -E ~/.venvs/midgard/bin/raijin-server traefik
180
197
  # OU
181
- sudo raijin-server kong
198
+ sudo -E ~/.venvs/midgard/bin/raijin-server kong
182
199
 
183
200
  # 5. Observabilidade
184
- sudo raijin-server prometheus
185
- sudo raijin-server grafana
186
- sudo raijin-server observability-ingress
187
- sudo raijin-server observability-dashboards
188
- sudo raijin-server loki
201
+ sudo -E ~/.venvs/midgard/bin/raijin-server prometheus
202
+ sudo -E ~/.venvs/midgard/bin/raijin-server grafana
203
+ sudo -E ~/.venvs/midgard/bin/raijin-server observability-ingress
204
+ sudo -E ~/.venvs/midgard/bin/raijin-server observability-dashboards
205
+ sudo -E ~/.venvs/midgard/bin/raijin-server loki
189
206
 
190
207
  # 6. Storage e Mensageria (opcional)
191
- sudo raijin-server minio
192
- sudo raijin-server kafka
208
+ sudo -E ~/.venvs/midgard/bin/raijin-server minio
209
+ sudo -E ~/.venvs/midgard/bin/raijin-server kafka
193
210
 
194
211
  # 7. Backup
195
- sudo raijin-server velero
212
+ sudo -E ~/.venvs/midgard/bin/raijin-server velero
196
213
 
197
214
  # 8. Service Mesh (opcional)
198
- sudo raijin-server istio
215
+ sudo -E ~/.venvs/midgard/bin/raijin-server istio
199
216
  ```
200
217
 
201
218
  ### IP Estático (pular se já configurado)
@@ -208,7 +225,7 @@ O módulo `network` é **opcional** quando:
208
225
  Para pular automaticamente em automações:
209
226
  ```bash
210
227
  export RAIJIN_SKIP_NETWORK=1
211
- sudo raijin-server full-install
228
+ sudo -E ~/.venvs/midgard/bin/raijin-server full-install
212
229
  ```
213
230
 
214
231
  O módulo detecta automaticamente se já existe um Netplan com IP estático e pergunta
@@ -222,12 +239,12 @@ se deseja pular. Se executar manualmente, basta responder "não" quando pergunta
222
239
  ### Comandos Úteis
223
240
  ```bash
224
241
  # Versão (flag ou comando)
225
- raijin-server --version
226
- raijin-server -V
227
- raijin-server version
242
+ ~/.venvs/midgard/bin/raijin-server --version
243
+ ~/.venvs/midgard/bin/raijin-server -V
244
+ ~/.venvs/midgard/bin/raijin-server version
228
245
 
229
246
  # Monitorar logs
230
- tail -f /var/log/raijin-server/raijin-server.log
247
+ sudo -E ~/.venvs/midgard/bin/raijin-server debug logs --follow
231
248
 
232
249
  # Rotacao de logs (default: 20MB, 5 backups)
233
250
  # Ajuste via env:
@@ -317,7 +334,7 @@ O helper garante o caminho absoluto correto independentemente de onde o pacote f
317
334
  O módulo [src/raijin_server/modules/apokolips_demo.py](src/raijin_server/modules/apokolips_demo.py) cria um namespace dedicado, ConfigMap com HTML, Deployment NGINX, Service e Ingress Traefik com uma landing page "Apokolips" para validar o tráfego externo.
318
335
 
319
336
  ```bash
320
- sudo raijin-server apokolips-demo
337
+ sudo -E ~/.venvs/midgard/bin/raijin-server apokolips-demo
321
338
  ```
322
339
 
323
340
  Personalização rápida:
@@ -364,7 +381,7 @@ Isso permite manter o isolamento padrão enquanto libera acesso seletivo para in
364
381
  Execute o modulo `secrets` para instalar os controladores:
365
382
 
366
383
  ```bash
367
- sudo raijin-server secrets
384
+ sudo -E ~/.venvs/midgard/bin/raijin-server secrets
368
385
  ```
369
386
 
370
387
  Passos realizados:
raijin_server-{0.2.6 → 0.2.7}.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
1
- raijin_server/__init__.py,sha256=7-69Vj-HYrv98hWrKmwDqDQ-ehtTqJebx1JeP4St6Q4,94
2
- raijin_server/cli.py,sha256=PfuIXc-pw1yZtJzCrxDVSWSsPAVBt9wqZBF-dWh6mwo,19274
1
+ raijin_server/__init__.py,sha256=30PUXP9hr-N0U9chGsPaORRkJKEeGnKMrcXhWTwR054,94
2
+ raijin_server/cli.py,sha256=aQxew8FCN-mZoN-ghBasm97gLk5WkOaIcpeucTpXpXY,24821
3
3
  raijin_server/config.py,sha256=Dta2CS1d6RgNiQ84P6dTXk98boFrjzuvhs_fCdlm0I4,4810
4
4
  raijin_server/healthchecks.py,sha256=BJyWyUDtEswEblvGwWMejtMnsUb8kJcULVdS9iycrcc,14565
5
- raijin_server/utils.py,sha256=Gs182mcLVM3ClCADFIK9Qi1fQA7BfunaTu0ie-8pAvo,19692
5
+ raijin_server/utils.py,sha256=9RnGnPoUTYOpMVRLNa4P4lIQrJNQLkSkPUxycZRGv78,20827
6
6
  raijin_server/validators.py,sha256=qOZMHgwjHogVf17UPlxfUCpQd9qAGQW7tycd8mUvnEs,9404
7
7
  raijin_server/modules/__init__.py,sha256=e_IbkhLGPcF8to9QUmIESP6fpcTOYcIhaXLKIvqRJMY,920
8
8
  raijin_server/modules/apokolips_demo.py,sha256=8ltsXRbVDwlDwLMIvh02NG-FeAfBWw_v6lh7IGOyNqs,13725
9
9
  raijin_server/modules/bootstrap.py,sha256=oVIGNRW_JbgY8zXNHGAIP0vGbbHNHyQexthxo5zhbcw,9762
10
- raijin_server/modules/calico.py,sha256=a8N7YYv7NoaspPKdhRtwHy3V2mM4cP5xA1H8BwslB18,4139
11
- raijin_server/modules/cert_manager.py,sha256=Kb8N60j3BDjkNS8t8aTsdsKy5syRWobccP3PBpv-Q8E,45887
10
+ raijin_server/modules/calico.py,sha256=TTPF1bLFdAKb3IVOqFqRxNblULkRmMMRylsIBp4w8I8,6700
11
+ raijin_server/modules/cert_manager.py,sha256=YvqInfnI06VLFEgau4H0koyBxarFh6vwxvhv7HuQ4Z0,46961
12
12
  raijin_server/modules/essentials.py,sha256=2xUXCyCQtFGd2DnCKV81N1R6bEJqH8zaet8mLovtQ1I,689
13
13
  raijin_server/modules/firewall.py,sha256=h6AISqiZeTinVT7BjmQIS872qRAFZJLg7meqlth3cfw,757
14
- raijin_server/modules/full_install.py,sha256=aR3yOuD7y0KLI20eMrxuFBNrWWn7JMpI4HFKNizEF3o,7464
14
+ raijin_server/modules/full_install.py,sha256=xiKe2GLuZ97c4YdTmhP-kwDVuJJ9Xq3dlgcLlqSPeYM,15518
15
15
  raijin_server/modules/grafana.py,sha256=zxYpWBM-fD8vTgoJ2Hmb9P66wz_JuiidO6_cGK3jG30,1809
16
16
  raijin_server/modules/hardening.py,sha256=4hz3ifkMhPlXa2n7gPxN0gitQgzALZ-073vuU3LM4RI,1616
17
17
  raijin_server/modules/harness.py,sha256=dhZ89YIhlkuxiRU1deN6wXVWnXm0xeI03PwYf_qgfak,1527
@@ -24,7 +24,7 @@ raijin_server/modules/minio.py,sha256=BVvsEaJlJUV92_ep7pKsBhSYPjWZrDOB3J6XAWYAHY
24
24
  raijin_server/modules/network.py,sha256=bwVljaVvTc6FbbD-XtDpqqNL-fXMB9-iWVWsXToBvt4,4804
25
25
  raijin_server/modules/observability_dashboards.py,sha256=fVz0WEOQrUTF5rJ__Nu_onyBuwL_exFmysWMmg8AE9w,7319
26
26
  raijin_server/modules/observability_ingress.py,sha256=Fh1rlFWueBNHnOkHuoHYyhILmpO-iQXINybSUYbYsHQ,5738
27
- raijin_server/modules/prometheus.py,sha256=Et-Tj6LrM7WDyoYRSY464E67TrEHbRe2G8T8obagC48,1066
27
+ raijin_server/modules/prometheus.py,sha256=Rs9BREmaoKlyteNdAQZnSIeJfsRO0RQKyyL2gTnXyCw,3716
28
28
  raijin_server/modules/sanitize.py,sha256=eytL_mCYF57qnjf6g752VRC4Yl27dDJ0OQP2rjxaR70,4523
29
29
  raijin_server/modules/secrets.py,sha256=xpV3gIMnwQdAI2j69Ck5daIK4wlYJA_1rkWTtSfVNk0,3715
30
30
  raijin_server/modules/ssh_hardening.py,sha256=oQdk-EVnEHNMKIWvoFuZzI4jK0nNO8IAY4hkB4pj8zw,4025
@@ -33,12 +33,12 @@ raijin_server/modules/velero.py,sha256=_CV0QQnWr5L-CWXDOiD9Ef4J7GaQT-s9yNBwqp_FL
33
33
  raijin_server/modules/vpn.py,sha256=hF-0vA17VKTxhQLDBSEeqI5aPQpiaaj4IpUf9l6lr64,8297
34
34
  raijin_server/scripts/__init__.py,sha256=deduGfHf8BMVWred4ux5LfBDT2NJ5XYeJAt2sDEU4qs,53
35
35
  raijin_server/scripts/checklist.sh,sha256=j6E0Kmk1EfjLvKK1VpCqzXJAXI_7Bm67LK4ndyCxWh0,1842
36
- raijin_server/scripts/install.sh,sha256=IZOTujOSGmKpznwgL59picsQNVzYkai6FtfFS3Klf34,3908
37
- raijin_server/scripts/log_size_metric.sh,sha256=rC2Ck4xnYVJV4Qymu24-indC8bkzfZs4FBqqxGPRl1I,1143
38
- raijin_server/scripts/pre-deploy-check.sh,sha256=naPUgKjnKgsh-eGDH2623C7zcr9VjDEw1H0lfYaXW8c,4853
39
- raijin_server-0.2.6.dist-info/licenses/LICENSE,sha256=kJsMCjOiRZE0AQNtxWqBa32z9kMAaF4EUxyHj3hKaJo,1105
40
- raijin_server-0.2.6.dist-info/METADATA,sha256=KXv3RV6GSO2qQJ85n_SFJP6h10rbph0WbTJ611fG-M4,18925
41
- raijin_server-0.2.6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
42
- raijin_server-0.2.6.dist-info/entry_points.txt,sha256=3ZvxDX4pvcjkIRsXAJ69wIfVmKa78LKo-C3QhqN2KVM,56
43
- raijin_server-0.2.6.dist-info/top_level.txt,sha256=Yz1xneCRtsZOzbPIcTAcrSxd-1p80pohMXYAZ74dpok,14
44
- raijin_server-0.2.6.dist-info/RECORD,,
36
+ raijin_server/scripts/install.sh,sha256=Y1ickbQ4siQ0NIPs6UgrqUr8WWy7U0LHmaTQbEgavoI,3949
37
+ raijin_server/scripts/log_size_metric.sh,sha256=Iv4SsX8AuCYRou-klYn32mX41xB6j0xJGLBO6riw4rU,1208
38
+ raijin_server/scripts/pre-deploy-check.sh,sha256=XqMo7IMIpwUHF17YEmU0-cVmTDMoCGMBFnmS39FidI4,4912
39
+ raijin_server-0.2.7.dist-info/licenses/LICENSE,sha256=kJsMCjOiRZE0AQNtxWqBa32z9kMAaF4EUxyHj3hKaJo,1105
40
+ raijin_server-0.2.7.dist-info/METADATA,sha256=YpgpUhp0TYGWYwEkKd8nDpCLY0MfyWsCWPq7D0zTrJQ,20362
41
+ raijin_server-0.2.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
42
+ raijin_server-0.2.7.dist-info/entry_points.txt,sha256=3ZvxDX4pvcjkIRsXAJ69wIfVmKa78LKo-C3QhqN2KVM,56
43
+ raijin_server-0.2.7.dist-info/top_level.txt,sha256=Yz1xneCRtsZOzbPIcTAcrSxd-1p80pohMXYAZ74dpok,14
44
+ raijin_server-0.2.7.dist-info/RECORD,,