raijin-server 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
raijin_server/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """Pacote principal do CLI Raijin Server."""
2
2
 
3
- __version__ = "0.2.4"
3
+ __version__ = "0.2.7"
4
4
 
5
5
  __all__ = ["__version__"]
raijin_server/cli.py CHANGED
@@ -6,6 +6,8 @@ import os
6
6
  from pathlib import Path
7
7
  from typing import Callable, Dict, Optional
8
8
 
9
+ import subprocess
10
+
9
11
  import typer
10
12
  from rich import box
11
13
  from rich.console import Console
@@ -42,7 +44,7 @@ from raijin_server.modules import (
42
44
  velero,
43
45
  vpn,
44
46
  )
45
- from raijin_server.utils import ExecutionContext, logger
47
+ from raijin_server.utils import ExecutionContext, logger, active_log_file, available_log_files, page_text, ensure_tool
46
48
  from raijin_server.validators import validate_system_requirements, check_module_dependencies
47
49
  from raijin_server.healthchecks import run_health_check
48
50
  from raijin_server.config import ConfigManager
@@ -131,6 +133,19 @@ MODULE_DESCRIPTIONS: Dict[str, str] = {
131
133
  }
132
134
 
133
135
 
136
+ def _capture_cmd(cmd: list[str], timeout: int = 30) -> str:
137
+ try:
138
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
139
+ if result.returncode == 0:
140
+ return result.stdout.strip() or "(sem saida)"
141
+ return (
142
+ f"✗ {' '.join(cmd)}\n"
143
+ f"{(result.stdout or '').strip()}\n{(result.stderr or '').strip()}".strip()
144
+ )
145
+ except Exception as exc:
146
+ return f"✗ {' '.join(cmd)} -> {exc}"
147
+
148
+
134
149
  def _run_module(ctx: typer.Context, name: str, skip_validation: bool = False) -> None:
135
150
  handler = MODULES.get(name)
136
151
  if handler is None:
@@ -542,6 +557,120 @@ def cert_list_issuers(ctx: typer.Context) -> None:
542
557
  pass
543
558
 
544
559
 
560
+ # ============================================================================
561
+ # Ferramentas de Depuração / Logs
562
+ # ============================================================================
563
+ debug_app = typer.Typer(help="Ferramentas de depuracao e investigacao de logs")
564
+ app.add_typer(debug_app, name="debug")
565
+
566
+
567
@debug_app.command(name="logs")
def debug_logs(
    lines: int = typer.Option(200, "--lines", "-n", help="Quantidade de linhas ao ler"),
    follow: bool = typer.Option(False, "--follow", "-f", help="Segue o log com tail -F"),
    pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
) -> None:
    """Mostra logs do raijin-server com opcao de follow."""
    # NOTE: Typer uses the docstring above as the command's help text, so it
    # is kept verbatim; maintainer documentation lives in these comments.
    #
    # Behaviour:
    #   * no log files found -> print a warning and return;
    #   * --follow           -> hand the terminal to `tail -F` on the active log;
    #   * otherwise          -> print the last `lines` lines of every log file
    #                           (the whole file when `lines` <= 0), through the
    #                           pager unless --no-pager was given.
    logs = available_log_files()
    if not logs:
        typer.secho("Nenhum log encontrado", fg=typer.colors.YELLOW)
        return

    main_log = active_log_file()
    typer.echo(f"Log ativo: {main_log}")

    if follow:
        subprocess.run(["tail", "-n", str(lines), "-F", str(main_log)])
        return

    chunks = []
    for path in logs:
        try:
            # errors="replace": a few undecodable bytes should not hide the
            # rest of the log behind an error message.
            data = path.read_text(errors="replace")
            if lines > 0:
                # Honour --lines here as well; previously the option only
                # took effect together with --follow.
                data = "\n".join(data.splitlines()[-lines:])
        except Exception as exc:
            # Best-effort: report the unreadable file inline and keep going.
            data = f"[erro ao ler {path}: {exc}]"
        chunks.append(f"===== {path} =====\n{data}")

    output = "\n\n".join(chunks)
    if pager:
        page_text(output)
    else:
        typer.echo(output)
600
+
601
+
602
@debug_app.command(name="kube")
def debug_kube(
    ctx: typer.Context,
    events: int = typer.Option(200, "--events", "-e", help="Quantas linhas finais de eventos exibir"),
    namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Filtra pods/eventos por namespace"),
    pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
) -> None:
    """Snapshot rapido de nodes, pods e eventos do cluster."""
    # NOTE: Typer uses the docstring above as the command's help text, so it
    # is kept verbatim.
    #
    # Collects three kubectl reports (nodes, pods, events) and prints them as
    # one text, each under a "[title]" header. Pods and events are limited to
    # `namespace` when given, otherwise all namespaces are queried.
    exec_ctx = ctx.obj or ExecutionContext()
    ensure_tool("kubectl", exec_ctx)

    # Same scope arguments for both the pods and the events query.
    scope = ["-n", namespace] if namespace else ["-A"]

    nodes_report = _capture_cmd(["kubectl", "get", "nodes", "-o", "wide"])
    pods_report = _capture_cmd(["kubectl", "get", "pods", *scope, "-o", "wide"])
    events_report = _capture_cmd(["kubectl", "get", "events", *scope, "--sort-by=.lastTimestamp"])
    if events_report and events > 0:
        # Keep only the trailing `events` lines of the event listing.
        events_report = "\n".join(events_report.splitlines()[-events:])

    titled_reports = (
        ("kubectl get nodes -o wide", nodes_report),
        ("kubectl get pods", pods_report),
        ("kubectl get events", events_report),
    )
    combined = "\n\n".join(f"[{title}]\n{body}" for title, body in titled_reports)

    show = page_text if pager else typer.echo
    show(combined)
641
+
642
+
643
@debug_app.command(name="journal")
def debug_journal(
    ctx: typer.Context,
    service: str = typer.Option("kubelet", "--service", "-s", help="Unidade systemd para inspecionar"),
    lines: int = typer.Option(200, "--lines", "-n", help="Linhas a exibir"),
    follow: bool = typer.Option(False, "--follow", "-f", help="Segue o journal em tempo real"),
    pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
) -> None:
    """Mostra logs de services (ex.: kubelet) via journalctl."""
    # NOTE: Typer uses the docstring above as the command's help text, so it
    # is kept verbatim.
    exec_ctx = ctx.obj or ExecutionContext()
    ensure_tool("journalctl", exec_ctx)

    base_cmd = ["journalctl", "-u", service, "-n", str(lines)]

    if follow:
        # Live mode: journalctl writes straight to the terminal; nothing is
        # captured or paged.
        subprocess.run([*base_cmd, "-f"])
        return

    # Snapshot mode: capture the output with journalctl's own pager disabled.
    captured = _capture_cmd([*base_cmd, "--no-pager"], timeout=60)
    if lines > 0:
        # Trim to the requested number of trailing lines.
        captured = "\n".join(captured.splitlines()[-lines:])

    text = f"[journalctl -u {service} -n {lines}]\n{captured}"
    show = page_text if pager else typer.echo
    show(text)
672
+
673
+
545
674
  # ============================================================================
546
675
  # Comandos Existentes
547
676
  # ============================================================================
@@ -554,8 +683,24 @@ def bootstrap_cmd(ctx: typer.Context) -> None:
554
683
 
555
684
 
556
685
  @app.command(name="full-install")
557
- def full_install_cmd(ctx: typer.Context) -> None:
686
def full_install_cmd(
    ctx: typer.Context,
    steps: Optional[str] = typer.Option(None, "--steps", help="Lista de modulos, separado por virgula"),
    confirm_each: bool = typer.Option(False, "--confirm-each", help="Pedir confirmacao antes de cada modulo"),
    debug_mode: bool = typer.Option(False, "--debug-mode", help="Habilita snapshots e diagnose pos-modulo"),
    snapshots: bool = typer.Option(False, "--snapshots", help="Habilita snapshots de cluster apos cada modulo"),
    post_diagnose: bool = typer.Option(False, "--post-diagnose", help="Executa diagnose pos-modulo quando disponivel"),
    select_steps: bool = typer.Option(False, "--select-steps", help="Pergunta quais modulos executar antes de iniciar"),
) -> None:
    """Executa instalacao completa e automatizada do ambiente de producao."""
    # NOTE: Typer uses the docstring above as the command's help text, so it
    # is kept verbatim.
    #
    # Copies the CLI flags onto the execution context (creating one when the
    # callback did not provide it) and then runs the "full_install" module.
    exec_ctx = ctx.obj or ExecutionContext()

    if steps:
        # "a, b,,c" -> ["a", "b", "c"]: trim each name and drop empty entries.
        trimmed = (piece.strip() for piece in steps.split(","))
        exec_ctx.selected_steps = [name for name in trimmed if name]

    exec_ctx.interactive_steps = select_steps
    exec_ctx.confirm_each_step = confirm_each
    # --debug-mode switches on both snapshots and post-diagnose; a value
    # already set on the context is kept when no flag was passed.
    exec_ctx.debug_snapshots = debug_mode or snapshots or exec_ctx.debug_snapshots
    exec_ctx.post_diagnose = debug_mode or post_diagnose or exec_ctx.post_diagnose

    ctx.obj = exec_ctx
    _run_module(ctx, "full_install")
560
705
 
561
706
 
raijin_server/config.py CHANGED
@@ -78,15 +78,15 @@ class ConfigManager:
78
78
  "modules": {
79
79
  "network": {
80
80
  "interface": "ens18",
81
- "address": "192.168.0.10/24",
82
- "gateway": "192.168.0.1",
83
- "dns": "1.1.1.1,8.8.8.8",
81
+ "address": "192.168.1.81/24",
82
+ "gateway": "192.168.1.254",
83
+ "dns": "177.128.80.44,177.128.80.45",
84
84
  },
85
85
  "kubernetes": {
86
86
  "pod_cidr": "10.244.0.0/16",
87
87
  "service_cidr": "10.96.0.0/12",
88
88
  "cluster_name": "raijin",
89
- "advertise_address": "0.0.0.0",
89
+ "advertise_address": "192.168.1.81",
90
90
  },
91
91
  "calico": {
92
92
  "pod_cidr": "10.244.0.0/16",
@@ -124,6 +124,21 @@ def check_k8s_pods_in_namespace(namespace: str, ctx: ExecutionContext, timeout:
124
124
  )
125
125
 
126
126
 
127
def check_swap_disabled(ctx: ExecutionContext) -> tuple[bool, str]:
    """Check that no swap is active (a kubeadm/kubelet requirement).

    Args:
        ctx: Execution context; only its ``dry_run`` flag is read.

    Returns:
        ``(ok, message)``. In dry-run mode the check is skipped and
        ``(True, "dry-run")`` is returned. Any error while reading
        ``/proc/swaps`` yields ``(False, <reason>)`` instead of raising.
    """
    if ctx.dry_run:
        return True, "dry-run"

    try:
        with open("/proc/swaps") as swaps_file:
            entries = swaps_file.read().strip().splitlines()
    except Exception as exc:
        return False, f"falha ao verificar swap: {exc}"

    # /proc/swaps is a header line plus one line per active swap area, so
    # anything beyond the header means swap is still on.
    if len(entries) > 1:
        return False, "swap ativa (remova entradas do fstab e execute swapoff -a)"
    return True, "swap desativada"
140
+
141
+
127
142
  def check_helm_release(release: str, namespace: str, ctx: ExecutionContext) -> Tuple[bool, str]:
128
143
  """Verifica status de um release Helm."""
129
144
  if ctx.dry_run:
@@ -217,6 +232,13 @@ def verify_kubernetes(ctx: ExecutionContext) -> bool:
217
232
  services = ["kubelet", "containerd"]
218
233
  all_ok = True
219
234
 
235
+ swap_ok, swap_msg = check_swap_disabled(ctx)
236
+ if swap_ok:
237
+ typer.secho(f" ✓ Swap: {swap_msg}", fg=typer.colors.GREEN)
238
+ else:
239
+ typer.secho(f" ✗ Swap: {swap_msg}", fg=typer.colors.RED)
240
+ all_ok = False
241
+
220
242
  for service in services:
221
243
  ok, status = check_systemd_service(service, ctx)
222
244
  if ok:
@@ -1,7 +1,8 @@
1
1
  """Configuracao de Calico como CNI com CIDR customizado e policies opinativas."""
2
2
 
3
+ import json
3
4
  from pathlib import Path
4
- from typing import Iterable
5
+ from typing import Iterable, List
5
6
 
6
7
  import typer
7
8
 
@@ -16,6 +17,7 @@ from raijin_server.utils import (
16
17
 
17
18
  EGRESS_LABEL_KEY = "networking.raijin.dev/egress"
18
19
  EGRESS_LABEL_VALUE = "internet"
20
+ DEFAULT_WORKLOAD_NAMESPACE = "apps"
19
21
 
20
22
 
21
23
  def _apply_policy(content: str, ctx: ExecutionContext, suffix: str) -> None:
@@ -25,6 +27,23 @@ def _apply_policy(content: str, ctx: ExecutionContext, suffix: str) -> None:
25
27
  path.unlink(missing_ok=True)
26
28
 
27
29
 
30
def _ensure_namespace(namespace: str, ctx: ExecutionContext) -> None:
    """Ensure a workload namespace exists and carries the default labels.

    Renders a Namespace manifest, writes it to a temporary file under /tmp
    and applies it through ``kubectl_apply``.

    Args:
        namespace: Name of the namespace to create or update.
        ctx: Execution context forwarded to ``write_file`` / ``kubectl_apply``.
    """
    manifest = f"""apiVersion: v1
kind: Namespace
metadata:
  name: {namespace}
  labels:
    raijin/workload-profile: production
    networking.raijin.dev/default-egress: restricted
"""

    path = Path(f"/tmp/raijin-ns-{namespace}.yaml")
    try:
        write_file(path, manifest, ctx)
        kubectl_apply(str(path), ctx)
    finally:
        # Remove the temporary manifest even when one of the helpers raises;
        # missing_ok covers the case where the file was never written.
        path.unlink(missing_ok=True)
45
+
46
+
28
47
  def _build_default_deny(namespace: str) -> str:
29
48
  return f"""apiVersion: networking.k8s.io/v1
30
49
  kind: NetworkPolicy
@@ -62,6 +81,43 @@ def _split_namespaces(raw_value: str) -> Iterable[str]:
62
81
  return [ns.strip() for ns in raw_value.split(",") if ns.strip()]
63
82
 
64
83
 
84
def _list_workloads_without_egress(namespaces: List[str], ctx: ExecutionContext) -> None:
    """List workloads whose pods lack the egress label; only warn on failure.

    For every namespace, Deployments, StatefulSets and DaemonSets are fetched
    as JSON via kubectl and those without the
    ``EGRESS_LABEL_KEY=EGRESS_LABEL_VALUE`` label are printed. The
    allow-egress policy targets pods, so the label is read from the pod
    template (``spec.template.metadata.labels``): a label set only on the
    workload object itself never reaches its pods.

    Args:
        namespaces: Namespaces to inspect.
        ctx: Execution context; in dry-run mode nothing is queried.

    Nothing is returned or raised: kubectl or parsing failures are reported
    as warnings and the remaining namespaces are still processed.
    """
    if ctx.dry_run:
        typer.echo("[dry-run] Skip listagem de workloads para liberação de egress")
        return

    typer.secho("\nWorkloads sem liberação de egress (adicione label para liberar internet):", fg=typer.colors.CYAN)
    for ns in namespaces:
        result = run_cmd(
            ["kubectl", "get", "deploy,statefulset,daemonset", "-n", ns, "-o", "json"],
            ctx,
            check=False,
        )
        if result.returncode != 0:
            msg = (result.stderr or result.stdout or "erro desconhecido").strip()
            typer.secho(f" Aviso: nao foi possivel listar workloads em '{ns}' ({msg})", fg=typer.colors.YELLOW)
            continue

        try:
            items = json.loads(result.stdout or "{}").get("items", [])
            pending = []
            for item in items:
                meta = item.get("metadata", {})
                # Labels the pods will actually carry; `or {}` guards against
                # keys that are present but null in the JSON.
                template = (item.get("spec") or {}).get("template") or {}
                pod_labels = (template.get("metadata") or {}).get("labels") or {}
                if pod_labels.get(EGRESS_LABEL_KEY) != EGRESS_LABEL_VALUE:
                    pending.append(f"{meta.get('namespace', ns)}/{meta.get('name', 'desconhecido')}")

            if pending:
                for name in pending:
                    typer.echo(f" - {name}")
            else:
                typer.echo(f" Nenhum workload pendente em '{ns}'")
        except Exception as exc:
            # Best-effort listing: malformed output must not abort the setup.
            typer.secho(f" Aviso: falha ao processar workloads em '{ns}': {exc}", fg=typer.colors.YELLOW)
119
+
120
+
65
121
  def _check_cluster_available(ctx: ExecutionContext) -> bool:
66
122
  """Verifica se o cluster Kubernetes esta acessivel."""
67
123
  if ctx.dry_run:
@@ -99,13 +155,19 @@ def run(ctx: ExecutionContext) -> None:
99
155
  typer.echo("Aplicando Calico como CNI...")
100
156
  pod_cidr = typer.prompt("Pod CIDR (Calico)", default="10.244.0.0/16")
101
157
 
158
+ typer.secho(
159
+ f"Criando namespace padrao de workloads '{DEFAULT_WORKLOAD_NAMESPACE}' (production-ready)...",
160
+ fg=typer.colors.CYAN,
161
+ )
162
+ _ensure_namespace(DEFAULT_WORKLOAD_NAMESPACE, ctx)
163
+
102
164
  manifest_url = "https://raw.githubusercontent.com/projectcalico/calico/v3.27.2/manifests/calico.yaml"
103
165
  cmd = f"curl -s {manifest_url} | sed 's#192.168.0.0/16#{pod_cidr}#' | kubectl apply -f -"
104
166
  run_cmd(cmd, ctx, use_shell=True)
105
167
 
106
168
  deny_namespaces_raw = typer.prompt(
107
169
  "Namespaces para aplicar default-deny (CSV)",
108
- default="default",
170
+ default=DEFAULT_WORKLOAD_NAMESPACE,
109
171
  )
110
172
  for namespace in _split_namespaces(deny_namespaces_raw):
111
173
  typer.echo(f"Aplicando default-deny no namespace '{namespace}'...")
@@ -117,9 +179,12 @@ def run(ctx: ExecutionContext) -> None:
117
179
  ):
118
180
  allow_namespaces_raw = typer.prompt(
119
181
  "Namespaces com pods que precisam acessar APIs externas (CSV)",
120
- default="default",
182
+ default=DEFAULT_WORKLOAD_NAMESPACE,
121
183
  )
122
184
  cidr = typer.prompt("CIDR liberado (ex.: 0.0.0.0/0)", default="0.0.0.0/0")
185
+ namespaces = list(_split_namespaces(allow_namespaces_raw))
186
+ if namespaces:
187
+ _list_workloads_without_egress(namespaces, ctx)
123
188
  for namespace in _split_namespaces(allow_namespaces_raw):
124
189
  typer.echo(
125
190
  f"Criando policy allow-egress-internet em '{namespace}' para pods com "
@@ -18,6 +18,8 @@ from enum import Enum
18
18
  from pathlib import Path
19
19
  from typing import Callable, Optional, List
20
20
 
21
+ import os
22
+
21
23
  import typer
22
24
 
23
25
  from raijin_server.utils import (
@@ -34,11 +36,14 @@ CHART_REPO = "https://charts.jetstack.io"
34
36
  CHART_NAME = "cert-manager"
35
37
  NAMESPACE = "cert-manager"
36
38
  MANIFEST_PATH = Path("/tmp/raijin-cert-manager-issuer.yaml")
39
+ HELM_DATA_DIR = Path("/tmp/raijin-helm")
40
+ HELM_REPO_CONFIG = HELM_DATA_DIR / "repositories.yaml"
41
+ HELM_REPO_CACHE = HELM_DATA_DIR / "cache"
37
42
 
38
- # Timeouts mais generosos para ambientes lentos
39
- WEBHOOK_READY_TIMEOUT = 600 # 10 minutos
40
- POD_READY_TIMEOUT = 300 # 5 minutos
41
- CRD_READY_TIMEOUT = 180 # 3 minutos
43
+ # Timeouts enxutos (falha rápida em redes rápidas)
44
+ WEBHOOK_READY_TIMEOUT = 240 # 4 minutos
45
+ POD_READY_TIMEOUT = 180 # 3 minutos
46
+ CRD_READY_TIMEOUT = 120 # 2 minutos
42
47
 
43
48
 
44
49
  class DNSProvider(str, Enum):
@@ -82,6 +87,17 @@ def _get_acme_server(staging: bool) -> str:
82
87
  return "https://acme-v02.api.letsencrypt.org/directory"
83
88
 
84
89
 
90
def _helm_env() -> dict:
    """Build the environment for Helm subprocesses with repo data under /tmp.

    Creates ``HELM_DATA_DIR`` and ``HELM_REPO_CACHE`` if needed and returns a
    copy of the current process environment in which
    ``HELM_REPOSITORY_CONFIG`` and ``HELM_REPOSITORY_CACHE`` point at those
    locations; the stated motivation is avoiding permission errors.
    """
    for directory in (HELM_DATA_DIR, HELM_REPO_CACHE):
        directory.mkdir(parents=True, exist_ok=True)

    # Work on a copy so the parent process environment is left untouched.
    env = dict(os.environ)
    env["HELM_REPOSITORY_CONFIG"] = str(HELM_REPO_CONFIG)
    env["HELM_REPOSITORY_CACHE"] = str(HELM_REPO_CACHE)
    return env
99
+
100
+
85
101
  # =============================================================================
86
102
  # Builders de Manifests YAML
87
103
  # =============================================================================
@@ -519,6 +535,7 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
519
535
  capture_output=True,
520
536
  text=True,
521
537
  timeout=60,
538
+ env=_helm_env(),
522
539
  )
523
540
 
524
541
  if result.returncode != 0:
@@ -539,6 +556,7 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
539
556
  capture_output=True,
540
557
  text=True,
541
558
  timeout=120,
559
+ env=_helm_env(),
542
560
  )
543
561
 
544
562
  elapsed_update = time.time() - start
@@ -562,8 +580,8 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
562
580
  return False
563
581
 
564
582
 
565
- def _run_helm_install(ctx: ExecutionContext) -> bool:
566
- """Executa o helm upgrade --install."""
583
+ def _run_helm_install(ctx: ExecutionContext, attempt: int = 1) -> bool:
584
+ """Executa o helm upgrade --install, com uma tentativa de retry para repo/config."""
567
585
  if ctx.dry_run:
568
586
  typer.echo(" [4/5] [dry-run] Executando helm upgrade --install...")
569
587
  return True
@@ -574,6 +592,7 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
574
592
 
575
593
  cmd = [
576
594
  "helm", "upgrade", "--install", "cert-manager", "jetstack/cert-manager",
595
+ "--repo", CHART_REPO,
577
596
  "-n", NAMESPACE,
578
597
  "--create-namespace",
579
598
  "--set", "installCRDs=true",
@@ -598,6 +617,7 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
598
617
  stdout=subprocess.PIPE,
599
618
  stderr=subprocess.STDOUT,
600
619
  text=True,
620
+ env=_helm_env(),
601
621
  )
602
622
 
603
623
  output_lines = []
@@ -628,7 +648,13 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
628
648
  output = "".join(output_lines[-20:]) # Últimas 20 linhas
629
649
  logger.error(f"Helm install falhou (código {return_code}): {output}")
630
650
  typer.secho(f" ✗ Helm install falhou (código {return_code})", fg=typer.colors.RED)
631
-
651
+
652
+ needs_repo_retry = "repo jetstack not found" in output.lower() or "repositories.yaml" in output.lower()
653
+ if needs_repo_retry and attempt == 1:
654
+ typer.echo(" → Reconfigurando repositório Helm e tentando novamente...")
655
+ if _add_helm_repo(ctx):
656
+ return _run_helm_install(ctx, attempt=2)
657
+
632
658
  # Mostra as últimas linhas do erro
633
659
  typer.echo("\n Últimas linhas do log:")
634
660
  for line in output_lines[-10:]: