raijin-server 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raijin_server/__init__.py +1 -1
- raijin_server/cli.py +147 -2
- raijin_server/config.py +4 -4
- raijin_server/healthchecks.py +22 -0
- raijin_server/modules/calico.py +68 -3
- raijin_server/modules/cert_manager.py +33 -7
- raijin_server/modules/full_install.py +228 -1
- raijin_server/modules/kubernetes.py +18 -1
- raijin_server/modules/network.py +3 -3
- raijin_server/modules/prometheus.py +90 -5
- raijin_server/modules/sanitize.py +49 -1
- raijin_server/scripts/install.sh +3 -2
- raijin_server/scripts/log_size_metric.sh +17 -13
- raijin_server/scripts/pre-deploy-check.sh +2 -1
- raijin_server/utils.py +37 -1
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/METADATA +96 -79
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/RECORD +21 -21
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/WHEEL +0 -0
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/entry_points.txt +0 -0
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {raijin_server-0.2.6.dist-info → raijin_server-0.2.8.dist-info}/top_level.txt +0 -0
raijin_server/__init__.py
CHANGED
raijin_server/cli.py
CHANGED
|
@@ -6,6 +6,8 @@ import os
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Callable, Dict, Optional
|
|
8
8
|
|
|
9
|
+
import subprocess
|
|
10
|
+
|
|
9
11
|
import typer
|
|
10
12
|
from rich import box
|
|
11
13
|
from rich.console import Console
|
|
@@ -42,7 +44,7 @@ from raijin_server.modules import (
|
|
|
42
44
|
velero,
|
|
43
45
|
vpn,
|
|
44
46
|
)
|
|
45
|
-
from raijin_server.utils import ExecutionContext, logger
|
|
47
|
+
from raijin_server.utils import ExecutionContext, logger, active_log_file, available_log_files, page_text, ensure_tool
|
|
46
48
|
from raijin_server.validators import validate_system_requirements, check_module_dependencies
|
|
47
49
|
from raijin_server.healthchecks import run_health_check
|
|
48
50
|
from raijin_server.config import ConfigManager
|
|
@@ -131,6 +133,19 @@ MODULE_DESCRIPTIONS: Dict[str, str] = {
|
|
|
131
133
|
}
|
|
132
134
|
|
|
133
135
|
|
|
136
|
+
def _capture_cmd(cmd: list[str], timeout: int = 30) -> str:
    """Run ``cmd`` without a shell and return its output as text, never raising.

    Args:
        cmd: Command and arguments to execute.
        timeout: Maximum number of seconds to wait for the command.

    Returns:
        The stripped stdout when the command exits with status 0, or
        ``"(sem saida)"`` when it printed nothing. On a non-zero exit status,
        a report made of a ``✗ <command>`` line followed by the non-empty
        stdout/stderr streams. When the command cannot be started or times
        out, ``✗ <command> -> <error>``.
    """
    rendered = " ".join(cmd)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        # Missing binary, invalid arguments, undecodable output or timeout:
        # report the problem as text so the calling debug command keeps going.
        return f"✗ {rendered} -> {exc}"

    if result.returncode == 0:
        return result.stdout.strip() or "(sem saida)"

    # Drop empty streams so the report carries no stray blank lines.
    streams = ((result.stdout or "").strip(), (result.stderr or "").strip())
    return "\n".join([f"✗ {rendered}", *(text for text in streams if text)])
|
|
147
|
+
|
|
148
|
+
|
|
134
149
|
def _run_module(ctx: typer.Context, name: str, skip_validation: bool = False) -> None:
|
|
135
150
|
handler = MODULES.get(name)
|
|
136
151
|
if handler is None:
|
|
@@ -542,6 +557,120 @@ def cert_list_issuers(ctx: typer.Context) -> None:
|
|
|
542
557
|
pass
|
|
543
558
|
|
|
544
559
|
|
|
560
|
+
# ============================================================================
# Debugging / log inspection tools
# ============================================================================
# Sub-application mounted on the main CLI under the "debug" command group.
debug_app = typer.Typer(help="Ferramentas de depuracao e investigacao de logs")
app.add_typer(debug_app, name="debug")
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
@debug_app.command(name="logs")
def debug_logs(
    lines: int = typer.Option(200, "--lines", "-n", help="Quantidade de linhas ao ler"),
    follow: bool = typer.Option(False, "--follow", "-f", help="Segue o log com tail -F"),
    pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
) -> None:
    """Mostra logs do raijin-server com opcao de follow."""
    # NOTE: the docstring above is the CLI help text shown by Typer; keep it as is.
    #
    # Behaviour:
    #   * no log files available -> print a warning and return;
    #   * --follow               -> hand over to `tail -n <lines> -F <active log>`;
    #   * otherwise              -> print the last <lines> lines of every log file
    #                               (the whole file when <lines> is not positive),
    #                               through the pager unless --no-pager is given.
    logs = available_log_files()
    if not logs:
        typer.secho("Nenhum log encontrado", fg=typer.colors.YELLOW)
        return

    main_log = active_log_file()
    typer.echo(f"Log ativo: {main_log}")

    if follow:
        subprocess.run(["tail", "-n", str(lines), "-F", str(main_log)])
        return

    chunks = []
    for path in logs:
        try:
            # errors="replace": one undecodable byte must not hide the whole file.
            data = path.read_text(errors="replace")
        except OSError as exc:
            data = f"[erro ao ler {path}: {exc}]"
        else:
            # Honour --lines here too, mirroring what the follow mode passes to tail.
            if lines > 0:
                data = "\n".join(data.splitlines()[-lines:])
        chunks.append(f"===== {path} =====\n{data}")

    output = "\n\n".join(chunks)
    if pager:
        page_text(output)
    else:
        typer.echo(output)
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
@debug_app.command(name="kube")
def debug_kube(
    ctx: typer.Context,
    events: int = typer.Option(200, "--events", "-e", help="Quantas linhas finais de eventos exibir"),
    namespace: Optional[str] = typer.Option(None, "--namespace", "-n", help="Filtra pods/eventos por namespace"),
    pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
) -> None:
    """Snapshot rapido de nodes, pods e eventos do cluster."""
    # NOTE: the docstring above is the CLI help text shown by Typer; keep it as is.
    #
    # Collects three kubectl listings (nodes, pods, events), labels each one
    # with a "[title]" line and shows the combined text, paged unless
    # --no-pager is given. Pods and events are limited to --namespace when it
    # is set, otherwise they cover all namespaces (-A).
    exec_ctx = ctx.obj or ExecutionContext()
    ensure_tool("kubectl", exec_ctx)

    # Same scope flags for the pod and event listings.
    scope = ["-n", namespace] if namespace else ["-A"]

    sections = []
    sections.append(("kubectl get nodes -o wide", _capture_cmd(["kubectl", "get", "nodes", "-o", "wide"])))

    pods_cmd: list[str] = ["kubectl", "get", "pods", *scope, "-o", "wide"]
    sections.append(("kubectl get pods", _capture_cmd(pods_cmd)))

    events_cmd: list[str] = ["kubectl", "get", "events", *scope, "--sort-by=.lastTimestamp"]
    events_output = _capture_cmd(events_cmd)
    if events_output and events > 0:
        event_lines = events_output.splitlines()
        if len(event_lines) > events + 1:
            # Keep the first line (the column header, or the "✗" banner of a
            # failed command) and trim only the rows below it.
            event_lines = event_lines[:1] + event_lines[-events:]
        events_output = "\n".join(event_lines)
    sections.append(("kubectl get events", events_output))

    combined = "\n\n".join([f"[{title}]\n{body}" for title, body in sections])
    if pager:
        page_text(combined)
    else:
        typer.echo(combined)
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
@debug_app.command(name="journal")
def debug_journal(
    ctx: typer.Context,
    service: str = typer.Option("kubelet", "--service", "-s", help="Unidade systemd para inspecionar"),
    lines: int = typer.Option(200, "--lines", "-n", help="Linhas a exibir"),
    follow: bool = typer.Option(False, "--follow", "-f", help="Segue o journal em tempo real"),
    pager: bool = typer.Option(True, "--pager/--no-pager", help="Exibe com less"),
) -> None:
    """Mostra logs de services (ex.: kubelet) via journalctl."""
    # NOTE: the docstring above is the CLI help text shown by Typer; keep it as is.
    #
    # With --follow the journal is streamed straight to the terminal.
    # Otherwise the output is captured, cut to the last <lines> lines and
    # shown under a "[journalctl ...]" title, paged unless --no-pager is given.
    ensure_tool("journalctl", ctx.obj or ExecutionContext())

    base_cmd = ["journalctl", "-u", service, "-n", str(lines)]

    if follow:
        # Streaming mode: journalctl owns the terminal until it exits.
        subprocess.run([*base_cmd, "-f"])
        return

    captured = _capture_cmd([*base_cmd, "--no-pager"], timeout=60)
    if lines > 0:
        captured = "\n".join(captured.splitlines()[-lines:])

    report = f"[journalctl -u {service} -n {lines}]\n{captured}"
    show = page_text if pager else typer.echo
    show(report)
|
|
672
|
+
|
|
673
|
+
|
|
545
674
|
# ============================================================================
|
|
546
675
|
# Comandos Existentes
|
|
547
676
|
# ============================================================================
|
|
@@ -554,8 +683,24 @@ def bootstrap_cmd(ctx: typer.Context) -> None:
|
|
|
554
683
|
|
|
555
684
|
|
|
556
685
|
@app.command(name="full-install")
def full_install_cmd(
    ctx: typer.Context,
    steps: Optional[str] = typer.Option(None, "--steps", help="Lista de modulos, separado por virgula"),
    confirm_each: bool = typer.Option(False, "--confirm-each", help="Pedir confirmacao antes de cada modulo"),
    debug_mode: bool = typer.Option(False, "--debug-mode", help="Habilita snapshots e diagnose pos-modulo"),
    snapshots: bool = typer.Option(False, "--snapshots", help="Habilita snapshots de cluster apos cada modulo"),
    post_diagnose: bool = typer.Option(False, "--post-diagnose", help="Executa diagnose pos-modulo quando disponivel"),
    select_steps: bool = typer.Option(False, "--select-steps", help="Pergunta quais modulos executar antes de iniciar"),
) -> None:
    """Executa instalacao completa e automatizada do ambiente de producao."""
    # NOTE: the docstring above is the CLI help text shown by Typer; keep it as is.
    #
    # Copies the command-line options onto the execution context, stores the
    # context back on ctx.obj and then runs the "full_install" module.
    exec_ctx = ctx.obj or ExecutionContext()

    if steps:
        # Comma-separated module names; blank entries are discarded.
        stripped = (chunk.strip() for chunk in steps.split(","))
        exec_ctx.selected_steps = [name for name in stripped if name]

    exec_ctx.interactive_steps = select_steps
    exec_ctx.confirm_each_step = confirm_each

    # --debug-mode switches on both extras; a value already set on the
    # context is kept when no flag asks for it.
    exec_ctx.debug_snapshots = debug_mode or snapshots or exec_ctx.debug_snapshots
    exec_ctx.post_diagnose = debug_mode or post_diagnose or exec_ctx.post_diagnose

    ctx.obj = exec_ctx
    _run_module(ctx, "full_install")
|
|
560
705
|
|
|
561
706
|
|
raijin_server/config.py
CHANGED
|
@@ -78,15 +78,15 @@ class ConfigManager:
|
|
|
78
78
|
"modules": {
|
|
79
79
|
"network": {
|
|
80
80
|
"interface": "ens18",
|
|
81
|
-
"address": "192.168.
|
|
82
|
-
"gateway": "192.168.
|
|
83
|
-
"dns": "
|
|
81
|
+
"address": "192.168.1.81/24",
|
|
82
|
+
"gateway": "192.168.1.254",
|
|
83
|
+
"dns": "177.128.80.44,177.128.80.45",
|
|
84
84
|
},
|
|
85
85
|
"kubernetes": {
|
|
86
86
|
"pod_cidr": "10.244.0.0/16",
|
|
87
87
|
"service_cidr": "10.96.0.0/12",
|
|
88
88
|
"cluster_name": "raijin",
|
|
89
|
-
"advertise_address": "
|
|
89
|
+
"advertise_address": "192.168.1.81",
|
|
90
90
|
},
|
|
91
91
|
"calico": {
|
|
92
92
|
"pod_cidr": "10.244.0.0/16",
|
raijin_server/healthchecks.py
CHANGED
|
@@ -124,6 +124,21 @@ def check_k8s_pods_in_namespace(namespace: str, ctx: ExecutionContext, timeout:
|
|
|
124
124
|
)
|
|
125
125
|
|
|
126
126
|
|
|
127
|
+
def check_swap_disabled(ctx: ExecutionContext) -> Tuple[bool, str]:
    """Check that no swap area is active (a kubeadm/kubelet requirement).

    Args:
        ctx: Execution context; only ``ctx.dry_run`` is read.

    Returns:
        ``(ok, message)``. ``ok`` is True in dry-run mode or when
        ``/proc/swaps`` contains at most one line, and False when it lists
        more lines or cannot be read.
    """
    if ctx.dry_run:
        return True, "dry-run"

    try:
        with open("/proc/swaps", encoding="utf-8", errors="replace") as f:
            lines = f.read().strip().splitlines()
    except OSError as exc:
        return False, f"falha ao verificar swap: {exc}"

    # /proc/swaps is a header line plus one line per swap area; header only means swap is off.
    if len(lines) <= 1:
        return True, "swap desativada"
    return False, "swap ativa (remova entradas do fstab e execute swapoff -a)"
|
|
140
|
+
|
|
141
|
+
|
|
127
142
|
def check_helm_release(release: str, namespace: str, ctx: ExecutionContext) -> Tuple[bool, str]:
|
|
128
143
|
"""Verifica status de um release Helm."""
|
|
129
144
|
if ctx.dry_run:
|
|
@@ -217,6 +232,13 @@ def verify_kubernetes(ctx: ExecutionContext) -> bool:
|
|
|
217
232
|
services = ["kubelet", "containerd"]
|
|
218
233
|
all_ok = True
|
|
219
234
|
|
|
235
|
+
swap_ok, swap_msg = check_swap_disabled(ctx)
|
|
236
|
+
if swap_ok:
|
|
237
|
+
typer.secho(f" ✓ Swap: {swap_msg}", fg=typer.colors.GREEN)
|
|
238
|
+
else:
|
|
239
|
+
typer.secho(f" ✗ Swap: {swap_msg}", fg=typer.colors.RED)
|
|
240
|
+
all_ok = False
|
|
241
|
+
|
|
220
242
|
for service in services:
|
|
221
243
|
ok, status = check_systemd_service(service, ctx)
|
|
222
244
|
if ok:
|
raijin_server/modules/calico.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Configuracao de Calico como CNI com CIDR customizado e policies opinativas."""
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from typing import Iterable
|
|
5
|
+
from typing import Iterable, List
|
|
5
6
|
|
|
6
7
|
import typer
|
|
7
8
|
|
|
@@ -16,6 +17,7 @@ from raijin_server.utils import (
|
|
|
16
17
|
|
|
17
18
|
EGRESS_LABEL_KEY = "networking.raijin.dev/egress"
|
|
18
19
|
EGRESS_LABEL_VALUE = "internet"
|
|
20
|
+
DEFAULT_WORKLOAD_NAMESPACE = "apps"
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
def _apply_policy(content: str, ctx: ExecutionContext, suffix: str) -> None:
|
|
@@ -25,6 +27,23 @@ def _apply_policy(content: str, ctx: ExecutionContext, suffix: str) -> None:
|
|
|
25
27
|
path.unlink(missing_ok=True)
|
|
26
28
|
|
|
27
29
|
|
|
30
|
+
def _ensure_namespace(namespace: str, ctx: ExecutionContext) -> None:
    """Apply a Namespace manifest for ``namespace`` carrying the default workload labels.

    The manifest is written to a temporary file under ``/tmp``, applied with
    ``kubectl_apply`` and the file is removed afterwards, even when writing
    or applying fails.

    Args:
        namespace: Name of the namespace to create or update.
        ctx: Execution context forwarded to ``write_file`` and ``kubectl_apply``.
    """
    manifest = f"""apiVersion: v1
kind: Namespace
metadata:
  name: {namespace}
  labels:
    raijin/workload-profile: production
    networking.raijin.dev/default-egress: restricted
"""

    path = Path(f"/tmp/raijin-ns-{namespace}.yaml")
    try:
        write_file(path, manifest, ctx)
        kubectl_apply(str(path), ctx)
    finally:
        # Never leave the temporary manifest behind, whatever happened above.
        path.unlink(missing_ok=True)
|
|
45
|
+
|
|
46
|
+
|
|
28
47
|
def _build_default_deny(namespace: str) -> str:
|
|
29
48
|
return f"""apiVersion: networking.k8s.io/v1
|
|
30
49
|
kind: NetworkPolicy
|
|
@@ -62,6 +81,43 @@ def _split_namespaces(raw_value: str) -> Iterable[str]:
|
|
|
62
81
|
return [ns.strip() for ns in raw_value.split(",") if ns.strip()]
|
|
63
82
|
|
|
64
83
|
|
|
84
|
+
def _pod_template_labels(item: dict) -> dict:
    """Return the labels of a workload's pod template (``spec.template.metadata.labels``)."""
    template = (item.get("spec") or {}).get("template") or {}
    return (template.get("metadata") or {}).get("labels") or {}


def _list_workloads_without_egress(namespaces: List[str], ctx: ExecutionContext) -> None:
    """Print the workloads whose pods lack the egress label; failures only warn.

    For every namespace, the Deployments, StatefulSets and DaemonSets are
    fetched with ``kubectl get ... -o json`` and each one whose pod template
    does not carry ``EGRESS_LABEL_KEY=EGRESS_LABEL_VALUE`` is listed as
    ``namespace/name``. The pod template is inspected because a
    NetworkPolicy selects pods, and pods get their labels from the template,
    not from the labels of the workload object itself.

    Args:
        namespaces: Namespaces to inspect.
        ctx: Execution context; nothing is queried in dry-run mode.
    """
    if ctx.dry_run:
        typer.echo("[dry-run] Skip listagem de workloads para liberação de egress")
        return

    typer.secho("\nWorkloads sem liberação de egress (adicione label para liberar internet):", fg=typer.colors.CYAN)
    for ns in namespaces:
        result = run_cmd(
            ["kubectl", "get", "deploy,statefulset,daemonset", "-n", ns, "-o", "json"],
            ctx,
            check=False,
        )
        if result.returncode != 0:
            msg = (result.stderr or result.stdout or "erro desconhecido").strip()
            typer.secho(f" Aviso: nao foi possivel listar workloads em '{ns}' ({msg})", fg=typer.colors.YELLOW)
            continue

        try:
            items = json.loads(result.stdout or "{}").get("items", [])
            pending = []
            for item in items:
                if _pod_template_labels(item).get(EGRESS_LABEL_KEY) == EGRESS_LABEL_VALUE:
                    continue
                meta = item.get("metadata", {})
                pending.append(f"{meta.get('namespace', ns)}/{meta.get('name', 'desconhecido')}")
        except (ValueError, AttributeError, TypeError) as exc:
            # Malformed or unexpected JSON: warn and move on to the next namespace.
            typer.secho(f" Aviso: falha ao processar workloads em '{ns}': {exc}", fg=typer.colors.YELLOW)
            continue

        if pending:
            for name in pending:
                typer.echo(f" - {name}")
        else:
            typer.echo(f" Nenhum workload pendente em '{ns}'")
|
|
119
|
+
|
|
120
|
+
|
|
65
121
|
def _check_cluster_available(ctx: ExecutionContext) -> bool:
|
|
66
122
|
"""Verifica se o cluster Kubernetes esta acessivel."""
|
|
67
123
|
if ctx.dry_run:
|
|
@@ -99,13 +155,19 @@ def run(ctx: ExecutionContext) -> None:
|
|
|
99
155
|
typer.echo("Aplicando Calico como CNI...")
|
|
100
156
|
pod_cidr = typer.prompt("Pod CIDR (Calico)", default="10.244.0.0/16")
|
|
101
157
|
|
|
158
|
+
typer.secho(
|
|
159
|
+
f"Criando namespace padrao de workloads '{DEFAULT_WORKLOAD_NAMESPACE}' (production-ready)...",
|
|
160
|
+
fg=typer.colors.CYAN,
|
|
161
|
+
)
|
|
162
|
+
_ensure_namespace(DEFAULT_WORKLOAD_NAMESPACE, ctx)
|
|
163
|
+
|
|
102
164
|
manifest_url = "https://raw.githubusercontent.com/projectcalico/calico/v3.27.2/manifests/calico.yaml"
|
|
103
165
|
cmd = f"curl -s {manifest_url} | sed 's#192.168.0.0/16#{pod_cidr}#' | kubectl apply -f -"
|
|
104
166
|
run_cmd(cmd, ctx, use_shell=True)
|
|
105
167
|
|
|
106
168
|
deny_namespaces_raw = typer.prompt(
|
|
107
169
|
"Namespaces para aplicar default-deny (CSV)",
|
|
108
|
-
default=
|
|
170
|
+
default=DEFAULT_WORKLOAD_NAMESPACE,
|
|
109
171
|
)
|
|
110
172
|
for namespace in _split_namespaces(deny_namespaces_raw):
|
|
111
173
|
typer.echo(f"Aplicando default-deny no namespace '{namespace}'...")
|
|
@@ -117,9 +179,12 @@ def run(ctx: ExecutionContext) -> None:
|
|
|
117
179
|
):
|
|
118
180
|
allow_namespaces_raw = typer.prompt(
|
|
119
181
|
"Namespaces com pods que precisam acessar APIs externas (CSV)",
|
|
120
|
-
default=
|
|
182
|
+
default=DEFAULT_WORKLOAD_NAMESPACE,
|
|
121
183
|
)
|
|
122
184
|
cidr = typer.prompt("CIDR liberado (ex.: 0.0.0.0/0)", default="0.0.0.0/0")
|
|
185
|
+
namespaces = list(_split_namespaces(allow_namespaces_raw))
|
|
186
|
+
if namespaces:
|
|
187
|
+
_list_workloads_without_egress(namespaces, ctx)
|
|
123
188
|
for namespace in _split_namespaces(allow_namespaces_raw):
|
|
124
189
|
typer.echo(
|
|
125
190
|
f"Criando policy allow-egress-internet em '{namespace}' para pods com "
|
|
@@ -18,6 +18,8 @@ from enum import Enum
|
|
|
18
18
|
from pathlib import Path
|
|
19
19
|
from typing import Callable, Optional, List
|
|
20
20
|
|
|
21
|
+
import os
|
|
22
|
+
|
|
21
23
|
import typer
|
|
22
24
|
|
|
23
25
|
from raijin_server.utils import (
|
|
@@ -34,11 +36,14 @@ CHART_REPO = "https://charts.jetstack.io"
|
|
|
34
36
|
CHART_NAME = "cert-manager"
|
|
35
37
|
NAMESPACE = "cert-manager"
|
|
36
38
|
MANIFEST_PATH = Path("/tmp/raijin-cert-manager-issuer.yaml")
|
|
39
|
+
HELM_DATA_DIR = Path("/tmp/raijin-helm")
|
|
40
|
+
HELM_REPO_CONFIG = HELM_DATA_DIR / "repositories.yaml"
|
|
41
|
+
HELM_REPO_CACHE = HELM_DATA_DIR / "cache"
|
|
37
42
|
|
|
38
|
-
# Timeouts
|
|
39
|
-
WEBHOOK_READY_TIMEOUT =
|
|
40
|
-
POD_READY_TIMEOUT =
|
|
41
|
-
CRD_READY_TIMEOUT =
|
|
43
|
+
# Timeouts enxutos (falha rápida em redes rápidas)
|
|
44
|
+
WEBHOOK_READY_TIMEOUT = 240 # 4 minutos
|
|
45
|
+
POD_READY_TIMEOUT = 180 # 3 minutos
|
|
46
|
+
CRD_READY_TIMEOUT = 120 # 2 minutos
|
|
42
47
|
|
|
43
48
|
|
|
44
49
|
class DNSProvider(str, Enum):
|
|
@@ -82,6 +87,17 @@ def _get_acme_server(staging: bool) -> str:
|
|
|
82
87
|
return "https://acme-v02.api.letsencrypt.org/directory"
|
|
83
88
|
|
|
84
89
|
|
|
90
|
+
def _helm_env() -> dict:
    """Build the environment for Helm subprocesses with isolated repo config and cache.

    Creates ``HELM_DATA_DIR`` and ``HELM_REPO_CACHE`` when they are missing
    and returns a copy of the current process environment in which
    ``HELM_REPOSITORY_CONFIG`` and ``HELM_REPOSITORY_CACHE`` point inside
    that directory, so Helm does not depend on the permissions of its
    default locations.
    """
    for directory in (HELM_DATA_DIR, HELM_REPO_CACHE):
        directory.mkdir(parents=True, exist_ok=True)

    env = dict(os.environ)
    env["HELM_REPOSITORY_CONFIG"] = str(HELM_REPO_CONFIG)
    env["HELM_REPOSITORY_CACHE"] = str(HELM_REPO_CACHE)
    return env
|
|
99
|
+
|
|
100
|
+
|
|
85
101
|
# =============================================================================
|
|
86
102
|
# Builders de Manifests YAML
|
|
87
103
|
# =============================================================================
|
|
@@ -519,6 +535,7 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
|
|
|
519
535
|
capture_output=True,
|
|
520
536
|
text=True,
|
|
521
537
|
timeout=60,
|
|
538
|
+
env=_helm_env(),
|
|
522
539
|
)
|
|
523
540
|
|
|
524
541
|
if result.returncode != 0:
|
|
@@ -539,6 +556,7 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
|
|
|
539
556
|
capture_output=True,
|
|
540
557
|
text=True,
|
|
541
558
|
timeout=120,
|
|
559
|
+
env=_helm_env(),
|
|
542
560
|
)
|
|
543
561
|
|
|
544
562
|
elapsed_update = time.time() - start
|
|
@@ -562,8 +580,8 @@ def _add_helm_repo(ctx: ExecutionContext) -> bool:
|
|
|
562
580
|
return False
|
|
563
581
|
|
|
564
582
|
|
|
565
|
-
def _run_helm_install(ctx: ExecutionContext) -> bool:
|
|
566
|
-
"""Executa o helm upgrade --install."""
|
|
583
|
+
def _run_helm_install(ctx: ExecutionContext, attempt: int = 1) -> bool:
|
|
584
|
+
"""Executa o helm upgrade --install, com uma tentativa de retry para repo/config."""
|
|
567
585
|
if ctx.dry_run:
|
|
568
586
|
typer.echo(" [4/5] [dry-run] Executando helm upgrade --install...")
|
|
569
587
|
return True
|
|
@@ -574,6 +592,7 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
|
|
|
574
592
|
|
|
575
593
|
cmd = [
|
|
576
594
|
"helm", "upgrade", "--install", "cert-manager", "jetstack/cert-manager",
|
|
595
|
+
"--repo", CHART_REPO,
|
|
577
596
|
"-n", NAMESPACE,
|
|
578
597
|
"--create-namespace",
|
|
579
598
|
"--set", "installCRDs=true",
|
|
@@ -598,6 +617,7 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
|
|
|
598
617
|
stdout=subprocess.PIPE,
|
|
599
618
|
stderr=subprocess.STDOUT,
|
|
600
619
|
text=True,
|
|
620
|
+
env=_helm_env(),
|
|
601
621
|
)
|
|
602
622
|
|
|
603
623
|
output_lines = []
|
|
@@ -628,7 +648,13 @@ def _run_helm_install(ctx: ExecutionContext) -> bool:
|
|
|
628
648
|
output = "".join(output_lines[-20:]) # Últimas 20 linhas
|
|
629
649
|
logger.error(f"Helm install falhou (código {return_code}): {output}")
|
|
630
650
|
typer.secho(f" ✗ Helm install falhou (código {return_code})", fg=typer.colors.RED)
|
|
631
|
-
|
|
651
|
+
|
|
652
|
+
needs_repo_retry = "repo jetstack not found" in output.lower() or "repositories.yaml" in output.lower()
|
|
653
|
+
if needs_repo_retry and attempt == 1:
|
|
654
|
+
typer.echo(" → Reconfigurando repositório Helm e tentando novamente...")
|
|
655
|
+
if _add_helm_repo(ctx):
|
|
656
|
+
return _run_helm_install(ctx, attempt=2)
|
|
657
|
+
|
|
632
658
|
# Mostra as últimas linhas do erro
|
|
633
659
|
typer.echo("\n Últimas linhas do log:")
|
|
634
660
|
for line in output_lines[-10:]:
|