raijin-server 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,13 @@
1
1
  """Instalacao completa e automatizada do ambiente produtivo."""
2
2
 
3
3
  import os
4
+ import subprocess
5
+ from typing import List
4
6
 
5
7
  import typer
6
8
 
7
9
  from raijin_server.utils import ExecutionContext, require_root
10
+ from raijin_server.healthchecks import run_health_check
8
11
  from raijin_server.modules import (
9
12
  bootstrap,
10
13
  calico,
@@ -68,6 +71,196 @@ def _cert_manager_install_only(ctx: ExecutionContext) -> None:
68
71
  )
69
72
 
70
73
 
74
def _confirm_colored(message: str, default: bool = True) -> bool:
    """Ask a yes/no question with the prompt rendered highlighted (bold yellow)."""
    highlighted = typer.style(message, fg=typer.colors.YELLOW, bold=True)
    return typer.confirm(highlighted, default=default)
78
+
79
+
80
def _select_steps_interactively() -> List[str] | None:
    """Prompt for a comma-separated subset of steps; ``None`` means run everything."""
    typer.secho("Selecione passos (separados por vírgula) ou ENTER para todos:", fg=typer.colors.CYAN)
    typer.echo("Exemplo: kubernetes,calico,cert_manager,traefik")
    raw = typer.prompt("Passos", default="").strip()
    if not raw:
        return None
    chosen = [part.strip() for part in raw.split(",") if part.strip()]
    return chosen if chosen else None
88
+
89
+
90
def _kube_snapshot(ctx: ExecutionContext, events: int = 100, namespace: str | None = None) -> None:
    """Collect a quick, best-effort cluster snapshot for debugging.

    Prints nodes, pods and the most recent ``events`` cluster events.
    When ``namespace`` is given, pods/events are restricted to it;
    otherwise all namespaces (``-A``) are inspected. Failures are
    reported but never raised.

    Fix: honor ``ctx.dry_run`` — previously this helper executed
    kubectl even during dry runs, unlike its sibling ``_run_cmd``.
    """
    scope = ["-n", namespace] if namespace else ["-A"]

    pods_cmd = ["kubectl", "get", "pods", *scope, "-o", "wide"]
    events_cmd = ["kubectl", "get", "events", *scope, "--sort-by=.lastTimestamp"]
    cmds = [
        ["kubectl", "get", "nodes", "-o", "wide"],
        pods_cmd,
        events_cmd,
    ]

    typer.secho("\n[DEBUG] Snapshot do cluster", fg=typer.colors.CYAN)

    # Consistent with _run_cmd: never execute kubectl during a dry run.
    if ctx.dry_run:
        typer.echo("[dry-run] comando nao executado")
        return

    for cmd in cmds:
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
            typer.echo(f"$ {' '.join(cmd)}")
            if result.stdout:
                lines = result.stdout.strip().splitlines()
                if cmd is events_cmd:
                    # Only the tail of the (potentially huge) event list.
                    lines = lines[-events:]
                typer.echo("\n".join(lines))
            elif result.stderr:
                typer.echo(result.stderr.strip())
        except Exception as exc:  # best-effort: never break the install flow
            typer.secho(f"(snapshot falhou: {exc})", fg=typer.colors.YELLOW)
125
+
126
+
127
def _run_cmd(title: str, cmd: List[str], ctx: ExecutionContext, tail: int | None = None) -> None:
    """Run a kubectl/helm command best-effort for quick diagnostics.

    Prints the command, then stdout (or stderr when stdout is empty),
    optionally truncated to the last ``tail`` lines. Honors dry-run.
    """
    typer.secho(f"\n[diagnose] {title}", fg=typer.colors.CYAN)
    if ctx.dry_run:
        typer.echo("[dry-run] comando nao executado")
        return

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=40)
        typer.echo(f"$ {' '.join(cmd)}")
        text = proc.stdout.strip() or proc.stderr.strip()
        if not text:
            typer.echo("(sem saida)")
            return
        shown = text.splitlines()
        if tail:
            shown = shown[-tail:]
        typer.echo("\n".join(shown))
    except Exception as exc:  # diagnostics must never abort the caller
        typer.secho(f"(falha ao executar: {exc})", fg=typer.colors.YELLOW)
147
+
148
+
149
def _diag_namespace(ns: str, ctx: ExecutionContext, tail_events: int = 50) -> None:
    """Dump pods, services, deployments and recent events for one namespace."""
    base = ["kubectl", "get"]
    _run_cmd(f"Pods em {ns}", base + ["pods", "-n", ns, "-o", "wide"], ctx)
    _run_cmd(f"Services em {ns}", base + ["svc", "-n", ns], ctx)
    _run_cmd(f"Deployments em {ns}", base + ["deploy", "-n", ns], ctx)
    _run_cmd(
        f"Eventos em {ns}",
        base + ["events", "-n", ns, "--sort-by=.lastTimestamp"],
        ctx,
        tail=tail_events,
    )
159
+
160
+
161
def _diag_calico(ctx: ExecutionContext) -> None:
    """Inspect the Calico CNI workloads in kube-system."""
    ns = "kube-system"
    checks = [
        ("Calico DaemonSets", ["kubectl", "get", "ds", "-n", ns, "-o", "wide"], None),
        ("Calico pods", ["kubectl", "get", "pods", "-n", ns, "-l", "k8s-app=calico-node", "-o", "wide"], None),
        ("Calico typha", ["kubectl", "get", "pods", "-n", ns, "-l", "k8s-app=calico-typha", "-o", "wide"], None),
        ("Calico events", ["kubectl", "get", "events", "-n", ns, "--sort-by=.lastTimestamp"], 50),
    ]
    for title, cmd, tail in checks:
        _run_cmd(title, cmd, ctx, tail=tail)
167
+
168
+
169
def _diag_secrets(ctx: ExecutionContext) -> None:
    """Dump the namespaces involved in secret management."""
    for ns in ("kube-system", "external-secrets"):
        _diag_namespace(ns, ctx)
172
+
173
+
174
def _diag_prometheus(ctx: ExecutionContext) -> None:
    """Diagnose the Prometheus workload, then dump its whole namespace."""
    namespace = "observability"
    _run_cmd(
        "Prometheus pods",
        ["kubectl", "get", "pods", "-n", namespace, "-l", "app.kubernetes.io/name=prometheus"],
        ctx,
    )
    _diag_namespace(namespace, ctx)
178
+
179
+
180
def _diag_grafana(ctx: ExecutionContext) -> None:
    """Diagnose Grafana services, then dump its whole namespace."""
    namespace = "observability"
    _run_cmd(
        "Grafana svc",
        ["kubectl", "get", "svc", "-n", namespace, "-l", "app.kubernetes.io/name=grafana"],
        ctx,
    )
    _diag_namespace(namespace, ctx)
184
+
185
+
186
def _diag_loki(ctx: ExecutionContext) -> None:
    """Diagnose Loki statefulsets, then dump its whole namespace."""
    namespace = "observability"
    _run_cmd(
        "Loki statefulsets",
        ["kubectl", "get", "sts", "-n", namespace, "-l", "app.kubernetes.io/name=loki"],
        ctx,
    )
    _diag_namespace(namespace, ctx)
190
+
191
+
192
def _diag_traefik(ctx: ExecutionContext) -> None:
    """Diagnose Traefik ingress objects, then dump its whole namespace."""
    namespace = "traefik"
    _run_cmd("Traefik ingress", ["kubectl", "get", "ingress", "-n", namespace], ctx)
    _diag_namespace(namespace, ctx)
196
+
197
+
198
def _diag_observability_ingress(ctx: ExecutionContext) -> None:
    """List ingress objects in observability, then dump the namespace."""
    namespace = "observability"
    _run_cmd("Ingress objects", ["kubectl", "get", "ingress", "-n", namespace], ctx)
    _diag_namespace(namespace, ctx)
202
+
203
+
204
def _diag_observability_dashboards(ctx: ExecutionContext) -> None:
    """List dashboard ConfigMaps in observability, then dump the namespace."""
    namespace = "observability"
    _run_cmd(
        "ConfigMaps dashboards",
        ["kubectl", "get", "configmap", "-n", namespace, "-l", "raijin/dashboards=true"],
        ctx,
    )
    _diag_namespace(namespace, ctx)
208
+
209
+
210
def _diag_minio(ctx: ExecutionContext) -> None:
    """Generic namespace dump for MinIO."""
    _diag_namespace("minio", ctx)
213
+
214
+
215
def _diag_kafka(ctx: ExecutionContext) -> None:
    """Diagnose Kafka pods, then dump its whole namespace."""
    namespace = "kafka"
    _run_cmd("Kafka pods", ["kubectl", "get", "pods", "-n", namespace, "-o", "wide"], ctx)
    _diag_namespace(namespace, ctx)
219
+
220
+
221
def _diag_velero(ctx: ExecutionContext) -> None:
    """Generic namespace dump for Velero."""
    _diag_namespace("velero", ctx)
224
+
225
+
226
def _diag_kong(ctx: ExecutionContext) -> None:
    """Generic namespace dump for Kong."""
    _diag_namespace("kong", ctx)
229
+
230
+
231
# Maps module name -> specialized diagnostics callable. Modules missing
# here fall back to the generic health check in _maybe_diagnose.
DIAG_HANDLERS = dict(
    cert_manager=cert_manager.diagnose,
    calico=_diag_calico,
    secrets=_diag_secrets,
    prometheus=_diag_prometheus,
    grafana=_diag_grafana,
    loki=_diag_loki,
    traefik=_diag_traefik,
    observability_ingress=_diag_observability_ingress,
    observability_dashboards=_diag_observability_dashboards,
    minio=_diag_minio,
    kafka=_diag_kafka,
    velero=_diag_velero,
    kong=_diag_kong,
)
246
+
247
+
248
def _maybe_diagnose(name: str, ctx: ExecutionContext) -> None:
    """Run module-specific diagnostics, falling back to a generic health check.

    Never raises: any failure is reported as a yellow warning only.
    """
    try:
        handler = DIAG_HANDLERS.get(name)
        if handler is not None:
            handler(ctx)
            return

        # No dedicated handler: try the health check, if one exists.
        if run_health_check(name, ctx):
            typer.secho(f"[diagnose] {name}: OK", fg=typer.colors.GREEN)
        else:
            typer.secho(f"[diagnose] {name}: falhou", fg=typer.colors.YELLOW)
    except Exception as exc:
        typer.secho(f"[diagnose] {name} falhou: {exc}", fg=typer.colors.YELLOW)
262
+
263
+
71
264
  # Ordem de execucao dos modulos para instalacao completa
72
265
  # Modulos marcados com skip_env podem ser pulados via variavel de ambiente
73
266
  INSTALL_SEQUENCE = [
@@ -108,12 +301,25 @@ def run(ctx: ExecutionContext) -> None:
108
301
  fg=typer.colors.CYAN,
109
302
  )
110
303
 
304
+ steps_override = ctx.selected_steps
305
+ if steps_override is None and ctx.interactive_steps:
306
+ steps_override = _select_steps_interactively()
307
+
308
+ # Debug/diagnose menu simples
309
+ if not ctx.debug_snapshots and not ctx.post_diagnose:
310
+ typer.secho("Ativar modo debug (snapshots + diagnose pos-modulo)?", fg=typer.colors.YELLOW)
311
+ if typer.confirm("Habilitar debug?", default=False):
312
+ ctx.debug_snapshots = True
313
+ ctx.post_diagnose = True
314
+
111
315
  # Mostra sequencia de instalacao
112
316
  typer.echo("Sequencia de instalacao:")
113
317
  for i, (name, _, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
114
318
  suffix = ""
115
319
  if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
116
320
  suffix = " [SKIP]"
321
+ if steps_override and name not in steps_override:
322
+ suffix = " [IGNORADO]"
117
323
  typer.echo(f" {i:2}. {name:25} - {desc}{suffix}")
118
324
 
119
325
  typer.echo("")
@@ -126,7 +332,7 @@ def run(ctx: ExecutionContext) -> None:
126
332
  typer.echo("")
127
333
 
128
334
  if not ctx.dry_run:
129
- if not typer.confirm("Deseja continuar com a instalacao completa?", default=True):
335
+ if not _confirm_colored("Deseja continuar com a instalacao completa?", default=True):
130
336
  typer.echo("Instalacao cancelada.")
131
337
  raise typer.Exit(code=0)
132
338
 
@@ -135,13 +341,25 @@ def run(ctx: ExecutionContext) -> None:
135
341
  succeeded = []
136
342
  skipped = []
137
343
 
344
+ cluster_ready = False
345
+
138
346
  for i, (name, handler, desc, skip_env) in enumerate(INSTALL_SEQUENCE, 1):
347
+ if steps_override and name not in steps_override:
348
+ skipped.append(name)
349
+ typer.secho(f"⏭ {name} ignorado (fora da lista selecionada)", fg=typer.colors.YELLOW)
350
+ continue
351
+
139
352
  # Verifica se modulo deve ser pulado via env
140
353
  if skip_env and os.environ.get(skip_env, "").strip() in ("1", "true", "yes"):
141
354
  skipped.append(name)
142
355
  typer.secho(f"⏭ {name} pulado via {skip_env}=1", fg=typer.colors.YELLOW)
143
356
  continue
144
357
 
358
+ if ctx.confirm_each_step:
359
+ if not _confirm_colored(f"Executar modulo '{name}' agora?", default=True):
360
+ skipped.append(name)
361
+ continue
362
+
145
363
  typer.secho(
146
364
  f"\n{'='*60}",
147
365
  fg=typer.colors.CYAN,
@@ -160,6 +378,15 @@ def run(ctx: ExecutionContext) -> None:
160
378
  handler(ctx)
161
379
  succeeded.append(name)
162
380
  typer.secho(f"✓ {name} concluido com sucesso", fg=typer.colors.GREEN)
381
+
382
+ if name == "kubernetes":
383
+ cluster_ready = True
384
+
385
+ if ctx.post_diagnose and cluster_ready:
386
+ _maybe_diagnose(name, ctx)
387
+
388
+ if ctx.debug_snapshots and cluster_ready:
389
+ _kube_snapshot(ctx, events=80)
163
390
  except KeyboardInterrupt:
164
391
  typer.secho(f"\n⚠ Instalacao interrompida pelo usuario no modulo '{name}'", fg=typer.colors.YELLOW)
165
392
  raise typer.Exit(code=130)
@@ -1,30 +1,115 @@
1
- """Configuracao do Prometheus Stack via Helm."""
1
+ """Configuracao do Prometheus Stack via Helm (robust, production-ready)."""
2
+
3
+ from __future__ import annotations
2
4
 
3
5
  import typer
4
6
 
5
- from raijin_server.utils import ExecutionContext, helm_upgrade_install, require_root
7
+ from raijin_server.utils import (
8
+ ExecutionContext,
9
+ helm_upgrade_install,
10
+ kubectl_create_ns,
11
+ require_root,
12
+ run_cmd,
13
+ )
14
+
15
+ DEFAULT_NAMESPACE = "observability"
16
+
17
+
18
+ def _get_default_storage_class(ctx: ExecutionContext) -> str:
19
+ if ctx.dry_run:
20
+ return ""
21
+ result = run_cmd(
22
+ [
23
+ "kubectl",
24
+ "get",
25
+ "storageclass",
26
+ "-o",
27
+ "jsonpath={.items[?(@.metadata.annotations['storageclass.kubernetes.io/is-default-class']=='true')].metadata.name}",
28
+ ],
29
+ ctx,
30
+ check=False,
31
+ )
32
+ return (result.stdout or "").strip()
33
+
34
+
35
+ def _ensure_cluster_access(ctx: ExecutionContext) -> None:
36
+ if ctx.dry_run:
37
+ return
38
+ result = run_cmd(["kubectl", "cluster-info"], ctx, check=False)
39
+ if result.returncode != 0:
40
+ typer.secho("Cluster Kubernetes nao acessivel. Verifique kubeconfig/controle-plane.", fg=typer.colors.RED)
41
+ raise typer.Exit(code=1)
6
42
 
7
43
 
8
44
def run(ctx: ExecutionContext) -> None:
    """Install kube-prometheus-stack via Helm, configured interactively.

    Prompts for target namespace, chart version and (optionally)
    persistence settings, then performs an atomic helm upgrade/install.
    Requires root and a reachable cluster.
    """
    require_root(ctx)
    _ensure_cluster_access(ctx)

    typer.echo("Instalando kube-prometheus-stack via Helm...")

    target_ns = typer.prompt("Namespace destino", default=DEFAULT_NAMESPACE)
    kubectl_create_ns(target_ns, ctx)

    default_sc = _get_default_storage_class(ctx)
    # Suggest persistence only when the cluster has a default StorageClass.
    persist = typer.confirm(
        "Habilitar PVC para Prometheus e Alertmanager?", default=bool(default_sc)
    )

    values = [
        "grafana.enabled=false",
        "prometheus.prometheusSpec.retention=15d",
        "prometheus.prometheusSpec.enableAdminAPI=true",
        "prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false",
        "prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false",
        "defaultRules.create=true",
    ]

    extra_args = ["--wait", "--timeout", "5m", "--atomic"]

    chart_version = typer.prompt(
        "Versao do chart (vazio para latest)",
        default="",
    ).strip()
    if chart_version:
        extra_args += ["--version", chart_version]

    if not persist:
        typer.secho(
            "PVC desativado: Prometheus/Alertmanager usarao volumes efemeros (sem retenção apos restart).",
            fg=typer.colors.YELLOW,
        )
    else:
        sc_name = typer.prompt(
            "StorageClass para PVC",
            default=default_sc or "",
        ).strip()
        prom_size = typer.prompt("Tamanho PVC Prometheus", default="20Gi")
        alert_size = typer.prompt("Tamanho PVC Alertmanager", default="10Gi")

        if sc_name:
            values += [
                f"prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.storageClassName={sc_name}",
                f"alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.storageClassName={sc_name}",
            ]
        values += [
            f"prometheus.prometheusSpec.storageSpec.volumeClaimTemplate.spec.resources.requests.storage={prom_size}",
            f"alertmanager.alertmanagerSpec.storage.volumeClaimTemplate.spec.resources.requests.storage={alert_size}",
        ]

    helm_upgrade_install(
        release="kube-prometheus-stack",
        chart="kube-prometheus-stack",
        namespace=target_ns,
        repo="prometheus-community",
        repo_url="https://prometheus-community.github.io/helm-charts",
        ctx=ctx,
        values=values,
        extra_args=extra_args,
    )

    typer.secho("kube-prometheus-stack instalado com sucesso.", fg=typer.colors.GREEN)
File without changes
@@ -38,7 +38,7 @@ echo "Escolha o tipo de instalação:"
38
38
  echo " 1) Global (requer sudo, todos os usuários)"
39
39
  echo " 2) Virtual env (recomendado para desenvolvimento)"
40
40
  echo " 3) User install (apenas usuário atual)"
41
- read -p "Opção [2]: " INSTALL_TYPE
41
+ read -r -p "Opção [2]: " INSTALL_TYPE
42
42
  INSTALL_TYPE=${INSTALL_TYPE:-2}
43
43
 
44
44
  echo ""
@@ -51,6 +51,7 @@ case $INSTALL_TYPE in
51
51
  2)
52
52
  echo -e "${YELLOW}Criando virtual environment...${NC}"
53
53
  python3 -m venv .venv
54
+ # shellcheck disable=SC1091
54
55
  source .venv/bin/activate
55
56
  pip install --upgrade pip
56
57
  pip install -e .
@@ -73,7 +74,7 @@ EOF
73
74
 
74
75
  # Adicionar ao PATH se necessário
75
76
  if [[ ":$PATH:" != *":$HOME/.local/bin:"* ]]; then
76
- echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
77
+ echo "export PATH=\"$HOME/.local/bin:$PATH\"" >> ~/.bashrc
77
78
  echo -e "${YELLOW}⚠${NC} Adicionado $HOME/.local/bin ao PATH"
78
79
  echo "Execute: source ~/.bashrc"
79
80
  fi
@@ -11,21 +11,25 @@ OUTPUT=${RAIJIN_METRIC_FILE:-/var/lib/node_exporter/textfile_collector/raijin_lo
11
11
  # Calcula soma de todos os logs (principal + rotações)
12
12
  TOTAL_BYTES=0
13
13
  shopt -s nullglob
14
+
15
+ METRICS_TMP=$(mktemp)
16
+ trap 'rm -f "$METRICS_TMP"' EXIT
17
+
14
18
  for f in "$LOG_DIR"/$LOG_PATTERN; do
15
19
  size=$(stat -c%s "$f" 2>/dev/null || echo 0)
16
20
  TOTAL_BYTES=$((TOTAL_BYTES + size))
17
21
  if [[ "$f" =~ raijin-server\.log(\.\d+)?$ ]]; then
18
- printf "raijin_log_size_bytes{file=\"%s\"} %d\n" "$(basename "$f")" "$size"
22
+ printf "raijin_log_size_bytes{file=\"%s\"} %d\n" "$(basename "$f")" "$size" >> "$METRICS_TMP"
19
23
  fi
20
- done | {
21
- # Escreve métricas no arquivo final
22
- mkdir -p "$(dirname "$OUTPUT")"
23
- {
24
- echo "# HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)"
25
- echo "# TYPE raijin_log_size_bytes gauge"
26
- cat
27
- echo "# HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)"
28
- echo "# TYPE raijin_log_size_total_bytes gauge"
29
- echo "raijin_log_size_total_bytes ${TOTAL_BYTES}"
30
- } > "$OUTPUT"
31
- }
24
+ done
25
+
26
+ # Escreve métricas no arquivo final
27
+ mkdir -p "$(dirname "$OUTPUT")"
28
+ {
29
+ echo "# HELP raijin_log_size_bytes Tamanho dos logs do raijin-server (bytes)"
30
+ echo "# TYPE raijin_log_size_bytes gauge"
31
+ cat "$METRICS_TMP"
32
+ echo "# HELP raijin_log_size_total_bytes Soma dos logs do raijin-server (bytes)"
33
+ echo "# TYPE raijin_log_size_total_bytes gauge"
34
+ echo "raijin_log_size_total_bytes ${TOTAL_BYTES}"
35
+ } > "$OUTPUT"
@@ -49,6 +49,7 @@ fi
49
49
  echo ""
50
50
  echo "2. Verificando Sistema Operacional..."
51
51
  if [ -f /etc/os-release ]; then
52
+ # shellcheck disable=SC1091
52
53
  . /etc/os-release
53
54
  if [[ "$ID" == "ubuntu" ]]; then
54
55
  VERSION_NUM=$(echo "$VERSION_ID" | cut -d. -f1)
@@ -152,7 +153,7 @@ STATE_DIRS=("/var/lib/raijin-server/state" "$HOME/.local/share/raijin-server/sta
152
153
  FOUND_STATE=0
153
154
  for dir in "${STATE_DIRS[@]}"; do
154
155
  if [[ -d "$dir" ]]; then
155
- MODULE_COUNT=$(ls -1 "$dir"/*.done 2>/dev/null | wc -l)
156
+ MODULE_COUNT=$(find "$dir" -maxdepth 1 -name '*.done' -type f 2>/dev/null | wc -l)
156
157
  if [[ $MODULE_COUNT -gt 0 ]]; then
157
158
  check_pass "$MODULE_COUNT modulos concluidos (em $dir)"
158
159
  FOUND_STATE=1