@dataif/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/dataif.js +623 -0
- package/package.json +26 -0
- package/scripts/build-template.mjs +72 -0
- package/templates/dataif/README.md +157 -0
- package/templates/dataif/infra/.env.example +119 -0
- package/templates/dataif/infra/.env.stg.example +119 -0
- package/templates/dataif/infra/airflow/Dockerfile +11 -0
- package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
- package/templates/dataif/infra/airflow/requirements.txt +3 -0
- package/templates/dataif/infra/docker-compose.yml +306 -0
- package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
- package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
- package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
- package/templates/dataif/infra/keycloak/Dockerfile +4 -0
- package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
- package/templates/dataif/infra/ollama/Dockerfile +9 -0
- package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
- package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
- package/templates/dataif/infra/postgres/Dockerfile +4 -0
- package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
- package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
- package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
- package/templates/dataif/scripts/configure-env.sh +149 -0
- package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
- package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
- package/templates/dataif/scripts/deploy.sh +79 -0
- package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
- package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
- package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
- package/templates/dataif/scripts/publish-images.sh +31 -0
- package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
- package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
- package/templates/dataif/services/api/.dockerignore +18 -0
- package/templates/dataif/services/api/Dockerfile +12 -0
- package/templates/dataif/services/api/app/__init__.py +1 -0
- package/templates/dataif/services/api/app/auth.py +48 -0
- package/templates/dataif/services/api/app/config.py +59 -0
- package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
- package/templates/dataif/services/api/app/main.py +2432 -0
- package/templates/dataif/services/api/app/metabase_admin.py +191 -0
- package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
- package/templates/dataif/services/api/app/metabase_embed.py +15 -0
- package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
- package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
- package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
- package/templates/dataif/services/api/app/vanna_client.py +32 -0
- package/templates/dataif/services/api/requirements.txt +9 -0
- package/templates/dataif/services/vanna/.dockerignore +18 -0
- package/templates/dataif/services/vanna/Dockerfile +12 -0
- package/templates/dataif/services/vanna/app/config.py +57 -0
- package/templates/dataif/services/vanna/app/main.py +108 -0
- package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
- package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
- package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
- package/templates/dataif/services/vanna/requirements.txt +8 -0
- package/templates/dataif/services/web/.dockerignore +13 -0
- package/templates/dataif/services/web/Dockerfile +16 -0
- package/templates/dataif/services/web/index.html +12 -0
- package/templates/dataif/services/web/nginx.conf +74 -0
- package/templates/dataif/services/web/package-lock.json +4397 -0
- package/templates/dataif/services/web/package.json +32 -0
- package/templates/dataif/services/web/postcss.config.mjs +5 -0
- package/templates/dataif/services/web/src/App.jsx +2817 -0
- package/templates/dataif/services/web/src/adminAuth.js +245 -0
- package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
- package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
- package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
- package/templates/dataif/services/web/src/assets/if.svg +0 -0
- package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
- package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
- package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
- package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
- package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
- package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
- package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
- package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
- package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
- package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
- package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
- package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
- package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
- package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
- package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
- package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
- package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
- package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
- package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
- package/templates/dataif/services/web/src/main.jsx +14 -0
- package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
- package/templates/dataif/services/web/src/styles/globals.css +60 -0
- package/templates/dataif/services/web/src/styles/theme.css +1326 -0
- package/templates/dataif/services/web/src/styles/typography.css +430 -0
- package/templates/dataif/services/web/src/styles.css +1287 -0
- package/templates/dataif/services/web/src/utils/cx.ts +24 -0
- package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
- package/templates/dataif/services/web/vite.config.js +14 -0
- package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
- package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
- package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
- package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
- package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
- package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
- package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
- package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
- package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
- package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
- package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
- package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
- package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
- package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
|
@@ -0,0 +1,2432 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from threading import Lock
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from time import monotonic, sleep
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
import psycopg2
|
|
12
|
+
from croniter import croniter
|
|
13
|
+
from fastapi import Depends, FastAPI, HTTPException
|
|
14
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
15
|
+
from pydantic import BaseModel, Field, model_validator
|
|
16
|
+
from psycopg2.extras import RealDictCursor
|
|
17
|
+
|
|
18
|
+
from .auth import require_admin, verify_optional_bearer
|
|
19
|
+
from .config import settings
|
|
20
|
+
from .keycloak_admin import KeycloakAdminClient
|
|
21
|
+
from .metabase_admin import MetabaseAdminClient
|
|
22
|
+
from .metabase_embed import build_signed_dashboard_url
|
|
23
|
+
from . import pnp_dag_provisioner, pnp_instance_repository
|
|
24
|
+
from .pnp_powerbi import DEFAULT_PNP_POWERBI_REPORT_URL, PNP_MICRODADOS_TYPES, load_public_microdados_catalog
|
|
25
|
+
from .vanna_client import ask_vanna
|
|
26
|
+
|
|
27
|
+
# --- Module-level constants -------------------------------------------------

# String identifier of the built-in PNP connector.
PNP_INTERNAL_CONNECTOR_ID = "nilo_pecanha"
# Display labels for the Power BI microdata source.
# NOTE(review): where these labels are surfaced is not visible in this part of the file.
PNP_POWERBI_GROUP_LABEL = "Microdados Publicos"
PNP_POWERBI_SOURCE_LABEL = "Catalogo publico de microdados via Power BI"
# Entity-kind discriminators for PNP objects ("connection" vs "pipeline").
PNP_CONNECTION_ENTITY = "connection"
PNP_PIPELINE_ENTITY = "pipeline"
# Key of the config.app_settings row that stores the default Metabase dashboard id.
METABASE_DEFAULT_DASHBOARD_SETTING_KEY = "metabase.default_dashboard_id"
# Setting key for the Vanna LLM configuration.
# NOTE(review): presumably also stored in config.app_settings; confirm at the use sites.
VANNA_LLM_SETTING_KEY = "vanna.llm_config"
# Prefix for per-user Vanna LLM settings.
# NOTE(review): presumably a user identifier is appended after the trailing dot; confirm.
VANNA_USER_LLM_SETTING_PREFIX = "vanna.llm_config.user."
|
|
35
|
+
# Static metadata for the PNP pipeline runtime tasks, keyed by task name.
# Each entry carries:
#   "stage"       - machine-readable stage id (identical to the entry's key),
#   "stage_label" - human-readable stage label (Portuguese),
#   "message"     - human-readable completion message (Portuguese).
# NOTE(review): some labels/messages are written with diacritics and others
# without (e.g. "catálogo" vs "catalogo"); left untouched here because these
# are runtime strings.
PNP_RUNTIME_TASK_META = {
    "load_instance_config": {
        "stage": "load_instance_config",
        "stage_label": "Carregamento da configuração",
        "message": "A configuração da pipeline foi carregada.",
    },
    "resolve_powerbi_catalog": {
        "stage": "resolve_powerbi_catalog",
        "stage_label": "Resolução do catálogo",
        "message": "O catalogo Power BI foi resolvido.",
    },
    "extract_raw": {
        "stage": "extract_raw",
        "stage_label": "Extracao de microdados",
        "message": "A extração e a carga bruta dos microdados foram concluídas.",
    },
    "materialize_staging": {
        "stage": "materialize_staging",
        "stage_label": "Materializacao de staging",
        "message": "A staging deduplicada foi materializada.",
    },
    "build_curated_views": {
        "stage": "build_curated_views",
        "stage_label": "Publicacao de curated",
        "message": "As views e materialized views curadas foram publicadas.",
    },
    "run_quality_checks": {
        "stage": "run_quality_checks",
        "stage_label": "Checagens de qualidade",
        "message": "As checagens operacionais e de qualidade foram executadas.",
    },
    "finalize_run": {
        "stage": "finalize_run",
        "stage_label": "Encerramento da execução",
        "message": "A execucao da pipeline foi finalizada.",
    },
}
|
|
72
|
+
|
|
73
|
+
# Module-level cache slot for the PNP catalog: "value" starts as None and
# "loaded_at" starts as 0.0.
# NOTE(review): "loaded_at" presumably holds a time.monotonic() reading used
# for expiry; the code that fills and reads the cache is not visible here.
_PNP_CATALOG_CACHE: dict[str, Any] = {"value": None, "loaded_at": 0.0}
# Lock created alongside the cache.
# NOTE(review): presumably guards access to _PNP_CATALOG_CACHE; confirm at the use sites.
_PNP_CATALOG_CACHE_LOCK = Lock()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _coerce_int(value: object) -> int | None:
    """Best-effort conversion of a loosely typed value to ``int``.

    Args:
        value: Any object; ``bool``, ``int``, ``float`` and ``str`` are
            understood, everything else is rejected.

    Returns:
        The integer value, or ``None`` when *value* cannot be interpreted as
        an integer. Floats are truncated toward zero. Strings are stripped and
        must consist of digits with optional leading ``-`` characters (a
        leading ``+`` is not accepted).

    This helper never raises: inputs that pass the cheap pre-checks but are
    still rejected by ``int()`` (NaN, infinities, ``"--5"``, digit-like
    characters such as ``"²"``) yield ``None``.
    """
    if isinstance(value, bool):
        return int(value)
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        try:
            return int(value)
        except (ValueError, OverflowError):
            # int(nan) raises ValueError; int(+/-inf) raises OverflowError.
            return None
    if isinstance(value, str):
        normalized = value.strip()
        if normalized and normalized.lstrip("-").isdigit():
            try:
                return int(normalized)
            except ValueError:
                # e.g. "--5" (several minus signs) or "²" (isdigit() is true
                # but the character is not a decimal digit).
                return None
    return None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _parse_iso_datetime(value: object) -> datetime | None:
    """Normalize a datetime or ISO-8601 string to a timezone-aware UTC datetime.

    Args:
        value: A ``datetime`` instance or a string accepted by
            ``datetime.fromisoformat`` (every ``"Z"`` is rewritten to
            ``"+00:00"`` before parsing).

    Returns:
        The value expressed in UTC. Naive datetimes are tagged as UTC without
        shifting the clock time. ``None`` is returned for blank strings,
        unparsable strings and any other input type.
    """
    if isinstance(value, datetime):
        moment = value
    elif isinstance(value, str) and value.strip():
        text = value.strip().replace("Z", "+00:00")
        try:
            moment = datetime.fromisoformat(text)
        except ValueError:
            return None
    else:
        return None

    # Naive values are assumed to already be in UTC; aware values are converted.
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# Request body carrying a dashboard id and optional parameters.
# NOTE(review): presumably consumed by the dashboard-embed endpoint; the
# handler is not visible in this part of the file.
class EmbedRequest(BaseModel):
    # Required dashboard id; must be >= 1.
    dashboard_id: int = Field(..., ge=1)
    # Optional free-form parameters; a fresh empty dict is created per instance.
    params: dict[str, object] = Field(default_factory=dict)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# Request body carrying a single natural-language question.
class AskRequest(BaseModel):
    # Required question text, 3 to 1000 characters.
    question: str = Field(..., min_length=3, max_length=1000)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Request body for an admin SQL query: the statement plus a row cap.
class AdminSqlQueryRequest(BaseModel):
    # Required SQL text, 1 to 100,000 characters.
    sql: str = Field(..., min_length=1, max_length=100_000)
    # Maximum number of rows requested; defaults to 500, allowed range 1..5000.
    max_rows: int = Field(default=500, ge=1, le=5_000)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Request body with admin login credentials.
class AdminLoginRequest(BaseModel):
    # Required username, 1 to 120 characters.
    username: str = Field(..., min_length=1, max_length=120)
    # Required password, 1 to 255 characters.
    password: str = Field(..., min_length=1, max_length=255)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Request body carrying a refresh token.
class AdminRefreshRequest(BaseModel):
    # Required refresh token, 1 to 4096 characters.
    refresh_token: str = Field(..., min_length=1, max_length=4096)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Ollama provider section of the LLM settings payload.
class LlmProviderOllamaRequest(BaseModel):
    # Ollama base URL; defaults to "http://ollama:11434", 1 to 255 characters.
    base_url: str = Field(default="http://ollama:11434", min_length=1, max_length=255)
    # Model name; defaults to "sabia-7b", 1 to 120 characters.
    model: str = Field(default="sabia-7b", min_length=1, max_length=120)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Maritaca provider section of the LLM settings payload.
class LlmProviderMaritacaRequest(BaseModel):
    # Chat-completions endpoint URL; 1 to 255 characters.
    api_url: str = Field(default="https://chat.maritaca.ai/api/chat/completions", min_length=1, max_length=255)
    # Model name; defaults to "sabia-4", 1 to 120 characters.
    model: str = Field(default="sabia-4", min_length=1, max_length=120)
    # Request timeout in seconds; defaults to 60, allowed range 1..300.
    timeout_seconds: int = Field(default=60, ge=1, le=300)
    # Optional API key, at most 4096 characters; None when omitted.
    api_key: str | None = Field(default=None, max_length=4096)
    # NOTE(review): presumably asks the handler to drop a previously stored
    # key; the handler is not visible here.
    clear_api_key: bool = False
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Request body for updating the LLM settings: the provider selector plus the
# configuration section of both providers (both sections are required fields).
class AdminLlmSettingsUpdateRequest(BaseModel):
    # Must be exactly "ollama" or "maritaca".
    provider: str = Field(..., pattern="^(ollama|maritaca)$")
    # Ollama configuration section.
    ollama: LlmProviderOllamaRequest
    # Maritaca configuration section.
    maritaca: LlmProviderMaritacaRequest
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Request body for creating a user.
class AdminUserCreateRequest(BaseModel):
    # Required username, 3 to 120 characters.
    username: str = Field(..., min_length=3, max_length=120)
    # Required e-mail, 3 to 255 characters (length check only; no format validation here).
    email: str = Field(..., min_length=3, max_length=255)
    # Required password, 8 to 255 characters.
    password: str = Field(..., min_length=8, max_length=255)
    # Optional first name, at most 120 characters; defaults to "".
    first_name: str = Field(default="", max_length=120)
    # Optional last name, at most 120 characters; defaults to "".
    last_name: str = Field(default="", max_length=120)
    # Enabled flag; defaults to True.
    enabled: bool = True
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Request body carrying a password for a user/Metabase sync operation.
# NOTE(review): the handler that uses it is not visible in this part of the file.
class AdminUserMetabaseSyncRequest(BaseModel):
    # Required password, 8 to 255 characters.
    password: str = Field(..., min_length=8, max_length=255)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# Request body for creating a PNP instance: a name, the selected years and
# microdata types, an optional schedule string and an activation flag.
class PnpInstanceCreateRequest(BaseModel):
    instance_name: str = Field(..., min_length=3, max_length=120)
    selected_years: list[str] = Field(..., min_length=1)
    selected_microdados_types: list[str] = Field(..., min_length=1)
    schedule: str | None = Field(default=None, max_length=120)
    is_active: bool = False

    @model_validator(mode="after")
    def validate_sources(self) -> "PnpInstanceCreateRequest":
        """Trim, validate and de-duplicate the selected years and types.

        Blank entries are dropped, every remaining microdata type must be a
        member of ``PNP_MICRODADOS_TYPES``, and both lists must end up
        non-empty. Duplicates are removed while keeping first-seen order.

        Raises:
            ValueError: On an unsupported type, or when either list is empty
                after trimming.
        """
        years: list[str] = []
        for entry in self.selected_years:
            if not isinstance(entry, str):
                continue
            trimmed = entry.strip()
            if trimmed:
                years.append(trimmed)

        kinds: list[str] = []
        for entry in self.selected_microdados_types:
            trimmed = entry.strip()
            if trimmed:
                if trimmed not in PNP_MICRODADOS_TYPES:
                    raise ValueError(f"Unsupported PNP microdados type: {trimmed}")
                kinds.append(trimmed)

        if len(years) == 0:
            raise ValueError("At least one selected_years entry is required")
        if len(kinds) == 0:
            raise ValueError("At least one selected_microdados_types entry is required")

        # dict.fromkeys keeps the first occurrence of each entry, in order.
        self.selected_years = list(dict.fromkeys(years))
        self.selected_microdados_types = list(dict.fromkeys(kinds))
        return self
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# Request body for updating a PNP instance; both fields are optional.
class PnpInstanceUpdateRequest(BaseModel):
    # Optional schedule string, at most 120 characters; None when omitted.
    schedule: str | None = Field(default=None, max_length=120)
    # Optional activation flag; None when omitted.
    is_active: bool | None = None
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# Request body for creating a PNP connection.
class PnpConnectionCreateRequest(BaseModel):
    # Required connection name, 3 to 120 characters.
    connection_name: str = Field(..., min_length=3, max_length=120)
    # Activation flag; defaults to True.
    is_active: bool = True
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Request body for creating a PNP pipeline: a name, the key of the connection
# it belongs to, the selected years and microdata types, an optional schedule
# string and an activation flag.
class PnpPipelineCreateRequest(BaseModel):
    pipeline_name: str = Field(..., min_length=3, max_length=120)
    connection_key: str = Field(..., min_length=3, max_length=120)
    selected_years: list[str] = Field(..., min_length=1)
    selected_microdados_types: list[str] = Field(..., min_length=1)
    schedule: str | None = Field(default=None, max_length=120)
    is_active: bool = True

    @model_validator(mode="after")
    def validate_sources(self) -> "PnpPipelineCreateRequest":
        """Trim, validate and de-duplicate the selected years and types.

        Blank entries are dropped, every remaining microdata type must be a
        member of ``PNP_MICRODADOS_TYPES``, and both lists must end up
        non-empty. Duplicates are removed while keeping first-seen order.

        Raises:
            ValueError: On an unsupported type, or when either list is empty
                after trimming.
        """
        years: list[str] = []
        for entry in self.selected_years:
            if not isinstance(entry, str):
                continue
            trimmed = entry.strip()
            if trimmed:
                years.append(trimmed)

        kinds: list[str] = []
        for entry in self.selected_microdados_types:
            trimmed = entry.strip()
            if trimmed:
                if trimmed not in PNP_MICRODADOS_TYPES:
                    raise ValueError(f"Unsupported PNP microdados type: {trimmed}")
                kinds.append(trimmed)

        if len(years) == 0:
            raise ValueError("At least one selected_years entry is required")
        if len(kinds) == 0:
            raise ValueError("At least one selected_microdados_types entry is required")

        # dict.fromkeys keeps the first occurrence of each entry, in order.
        self.selected_years = list(dict.fromkeys(years))
        self.selected_microdados_types = list(dict.fromkeys(kinds))
        return self
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# FastAPI application object for this module.
app = FastAPI(title="dataif-api", version="0.4.0")

# CORS allow-list: settings.cors_allow_origins is split on commas; entries are
# trimmed and blank ones dropped.
allowed_origins = [origin.strip() for origin in settings.cors_allow_origins.split(",") if origin.strip()]
# Credentials are allowed, and every method and header is accepted for the
# configured origins.
# NOTE(review): if cors_allow_origins is ever set to "*", review the
# combination with allow_credentials=True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _require_admin(payload: dict[str, object] | None = Depends(verify_optional_bearer)) -> dict[str, object]:
    """FastAPI dependency that passes the bearer payload through ``require_admin``.

    Args:
        payload: Result of the ``verify_optional_bearer`` dependency; may be
            ``None``.

    Returns:
        The payload itself, or a new empty dict when the payload is ``None``
        or empty. Any exception raised by ``require_admin`` propagates.
    """
    require_admin(payload)
    if payload:
        return payload
    return {}
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _db_connect():
    """Open a new psycopg2 connection to the warehouse.

    The connection uses ``RealDictCursor`` as its cursor factory, so rows are
    returned as dict-like objects.

    Raises:
        HTTPException: 500 when ``settings.warehouse_dsn`` is empty or unset.
    """
    dsn = settings.warehouse_dsn
    if dsn:
        return psycopg2.connect(dsn, cursor_factory=RealDictCursor)
    raise HTTPException(status_code=500, detail="WAREHOUSE_DSN not configured")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _compact_sql(statement: str) -> str:
    """Strip SQL comments and collapse whitespace into single spaces.

    Block comments (``/* ... */``) are removed first, then line comments
    (``-- ...`` up to end of line); each removed comment is replaced by a
    space. Runs of whitespace are then squeezed to one space and the result
    is trimmed. The stripping is purely textual, so comment markers inside
    string literals are removed as well.
    """
    comment_rules = (
        (r"/\*.*?\*/", re.DOTALL),
        (r"--.*?$", re.MULTILINE),
    )
    text = statement
    for pattern, flags in comment_rules:
        text = re.sub(pattern, " ", text, flags=flags)
    squeezed = re.sub(r"\s+", " ", text)
    return squeezed.strip()
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _validate_admin_sql(statement: str) -> str:
    """Check that an admin-supplied statement looks like a single read-only query.

    The statement is compacted with ``_compact_sql`` and then checked, in
    order: it must not be empty, must start with ``select`` or ``with``
    (case-insensitive), must contain no ``;`` other than trailing ones, and
    must not contain any keyword from the deny-list as a whole word. The
    checks are textual, so a denied word inside a string literal or an
    identifier is rejected as well.

    Returns:
        The compacted statement with trailing semicolons removed.

    Raises:
        HTTPException: 422 when any check fails.
    """
    compact = _compact_sql(statement)
    lowered = compact.lower()

    def rejection(detail: str) -> HTTPException:
        return HTTPException(status_code=422, detail=detail)

    if not lowered:
        raise rejection("SQL vazio.")
    if not lowered.startswith(("select", "with")):
        raise rejection("Apenas SELECT ou WITH sao permitidos.")

    without_trailing = compact.rstrip(";")
    if ";" in without_trailing:
        raise rejection("Apenas uma instrucao SQL e permitida.")

    denied_keywords = (
        r"\binsert\b",
        r"\bupdate\b",
        r"\bdelete\b",
        r"\bdrop\b",
        r"\balter\b",
        r"\btruncate\b",
        r"\bcreate\b",
        r"\bgrant\b",
        r"\brevoke\b",
        r"\bcopy\b",
        r"\bcall\b",
        r"\bdo\b",
        r"\bexecute\b",
        r"\bvacuum\b",
        r"\banalyze\b",
        r"\bset\b",
        r"\breset\b",
    )
    if any(re.search(keyword, lowered) for keyword in denied_keywords):
        raise rejection("A consulta contem palavra-chave nao permitida.")

    return without_trailing
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _bounded_admin_sql(statement: str, max_rows: int) -> str:
    """Wrap *statement* as a subquery limited to ``max_rows + 1`` rows.

    One row more than *max_rows* is requested.
    NOTE(review): presumably so the caller can tell that the result was
    truncated; confirm at the call site.
    """
    wrapped = f"SELECT * FROM ({statement}) AS dataif_admin_sql_result LIMIT {max_rows + 1}"
    return wrapped
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _admin_sql_catalog() -> list[dict[str, object]]:
    """List tables, views and materialized views together with their columns.

    Relations in ``pg_catalog`` and ``information_schema`` are excluded.

    Returns:
        One row per (relation, column) with the keys ``schema_name``,
        ``relation_name``, ``relation_type`` (``'table'``, ``'view'`` or
        ``'materialized_view'``) and ``column_name``, ordered by schema,
        relation and column position. A relation without matching column rows
        appears once with a NULL ``column_name`` (LEFT JOIN).

    Raises:
        HTTPException: 500 from ``_db_connect`` when no DSN is configured.
    """
    conn = _db_connect()
    try:
        # "with conn" only commits or rolls back the transaction; psycopg2
        # does not close the connection on exit, hence the explicit close below.
        with conn, conn.cursor() as cur:
            # NOTE(review): information_schema.columns may not list
            # materialized-view columns in PostgreSQL, in which case
            # materialized views always come back with a NULL column_name.
            # Confirm against the target server.
            cur.execute(
                """
                WITH relations AS (
                    SELECT
                        table_schema AS schema_name,
                        table_name AS relation_name,
                        CASE table_type
                            WHEN 'VIEW' THEN 'view'
                            ELSE 'table'
                        END AS relation_type
                    FROM information_schema.tables
                    WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
                      AND table_type IN ('BASE TABLE', 'VIEW')
                    UNION ALL
                    SELECT
                        schemaname AS schema_name,
                        matviewname AS relation_name,
                        'materialized_view' AS relation_type
                    FROM pg_catalog.pg_matviews
                    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
                )
                SELECT
                    relations.schema_name,
                    relations.relation_name,
                    relations.relation_type,
                    columns.column_name
                FROM relations
                LEFT JOIN information_schema.columns AS columns
                    ON columns.table_schema = relations.schema_name
                    AND columns.table_name = relations.relation_name
                ORDER BY relations.schema_name, relations.relation_name, columns.ordinal_position NULLS LAST;
                """
            )
            return list(cur.fetchall())
    finally:
        conn.close()
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _metabase_dashboard_id_list() -> list[int]:
    """Parse ``settings.metabase_allowed_dashboard_ids`` into a list of ints.

    The setting is a comma-separated string; blank entries are skipped and
    duplicates are dropped while keeping first-seen order.

    Raises:
        HTTPException: 500 when an entry is not an integer, or when no ids
            remain after parsing.
    """
    tokens = [piece.strip() for piece in settings.metabase_allowed_dashboard_ids.split(",")]
    try:
        parsed = [int(token) for token in tokens if token]
    except ValueError as exc:
        raise HTTPException(status_code=500, detail="METABASE_ALLOWED_DASHBOARD_IDS is invalid") from exc

    # dict.fromkeys removes duplicates and preserves the original order.
    unique_ids = list(dict.fromkeys(parsed))
    if not unique_ids:
        raise HTTPException(status_code=500, detail="METABASE_ALLOWED_DASHBOARD_IDS is empty")
    return unique_ids
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _allowed_metabase_dashboard_ids() -> set[int]:
    """Return the configured allowed dashboard ids as a set.

    Raises whatever ``_metabase_dashboard_id_list`` raises (HTTP 500 on an
    invalid or empty setting).
    """
    return {*_metabase_dashboard_id_list()}
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _fallback_metabase_dashboard_id(allowed_ids: list[int]) -> int:
    """Pick the default dashboard id from static configuration.

    Args:
        allowed_ids: Allowed dashboard ids in configuration order. The first
            element is used when no default is configured, so an empty list
            raises ``IndexError`` in that case.

    Returns:
        ``settings.metabase_default_dashboard_id`` when it coerces to an int
        and is allowed; otherwise the first allowed id.

    Raises:
        HTTPException: 500 when a default is configured but is not in
            *allowed_ids*.
    """
    permitted = set(allowed_ids)
    configured = _coerce_int(settings.metabase_default_dashboard_id)
    if configured is None:
        return allowed_ids[0]
    if configured in permitted:
        return configured
    raise HTTPException(status_code=500, detail="METABASE_DEFAULT_DASHBOARD_ID is not allowed")
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _validate_metabase_dashboard_id(dashboard_id: int) -> None:
    """Reject dashboard ids that are not in the configured allow-list.

    Raises:
        HTTPException: 403 when *dashboard_id* is not allowed; 500 (from the
            allow-list parser) when the allow-list setting is invalid or empty.
    """
    if dashboard_id in _allowed_metabase_dashboard_ids():
        return
    raise HTTPException(status_code=403, detail="Dashboard id is not allowed for public embed")
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _ensure_app_settings_table(conn) -> None:
    """Create ``config.app_settings`` if it does not exist yet.

    Args:
        conn: An open database connection. The statement runs on a cursor
            obtained from it; this function does not commit or close the
            connection.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS config.app_settings (
            setting_key TEXT PRIMARY KEY,
            setting_value JSONB NOT NULL,
            metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
        )
        """
    with conn.cursor() as cursor:
        cursor.execute(ddl)
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def _read_metabase_default_dashboard_id() -> int:
    """Resolve the default Metabase dashboard id.

    Reads the row stored under ``METABASE_DEFAULT_DASHBOARD_SETTING_KEY`` in
    ``config.app_settings``. The stored id is returned only when it coerces to
    an int that is in the allow-list; in every other case the result of
    ``_fallback_metabase_dashboard_id`` is returned.

    Raises:
        HTTPException: 500 when the database read fails (other HTTPExceptions
            are re-raised unchanged).
    """
    allowed_ids = _metabase_dashboard_id_list()
    permitted = set(allowed_ids)
    lookup_sql = "SELECT setting_value FROM config.app_settings WHERE setting_key = %s"
    try:
        with _db_connect() as conn:
            _ensure_app_settings_table(conn)
            with conn.cursor() as cursor:
                cursor.execute(lookup_sql, (METABASE_DEFAULT_DASHBOARD_SETTING_KEY,))
                row = cursor.fetchone()
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to read default Metabase dashboard: {exc}") from exc

    if not row:
        return _fallback_metabase_dashboard_id(allowed_ids)

    stored = row["setting_value"]
    # The stored JSON may be either {"dashboard_id": ...} or a bare value.
    raw_id = stored.get("dashboard_id") if isinstance(stored, dict) else stored
    candidate = _coerce_int(raw_id)
    if candidate is None or candidate not in permitted:
        return _fallback_metabase_dashboard_id(allowed_ids)
    return candidate
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def _write_metabase_default_dashboard_id(dashboard_id: int) -> None:
    """Persist ``dashboard_id`` as the default Metabase dashboard.

    The id is validated against the allow-list first, then upserted into
    ``config.app_settings`` as ``{"dashboard_id": <id>}``.

    Raises:
        HTTPException: 403 from validation, or 500 when the write fails.
    """
    _validate_metabase_dashboard_id(dashboard_id)
    upsert_sql = """
                    INSERT INTO config.app_settings (setting_key, setting_value)
                    VALUES (%s, %s::jsonb)
                    ON CONFLICT (setting_key) DO UPDATE
                    SET setting_value = EXCLUDED.setting_value,
                        updated_at = NOW()
                    """
    try:
        with _db_connect() as conn:
            _ensure_app_settings_table(conn)
            with conn.cursor() as cursor:
                arguments = (
                    METABASE_DEFAULT_DASHBOARD_SETTING_KEY,
                    json.dumps({"dashboard_id": dashboard_id}),
                )
                cursor.execute(upsert_sql, arguments)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to save default Metabase dashboard: {exc}") from exc
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _signed_metabase_dashboard_payload(dashboard_id: int, params: dict[str, object] | None = None) -> dict[str, object]:
    """Build the ``{"dashboard_id", "signed_url"}`` payload for an embed.

    The id is validated against the allow-list before the URL is signed with
    ``build_signed_dashboard_url``. A missing or empty ``params`` is passed as
    an empty dict.
    """
    _validate_metabase_dashboard_id(dashboard_id)
    embed_params = params if params else {}
    url = build_signed_dashboard_url(
        site_url=settings.metabase_site_url,
        embed_secret=settings.metabase_embed_secret,
        dashboard_id=dashboard_id,
        params=embed_params,
    )
    return {"dashboard_id": dashboard_id, "signed_url": url}
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def _read_app_setting(setting_key: str) -> dict[str, Any] | None:
    """Fetch the JSON value stored under ``setting_key`` in ``config.app_settings``.

    Returns:
        The stored value when a row exists and its value is a dict, else ``None``.

    Raises:
        HTTPException: 500 when the database read fails.
    """
    lookup_sql = "SELECT setting_value FROM config.app_settings WHERE setting_key = %s"
    try:
        with _db_connect() as conn:
            _ensure_app_settings_table(conn)
            with conn.cursor() as cursor:
                cursor.execute(lookup_sql, (setting_key,))
                row = cursor.fetchone()
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to read app setting {setting_key}: {exc}") from exc

    if not row:
        return None
    stored = row["setting_value"]
    if isinstance(stored, dict):
        return stored
    return None
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _write_app_setting(setting_key: str, setting_value: dict[str, Any]) -> None:
    """Upsert ``setting_value`` (JSON-encoded) under ``setting_key``.

    On conflict the existing row's value is replaced and ``updated_at`` is set
    to ``NOW()``.

    Raises:
        HTTPException: 500 when serialization or the database write fails.
    """
    upsert_sql = """
                    INSERT INTO config.app_settings (setting_key, setting_value)
                    VALUES (%s, %s::jsonb)
                    ON CONFLICT (setting_key) DO UPDATE
                    SET setting_value = EXCLUDED.setting_value,
                        updated_at = NOW()
                    """
    try:
        with _db_connect() as conn:
            _ensure_app_settings_table(conn)
            with conn.cursor() as cursor:
                # Encoding stays inside the try so a non-serializable value
                # surfaces as the same 500 as a database failure.
                encoded = json.dumps(setting_value)
                cursor.execute(upsert_sql, (setting_key, encoded))
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to save app setting {setting_key}: {exc}") from exc
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def _default_vanna_llm_settings() -> dict[str, Any]:
|
|
495
|
+
return {
|
|
496
|
+
"provider": "ollama",
|
|
497
|
+
"ollama": {
|
|
498
|
+
"base_url": "http://ollama:11434",
|
|
499
|
+
"model": "sabia-7b",
|
|
500
|
+
},
|
|
501
|
+
"maritaca": {
|
|
502
|
+
"api_url": "https://chat.maritaca.ai/api/chat/completions",
|
|
503
|
+
"api_key": "",
|
|
504
|
+
"model": "sabia-4",
|
|
505
|
+
"timeout_seconds": 60,
|
|
506
|
+
},
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _vanna_llm_settings_from_env() -> dict[str, Any]:
    """Build the Vanna LLM configuration from ``settings``.

    Every text field is stripped and falls back to the matching entry of
    ``_default_vanna_llm_settings`` when blank. The Maritaca API key is taken
    as-is (no stripping, no default), and the timeout goes through
    ``_coerce_positive_int``.

    Unset (``None``) settings are treated as blank. Without that guard
    ``str(None)`` would produce the truthy literal ``"None"`` and be used as a
    real provider name, URL, model or API key.
    """
    defaults = _default_vanna_llm_settings()

    def _text(raw: object, fallback: str) -> str:
        # ``raw or ""`` maps None (and other falsy values) to an empty string.
        return str(raw or "").strip() or fallback

    return {
        "provider": str(settings.vanna_llm_provider or "").strip().lower() or defaults["provider"],
        "ollama": {
            "base_url": _text(settings.vanna_ollama_base_url, defaults["ollama"]["base_url"]),
            "model": _text(settings.vanna_ollama_model, defaults["ollama"]["model"]),
        },
        "maritaca": {
            "api_url": _text(settings.vanna_maritaca_api_url, defaults["maritaca"]["api_url"]),
            "api_key": str(settings.vanna_maritaca_api_key or ""),
            "model": _text(settings.vanna_maritaca_model, defaults["maritaca"]["model"]),
            "timeout_seconds": _coerce_positive_int(
                settings.vanna_maritaca_timeout_seconds,
                defaults["maritaca"]["timeout_seconds"],
            ),
        },
    }
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def _effective_vanna_llm_settings() -> dict[str, Any]:
    """Return the effective global Vanna LLM settings.

    Delegates to ``_effective_global_vanna_llm_settings``.
    """
    global_settings = _effective_global_vanna_llm_settings()
    return global_settings
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _effective_global_vanna_llm_settings() -> dict[str, Any]:
    """Merge the persisted global LLM settings over the environment settings.

    The value stored under ``VANNA_LLM_SETTING_KEY`` overrides the result of
    ``_vanna_llm_settings_from_env`` field by field; any missing or blank
    persisted field keeps the environment value. With nothing persisted the
    environment settings are returned unchanged.
    """
    env_config = _vanna_llm_settings_from_env()
    stored = _read_app_setting(VANNA_LLM_SETTING_KEY)
    if not isinstance(stored, dict):
        return env_config

    def _section(name: str) -> dict[str, Any]:
        # A persisted section that is not a dict is ignored entirely.
        candidate = stored.get(name)
        return candidate if isinstance(candidate, dict) else {}

    def _text(override: object, fallback: Any) -> Any:
        return str(override or fallback).strip() or fallback

    env_ollama = env_config["ollama"]
    env_maritaca = env_config["maritaca"]
    stored_ollama = _section("ollama")
    stored_maritaca = _section("maritaca")

    env_provider = env_config["provider"]
    provider = str(stored.get("provider") or env_provider).strip().lower() or env_provider

    merged_ollama = {
        "base_url": _text(stored_ollama.get("base_url"), env_ollama["base_url"]),
        "model": _text(stored_ollama.get("model"), env_ollama["model"]),
    }
    merged_maritaca = {
        "api_url": _text(stored_maritaca.get("api_url"), env_maritaca["api_url"]),
        # The API key is deliberately not stripped.
        "api_key": str(stored_maritaca.get("api_key") or env_maritaca["api_key"]),
        "model": _text(stored_maritaca.get("model"), env_maritaca["model"]),
        "timeout_seconds": _coerce_positive_int(
            stored_maritaca.get("timeout_seconds"),
            env_maritaca["timeout_seconds"],
        ),
    }
    return {"provider": provider, "ollama": merged_ollama, "maritaca": merged_maritaca}
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def _user_vanna_llm_setting_key(payload: dict[str, object] | None) -> str | None:
|
|
563
|
+
if not payload:
|
|
564
|
+
return None
|
|
565
|
+
subject = str(payload.get("sub") or "").strip()
|
|
566
|
+
if subject:
|
|
567
|
+
return f"{VANNA_USER_LLM_SETTING_PREFIX}{subject}"
|
|
568
|
+
|
|
569
|
+
fallback = str(payload.get("preferred_username") or payload.get("email") or "").strip().lower()
|
|
570
|
+
if not fallback:
|
|
571
|
+
return None
|
|
572
|
+
safe_fallback = re.sub(r"[^a-z0-9_.@-]+", "_", fallback)
|
|
573
|
+
return f"{VANNA_USER_LLM_SETTING_PREFIX}{safe_fallback}"
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def _read_user_vanna_llm_settings(payload: dict[str, object] | None) -> dict[str, Any] | None:
    """Load the per-user LLM settings for the user described by ``payload``.

    Returns ``None`` when no settings key can be derived from the payload or
    when the stored value is not a dict.
    """
    key = _user_vanna_llm_setting_key(payload)
    if not key:
        return None
    stored = _read_app_setting(key)
    if isinstance(stored, dict):
        return stored
    return None
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def _effective_vanna_llm_settings_for_user(payload: dict[str, object] | None) -> dict[str, Any]:
    """Return the global LLM settings with the user's own Maritaca key applied.

    A non-blank personal ``maritaca.api_key`` replaces the global one. The
    returned dict also carries ``_maritaca_api_key_scope`` set to
    ``"personal"``, ``"global"`` or ``"empty"`` to say where the key came from.
    """
    config = _effective_global_vanna_llm_settings()
    global_key = str(config["maritaca"].get("api_key") or "")
    scope = "global" if global_key.strip() else "empty"

    personal = _read_user_vanna_llm_settings(payload)
    if isinstance(personal, dict):
        personal_section = personal.get("maritaca")
        if not isinstance(personal_section, dict):
            personal_section = {}
        personal_key = str(personal_section.get("api_key") or "")
        if personal_key.strip():
            # Build new dicts so the global config object is not mutated here.
            overridden_maritaca = {**config["maritaca"], "api_key": personal_key}
            config = {**config, "maritaca": overridden_maritaca}
            scope = "personal"

    config["_maritaca_api_key_scope"] = scope
    return config
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def _serialize_vanna_llm_settings_public(config: dict[str, Any]) -> dict[str, Any]:
    """Serialize LLM settings without the raw Maritaca API key.

    The key is replaced by ``masked_api_key`` (via ``_mask_secret``) plus the
    flags ``has_api_key``, ``api_key_scope`` and ``has_personal_api_key``.
    When ``config`` has no ``_maritaca_api_key_scope`` the scope is
    ``"configured"`` if a masked key exists, else ``"empty"``.
    """
    maritaca = config["maritaca"]
    raw_key = str(maritaca.get("api_key") or "")
    masked_key = _mask_secret(raw_key)

    explicit_scope = config.get("_maritaca_api_key_scope")
    if explicit_scope:
        key_scope = str(explicit_scope)
    else:
        key_scope = str("configured" if masked_key else "empty")

    public_ollama = {
        "base_url": config["ollama"]["base_url"],
        "model": config["ollama"]["model"],
    }
    public_maritaca = {
        "api_url": maritaca["api_url"],
        "model": maritaca["model"],
        "timeout_seconds": maritaca["timeout_seconds"],
        "has_api_key": bool(str(maritaca.get("api_key") or "").strip()),
        "api_key_scope": key_scope,
        "has_personal_api_key": key_scope == "personal",
        "masked_api_key": masked_key,
    }
    return {"provider": config["provider"], "ollama": public_ollama, "maritaca": public_maritaca}
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
def _persist_vanna_llm_settings(
    payload: AdminLlmSettingsUpdateRequest,
    admin_payload: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Persist the submitted LLM settings and return the caller's effective view.

    Provider, Ollama and Maritaca non-secret fields are always written to the
    global row (``VANNA_LLM_SETTING_KEY``). The Maritaca API key is handled
    according to whether a per-user settings key can be derived from
    ``admin_payload``:

    * with a user key, the submitted key (or an empty key when
      ``clear_api_key`` is set) goes to the user's own row and the global key
      is left as it currently is;
    * without a user key, the submitted/cleared key replaces the global key.

    The final global API key is resolved before writing, so the global row is
    written exactly once per call rather than once with the old key and again
    with the new one.

    Returns:
        ``_effective_vanna_llm_settings_for_user(admin_payload)`` after the writes.
    """
    current_global = _effective_global_vanna_llm_settings()
    user_setting_key = _user_vanna_llm_setting_key(admin_payload)

    clear_key = payload.maritaca.clear_api_key
    submitted_key = payload.maritaca.api_key

    global_api_key = current_global["maritaca"]["api_key"]
    if not user_setting_key:
        # No per-user row available: the key change applies globally.
        if clear_key:
            global_api_key = ""
        elif submitted_key is not None:
            global_api_key = submitted_key.strip()

    next_global = {
        "provider": payload.provider.strip().lower(),
        "ollama": {
            "base_url": payload.ollama.base_url.strip(),
            "model": payload.ollama.model.strip(),
        },
        "maritaca": {
            "api_url": payload.maritaca.api_url.strip(),
            "model": payload.maritaca.model.strip(),
            "timeout_seconds": int(payload.maritaca.timeout_seconds),
            "api_key": global_api_key,
        },
    }
    _write_app_setting(VANNA_LLM_SETTING_KEY, next_global)

    if user_setting_key:
        if clear_key:
            _write_app_setting(user_setting_key, {"maritaca": {"api_key": ""}})
        elif submitted_key is not None:
            _write_app_setting(user_setting_key, {"maritaca": {"api_key": submitted_key.strip()}})

    return _effective_vanna_llm_settings_for_user(admin_payload)
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def _vanna_llm_override_payload(config: dict[str, Any]) -> dict[str, object]:
|
|
663
|
+
return {
|
|
664
|
+
"provider": config["provider"],
|
|
665
|
+
"ollama": {
|
|
666
|
+
"base_url": config["ollama"]["base_url"],
|
|
667
|
+
"model": config["ollama"]["model"],
|
|
668
|
+
},
|
|
669
|
+
"maritaca": {
|
|
670
|
+
"api_url": config["maritaca"]["api_url"],
|
|
671
|
+
"api_key": config["maritaca"]["api_key"],
|
|
672
|
+
"model": config["maritaca"]["model"],
|
|
673
|
+
"timeout_seconds": config["maritaca"]["timeout_seconds"],
|
|
674
|
+
},
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def _vanna_provider_status(config: dict[str, Any]) -> dict[str, Any]:
|
|
679
|
+
provider = str(config["provider"]).strip().lower()
|
|
680
|
+
if provider == "maritaca":
|
|
681
|
+
has_key = bool(str(config["maritaca"].get("api_key") or "").strip())
|
|
682
|
+
return {
|
|
683
|
+
"provider": provider,
|
|
684
|
+
"available": has_key,
|
|
685
|
+
"detail": "Maritaca API key configured" if has_key else "Maritaca API key is not configured",
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
target_url = f"{str(config['ollama']['base_url']).rstrip('/')}/api/tags"
|
|
689
|
+
try:
|
|
690
|
+
with httpx.Client(timeout=5, follow_redirects=True) as client:
|
|
691
|
+
response = client.get(target_url)
|
|
692
|
+
except httpx.RequestError as exc:
|
|
693
|
+
return {"provider": provider, "available": False, "detail": f"Ollama is not reachable: {exc}"}
|
|
694
|
+
|
|
695
|
+
available = response.status_code < 500
|
|
696
|
+
return {
|
|
697
|
+
"provider": provider,
|
|
698
|
+
"available": available,
|
|
699
|
+
"detail": f"Ollama responded with HTTP {response.status_code}" if available else f"Ollama returned HTTP {response.status_code}",
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
def _mask_secret(value: str) -> str:
|
|
704
|
+
secret = value.strip()
|
|
705
|
+
if not secret:
|
|
706
|
+
return ""
|
|
707
|
+
if len(secret) <= 6:
|
|
708
|
+
return "*" * len(secret)
|
|
709
|
+
return f"{secret[:3]}{'*' * max(len(secret) - 6, 1)}{secret[-3:]}"
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _coerce_positive_int(value: object, default: int) -> int:
|
|
713
|
+
if isinstance(value, bool):
|
|
714
|
+
return int(value) or default
|
|
715
|
+
if isinstance(value, int):
|
|
716
|
+
return value if value > 0 else default
|
|
717
|
+
if isinstance(value, float):
|
|
718
|
+
parsed = int(value)
|
|
719
|
+
return parsed if parsed > 0 else default
|
|
720
|
+
if isinstance(value, str):
|
|
721
|
+
normalized = value.strip()
|
|
722
|
+
if normalized.isdigit():
|
|
723
|
+
parsed = int(normalized)
|
|
724
|
+
return parsed if parsed > 0 else default
|
|
725
|
+
return default
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _keycloak_admin_client() -> KeycloakAdminClient:
    """Build a ``KeycloakAdminClient`` from the Keycloak values in ``settings``.

    The timeout is ``settings.nilo_timeout_seconds`` with a floor of 30 seconds.
    """
    client_options = {
        "base_url": settings.keycloak_url,
        "realm": settings.keycloak_realm,
        "admin_realm": settings.keycloak_admin_realm,
        "admin_client_id": settings.keycloak_admin_client_id,
        "admin_username": settings.keycloak_admin_username,
        "admin_password": settings.keycloak_admin_password,
        "timeout_seconds": max(settings.nilo_timeout_seconds, 30.0),
    }
    return KeycloakAdminClient(**client_options)
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def _metabase_admin_client() -> MetabaseAdminClient:
    """Build a ``MetabaseAdminClient`` from the Metabase values in ``settings``.

    The timeout is ``settings.nilo_timeout_seconds`` with a floor of 30 seconds.
    """
    client_options = {
        "base_url": settings.metabase_api_url,
        "admin_email": settings.metabase_admin_email,
        "admin_password": settings.metabase_admin_password,
        "timeout_seconds": max(settings.nilo_timeout_seconds, 30.0),
    }
    return MetabaseAdminClient(**client_options)
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
def _list_admin_users_with_metabase_state() -> list[dict[str, Any]]:
    """List Keycloak admin users annotated with their Metabase sync state.

    Users are matched to Metabase users by case-insensitive, stripped e-mail.
    Each returned dict is the Keycloak user plus ``metabase_synced`` and
    ``metabase_user_id``.
    """
    keycloak_users = _keycloak_admin_client().list_admin_users()

    # Index Metabase users by normalized e-mail; entries without one are skipped.
    metabase_by_email: dict[str, Any] = {}
    for record in _metabase_admin_client().list_admin_users():
        email = str(record.get("email") or "").strip()
        if email:
            metabase_by_email[email.lower()] = record

    def _annotate(user: dict[str, Any]) -> dict[str, Any]:
        lookup_key = str(user.get("email") or "").strip().lower()
        match = metabase_by_email.get(lookup_key)
        return {
            **user,
            "metabase_synced": match is not None,
            "metabase_user_id": match.get("id") if match else None,
        }

    return [_annotate(user) for user in keycloak_users]
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def _keycloak_openid_url(path: str) -> str:
    """Return the OpenID Connect URL for ``path`` in the configured realm.

    Trailing slashes on the Keycloak base URL and leading slashes on ``path``
    are removed before joining.
    """
    base = settings.keycloak_url.rstrip("/")
    endpoint = path.lstrip("/")
    return f"{base}/realms/{settings.keycloak_realm}/protocol/openid-connect/{endpoint}"
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def _request_keycloak_token(form_fields: dict[str, str]) -> dict[str, Any]:
    """POST a token request to Keycloak and return the decoded token payload.

    ``client_id`` (and ``client_secret`` when configured) are added to
    ``form_fields``; a ``client_id`` already present in ``form_fields`` wins.

    Raises:
        HTTPException: 502 when the request fails or the response is not a
            JSON object with an ``access_token``; 401 when Keycloak answers
            400 or 401; otherwise Keycloak's own status for other errors.
    """
    form: dict[str, Any] = {"client_id": settings.keycloak_client_id}
    form.update(form_fields)
    if settings.keycloak_client_secret:
        form["client_secret"] = settings.keycloak_client_secret

    try:
        with httpx.Client(timeout=max(settings.nilo_timeout_seconds, 30.0), follow_redirects=True) as client:
            response = client.post(
                _keycloak_openid_url("token"),
                headers={"Content-Type": "application/x-www-form-urlencoded"},
                data=form,
            )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Keycloak unavailable: {exc}") from exc

    if response.status_code >= 400:
        # Prefer Keycloak's structured error description over the raw body.
        detail = response.text
        try:
            error_body = response.json()
        except ValueError:
            error_body = None
        if isinstance(error_body, dict):
            detail = str(error_body.get("error_description") or error_body.get("error") or detail)
        # 400 and 401 are both reported to the caller as 401.
        if response.status_code in {400, 401}:
            status_code = 401
        else:
            status_code = response.status_code
        raise HTTPException(status_code=status_code, detail=f"Falha ao autenticar no Keycloak: {detail}")

    try:
        token_payload = response.json()
    except ValueError as exc:
        raise HTTPException(status_code=502, detail="Keycloak returned a non-JSON token payload") from exc

    if not (isinstance(token_payload, dict) and token_payload.get("access_token")):
        raise HTTPException(status_code=502, detail="Keycloak returned an invalid token payload")
    return token_payload
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def _slugify_instance_name(value: str) -> str:
|
|
811
|
+
normalized = "".join(char.lower() if char.isalnum() else "_" for char in value.strip())
|
|
812
|
+
collapsed = "_".join(part for part in normalized.split("_") if part)
|
|
813
|
+
return collapsed[:80] or "pnp_instance"
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def _build_pnp_instance_key(instance_name: str) -> str:
    """Return the instance key: ``pnp_`` followed by the slugified name."""
    slug = _slugify_instance_name(instance_name)
    return f"pnp_{slug}"
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
def _build_pnp_connection_key(connection_name: str) -> str:
    """Return the connection key: ``pnp_conn_`` followed by the slugified name."""
    slug = _slugify_instance_name(connection_name)
    return f"pnp_conn_{slug}"
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
def _build_pnp_pipeline_key(pipeline_name: str) -> str:
    """Return the pipeline key: ``pnp_pipe_`` followed by the slugified name."""
    slug = _slugify_instance_name(pipeline_name)
    return f"pnp_pipe_{slug}"
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def _normalize_pipeline_schedule(schedule: str | None) -> str | None:
|
|
829
|
+
if schedule is None:
|
|
830
|
+
return None
|
|
831
|
+
normalized = schedule.strip()
|
|
832
|
+
if not normalized:
|
|
833
|
+
return None
|
|
834
|
+
if not croniter.is_valid(normalized):
|
|
835
|
+
raise HTTPException(status_code=422, detail="Invalid pipeline schedule cron expression")
|
|
836
|
+
return normalized
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def _load_pnp_powerbi_catalog_or_502() -> dict[str, Any]:
    """Return the public PNP microdata catalog, using ``_PNP_CATALOG_CACHE``.

    A cached catalog is reused while it is younger than
    ``settings.pnp_catalog_cache_ttl_seconds`` (a TTL of zero or less disables
    reuse). Otherwise the catalog is reloaded while holding
    ``_PNP_CATALOG_CACHE_LOCK``, after re-checking the cache inside the lock.
    If anything fails, a previously cached catalog is returned when one exists.

    Raises:
        HTTPException: 502 when loading fails and no cached catalog exists.
    """
    ttl_seconds = max(float(settings.pnp_catalog_cache_ttl_seconds), 0.0)

    def _fresh_entry() -> Any:
        # Returns the cached catalog when it is still within the TTL, else None.
        entry = _PNP_CATALOG_CACHE.get("value")
        stamp = float(_PNP_CATALOG_CACHE.get("loaded_at") or 0.0)
        age = monotonic() - stamp
        if entry is not None and ttl_seconds > 0 and age < ttl_seconds:
            return entry
        return None

    hit = _fresh_entry()
    if hit is not None:
        return hit

    try:
        with _PNP_CATALOG_CACHE_LOCK:
            # Re-check inside the lock: the cache may have been refreshed
            # between the first check and acquiring the lock.
            hit = _fresh_entry()
            if hit is not None:
                return hit

            request_timeout = max(float(settings.nilo_timeout_seconds), 30.0)
            catalog = load_public_microdados_catalog(timeout_seconds=request_timeout)
            _PNP_CATALOG_CACHE["value"] = catalog
            _PNP_CATALOG_CACHE["loaded_at"] = monotonic()
            return catalog
    except Exception as exc:
        # Serve the stale catalog rather than fail when a refresh goes wrong.
        stale_catalog = _PNP_CATALOG_CACHE.get("value")
        if stale_catalog is None:
            raise HTTPException(status_code=502, detail=f"Falha ao consultar o catálogo público de microdados da PNP: {exc}") from exc
        return stale_catalog
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
def _validate_pnp_selection_against_catalog(
|
|
869
|
+
*,
|
|
870
|
+
selected_years: list[str],
|
|
871
|
+
selected_microdados_types: list[str],
|
|
872
|
+
catalog: dict[str, Any],
|
|
873
|
+
) -> None:
|
|
874
|
+
available_years = {str(item).strip() for item in (catalog.get("available_years") or []) if isinstance(item, str)}
|
|
875
|
+
missing_years = [item for item in selected_years if item not in available_years]
|
|
876
|
+
if missing_years:
|
|
877
|
+
raise HTTPException(
|
|
878
|
+
status_code=422,
|
|
879
|
+
detail=f"Anos indisponiveis no catalogo publico da PNP: {', '.join(missing_years)}",
|
|
880
|
+
)
|
|
881
|
+
|
|
882
|
+
types_by_year = {
|
|
883
|
+
str(year): {str(item).strip() for item in items if isinstance(item, str)}
|
|
884
|
+
for year, items in dict(catalog.get("types_by_year") or {}).items()
|
|
885
|
+
}
|
|
886
|
+
for year in selected_years:
|
|
887
|
+
missing_types = [item for item in selected_microdados_types if item not in types_by_year.get(year, set())]
|
|
888
|
+
if missing_types:
|
|
889
|
+
raise HTTPException(
|
|
890
|
+
status_code=422,
|
|
891
|
+
detail=f"Tipos de microdados indisponiveis para o ano {year}: {', '.join(missing_types)}",
|
|
892
|
+
)
|
|
893
|
+
|
|
894
|
+
|
|
895
|
+
def _normalize_pnp_selected_downloads(items: list[dict[str, Any]] | tuple[dict[str, Any], ...] | None) -> list[dict[str, str]]:
|
|
896
|
+
normalized: list[dict[str, str]] = []
|
|
897
|
+
seen: set[tuple[str, str, str]] = set()
|
|
898
|
+
|
|
899
|
+
for item in items or []:
|
|
900
|
+
if not isinstance(item, dict):
|
|
901
|
+
continue
|
|
902
|
+
ano_base = str(item.get("ano_base") or "").strip()
|
|
903
|
+
tipo_microdados = str(item.get("tipo_microdados") or "").strip()
|
|
904
|
+
microdados_url = str(item.get("microdados_url") or "").strip()
|
|
905
|
+
if not ano_base or not tipo_microdados or not microdados_url:
|
|
906
|
+
continue
|
|
907
|
+
key = (ano_base, tipo_microdados, microdados_url)
|
|
908
|
+
if key in seen:
|
|
909
|
+
continue
|
|
910
|
+
seen.add(key)
|
|
911
|
+
normalized.append(
|
|
912
|
+
{
|
|
913
|
+
"ano_base": ano_base,
|
|
914
|
+
"tipo_microdados": tipo_microdados,
|
|
915
|
+
"microdados_url": microdados_url,
|
|
916
|
+
}
|
|
917
|
+
)
|
|
918
|
+
|
|
919
|
+
return normalized
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
def _resolve_pnp_selected_downloads(
    *,
    selected_years: list[str],
    selected_microdados_types: list[str],
    catalog: dict[str, Any],
) -> list[dict[str, str]]:
    """Select the catalog downloads matching the chosen years and types.

    Catalog items are filtered by year and type, normalized, and sorted by the
    year's position in ``catalog["available_years"]``, then the type's position
    in ``PNP_MICRODADOS_TYPES``, then URL.

    Raises:
        HTTPException: 422 when any selected (year, type) pair has no download
            in the catalog.
    """
    year_rank = {}
    for position, year in enumerate(catalog.get("available_years") or []):
        year_rank[str(year).strip()] = position
    type_rank = {}
    for position, kind in enumerate(PNP_MICRODADOS_TYPES):
        type_rank[kind] = position

    wanted_years = set(selected_years)
    wanted_types = set(selected_microdados_types)

    def _is_selected(entry: Any) -> bool:
        if not isinstance(entry, dict):
            return False
        if str(entry.get("ano_base") or "").strip() not in wanted_years:
            return False
        return str(entry.get("tipo_microdados") or "").strip() in wanted_types

    def _sort_key(entry: dict[str, str]) -> tuple[int, int, str]:
        # Unknown years sort after all known ones; unknown types get rank 999.
        return (
            year_rank.get(entry["ano_base"], len(year_rank)),
            type_rank.get(entry["tipo_microdados"], 999),
            entry["microdados_url"],
        )

    candidates = [entry for entry in (catalog.get("items") or []) if _is_selected(entry)]
    filtered = sorted(_normalize_pnp_selected_downloads(candidates), key=_sort_key)

    expected_pairs = set()
    for year in selected_years:
        for microdados_type in selected_microdados_types:
            expected_pairs.add((year, microdados_type))
    resolved_pairs = {(entry["ano_base"], entry["tipo_microdados"]) for entry in filtered}

    missing_pairs = sorted(expected_pairs - resolved_pairs)
    if not missing_pairs:
        return filtered

    detail = ", ".join(f"{year} / {microdados_type}" for year, microdados_type in missing_pairs)
    raise HTTPException(
        status_code=422,
        detail=f"O catálogo público nao expôs links de download para o recorte selecionado: {detail}",
    )
|
|
961
|
+
|
|
962
|
+
|
|
963
|
+
def _build_pnp_connection_payload(
    connection_key: str,
    connection_name: str,
    page_url: str,
) -> dict[str, Any]:
    """Build the endpoint record describing a PNP connection.

    The record's ``endpoint_key`` is ``<connection_key>__connection``; the URL
    fields other than ``page_url`` are ``None``; ``request_params`` tags the
    row as a ``powerbi_microdados`` connection entity.
    """
    source_mode = "powerbi_microdados"
    request_params: dict[str, Any] = dict(
        mode=source_mode,
        entity_type=PNP_CONNECTION_ENTITY,
        connection_key=connection_key,
        connection_name=connection_name,
        selected_source_label=PNP_POWERBI_SOURCE_LABEL,
        selected_source_group=PNP_POWERBI_GROUP_LABEL,
        source_path=source_mode,
    )

    payload: dict[str, Any] = {}
    payload["endpoint_key"] = f"{connection_key}__connection"
    payload["description"] = f"{connection_name} - conexão PNP"
    payload["page_url"] = page_url
    payload["api_endpoint_url"] = None
    payload["csv_url"] = None
    payload["dictionary_url"] = None
    payload["request_params"] = request_params
    return payload
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
def _build_pnp_pipeline_payload(
    pipeline_key: str,
    pipeline_name: str,
    connection_key: str,
    connection_name: str,
    page_url: str,
    selected_years: list[str],
    selected_microdados_types: list[str],
    selected_downloads: list[dict[str, str]],
    schedule: str | None,
) -> dict[str, Any]:
    """Build the endpoint record describing a PNP pipeline.

    ``request_params`` carries the pipeline and connection identifiers, copies
    of the selected years/types, the normalized downloads, and the pipeline
    key/name repeated as ``instance_key``/``instance_name``. A ``schedule``
    entry is added only when the given schedule is non-blank.
    """
    source_mode = "powerbi_microdados"
    request_params: dict[str, Any] = dict(
        mode=source_mode,
        entity_type=PNP_PIPELINE_ENTITY,
        pipeline_key=pipeline_key,
        pipeline_name=pipeline_name,
        connection_key=connection_key,
        connection_name=connection_name,
        instance_key=pipeline_key,
        instance_name=pipeline_name,
        selected_years=list(selected_years),
        selected_microdados_types=list(selected_microdados_types),
        selected_downloads=_normalize_pnp_selected_downloads(selected_downloads),
        selected_source_label=PNP_POWERBI_SOURCE_LABEL,
        selected_source_group=PNP_POWERBI_GROUP_LABEL,
        source_path=source_mode,
    )

    trimmed_schedule = schedule.strip() if schedule else ""
    if trimmed_schedule:
        request_params["schedule"] = trimmed_schedule

    payload: dict[str, Any] = {}
    payload["endpoint_key"] = f"{pipeline_key}__powerbi_microdados"
    payload["description"] = f"{pipeline_name} - {PNP_POWERBI_SOURCE_LABEL}"
    payload["page_url"] = page_url
    payload["api_endpoint_url"] = None
    payload["csv_url"] = None
    payload["dictionary_url"] = None
    payload["request_params"] = request_params
    return payload
|
|
1028
|
+
|
|
1029
|
+
|
|
1030
|
+
def _row_entity_type(request_params: dict[str, Any]) -> str:
    """Classify a row as a connection or a pipeline entity.

    An explicit ``entity_type`` equal to one of the two known values is
    returned as-is (lower-cased, stripped). Otherwise a row that carries
    selected years or selected microdata types is a pipeline, and anything
    else is a connection.
    """
    declared = str(request_params.get("entity_type") or "").strip().lower()
    if declared in (PNP_CONNECTION_ENTITY, PNP_PIPELINE_ENTITY):
        return declared

    has_selection = any(
        request_params.get(field) for field in ("selected_years", "selected_microdados_types")
    )
    return PNP_PIPELINE_ENTITY if has_selection else PNP_CONNECTION_ENTITY
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
def _is_deleted_row(request_params: dict[str, Any]) -> bool:
|
|
1040
|
+
return bool(request_params.get("deleted"))
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
def _group_pnp_connections(
|
|
1044
|
+
rows: list[dict[str, Any]],
|
|
1045
|
+
*,
|
|
1046
|
+
include_deleted: bool = False,
|
|
1047
|
+
include_virtual: bool = True,
|
|
1048
|
+
) -> list[dict[str, Any]]:
|
|
1049
|
+
grouped: dict[str, dict[str, Any]] = {}
|
|
1050
|
+
|
|
1051
|
+
for row in rows:
|
|
1052
|
+
request_params = dict(row.get("request_params") or {})
|
|
1053
|
+
if str(request_params.get("mode") or "").strip().lower() != "powerbi_microdados":
|
|
1054
|
+
continue
|
|
1055
|
+
if not include_deleted and _is_deleted_row(request_params):
|
|
1056
|
+
continue
|
|
1057
|
+
|
|
1058
|
+
entity_type = _row_entity_type(request_params)
|
|
1059
|
+
if entity_type == PNP_CONNECTION_ENTITY:
|
|
1060
|
+
connection_key = str(request_params.get("connection_key") or "").strip()
|
|
1061
|
+
connection_name = str(request_params.get("connection_name") or connection_key).strip()
|
|
1062
|
+
elif include_virtual:
|
|
1063
|
+
connection_key = str(request_params.get("connection_key") or request_params.get("instance_key") or "").strip()
|
|
1064
|
+
connection_name = str(
|
|
1065
|
+
request_params.get("connection_name") or request_params.get("instance_name") or connection_key
|
|
1066
|
+
).strip()
|
|
1067
|
+
else:
|
|
1068
|
+
continue
|
|
1069
|
+
|
|
1070
|
+
if not connection_key:
|
|
1071
|
+
continue
|
|
1072
|
+
|
|
1073
|
+
connection = grouped.setdefault(
|
|
1074
|
+
connection_key,
|
|
1075
|
+
{
|
|
1076
|
+
"connection_key": connection_key,
|
|
1077
|
+
"connection_name": connection_name or connection_key,
|
|
1078
|
+
"connector_id": "pnp",
|
|
1079
|
+
"page_url": row.get("page_url"),
|
|
1080
|
+
"is_active": False,
|
|
1081
|
+
"validation_status": "pending",
|
|
1082
|
+
"validation_message": "conexão sem validação recente.",
|
|
1083
|
+
"pipeline_count": 0,
|
|
1084
|
+
"pipelines": [],
|
|
1085
|
+
"created_at": row.get("created_at"),
|
|
1086
|
+
"updated_at": row.get("updated_at"),
|
|
1087
|
+
},
|
|
1088
|
+
)
|
|
1089
|
+
|
|
1090
|
+
connection["is_active"] = bool(connection["is_active"] or row.get("is_active"))
|
|
1091
|
+
if row.get("page_url"):
|
|
1092
|
+
connection["page_url"] = row.get("page_url")
|
|
1093
|
+
if row.get("updated_at") and (connection["updated_at"] is None or row.get("updated_at") > connection["updated_at"]):
|
|
1094
|
+
connection["updated_at"] = row.get("updated_at")
|
|
1095
|
+
|
|
1096
|
+
if entity_type == PNP_PIPELINE_ENTITY:
|
|
1097
|
+
pipeline_key = str(request_params.get("pipeline_key") or request_params.get("instance_key") or "").strip()
|
|
1098
|
+
pipeline_id = str(request_params.get("pipeline_id") or "").strip()
|
|
1099
|
+
pipeline_name = str(request_params.get("pipeline_name") or request_params.get("instance_name") or pipeline_key).strip()
|
|
1100
|
+
if pipeline_key and pipeline_key not in {item["pipeline_key"] for item in connection["pipelines"]}:
|
|
1101
|
+
connection["pipelines"].append(
|
|
1102
|
+
{
|
|
1103
|
+
"pipeline_id": pipeline_id or None,
|
|
1104
|
+
"pipeline_key": pipeline_key,
|
|
1105
|
+
"pipeline_name": pipeline_name or pipeline_key,
|
|
1106
|
+
}
|
|
1107
|
+
)
|
|
1108
|
+
|
|
1109
|
+
for connection in grouped.values():
|
|
1110
|
+
connection["pipelines"].sort(key=lambda item: item["pipeline_name"].lower())
|
|
1111
|
+
connection["pipeline_count"] = len(connection["pipelines"])
|
|
1112
|
+
|
|
1113
|
+
return sorted(grouped.values(), key=lambda item: item["connection_name"].lower())
|
|
1114
|
+
|
|
1115
|
+
|
|
1116
|
+
def _group_pnp_instances(rows: list[dict[str, Any]], *, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Aggregate PNP pipeline rows into one record per pipeline instance.

    Only rows whose ``request_params`` declare ``mode == "powerbi_microdados"``
    and whose entity type equals ``PNP_PIPELINE_ENTITY`` are considered. Rows
    are keyed by ``pipeline_key`` (falling back to ``instance_key``); rows
    without a key are ignored.

    Args:
        rows: Source rows. Each row may carry ``request_params``, ``id``,
            ``endpoint_key``, ``page_url``, ``is_active``, ``created_at`` and
            ``updated_at``.
        include_deleted: When false, rows flagged by ``_is_deleted_row`` are
            skipped.

    Returns:
        One aggregated dict per instance, sorted case-insensitively by
        ``instance_name``.
    """

    def _clean_strings(values: Any) -> list[str]:
        # Keep only non-blank string entries, stripped of surrounding whitespace.
        return [item.strip() for item in (values or []) if isinstance(item, str) and item.strip()]

    grouped: dict[str, dict[str, Any]] = {}

    for row in rows:
        request_params = dict(row.get("request_params") or {})
        if str(request_params.get("mode") or "").strip().lower() != "powerbi_microdados":
            continue
        if _row_entity_type(request_params) != PNP_PIPELINE_ENTITY:
            continue
        if not include_deleted and _is_deleted_row(request_params):
            continue

        instance_key = str(request_params.get("pipeline_key") or request_params.get("instance_key") or "").strip()
        if not instance_key:
            continue

        instance = grouped.get(instance_key)
        if instance is None:
            # First row seen for this key seeds the identity fields.
            instance = grouped[instance_key] = {
                "pipeline_id": str(request_params.get("pipeline_id") or "").strip() or None,
                "instance_key": instance_key,
                "instance_name": str(
                    request_params.get("pipeline_name") or request_params.get("instance_name") or instance_key
                ),
                "connector_id": "pnp",
                "ingestion_mode": "powerbi_microdados",
                "connection_key": str(request_params.get("connection_key") or instance_key),
                "connection_name": str(
                    request_params.get("connection_name") or request_params.get("instance_name") or instance_key
                ),
                "schedule": request_params.get("schedule"),
                "is_active": False,
                "source_count": 0,
                "selection_count": 0,
                "download_count": 0,
                "selected_years": [],
                "selected_microdados_types": [],
                "selected_downloads": [],
                "selected_endpoints": [],
                "endpoint_tables": [],
                "endpoints": [],
                "created_at": row.get("created_at"),
                "updated_at": row.get("updated_at"),
            }

        selected_years = _clean_strings(request_params.get("selected_years"))
        selected_microdados_types = _clean_strings(request_params.get("selected_microdados_types"))
        selected_downloads = _normalize_pnp_selected_downloads(request_params.get("selected_downloads"))
        selected_endpoints = _clean_strings(request_params.get("selected_endpoints"))
        endpoint_tables = [
            dict(item) for item in (request_params.get("endpoint_tables") or []) if isinstance(item, dict)
        ]

        # The instance is active as soon as any of its rows is active.
        instance["is_active"] = bool(instance["is_active"] or row.get("is_active"))
        if request_params.get("pipeline_id"):
            instance["pipeline_id"] = str(request_params.get("pipeline_id"))
        if request_params.get("schedule"):
            instance["schedule"] = request_params.get("schedule")
        row_updated_at = row.get("updated_at")
        if row_updated_at and (instance["updated_at"] is None or row_updated_at > instance["updated_at"]):
            instance["updated_at"] = row_updated_at

        instance["selected_years"] = sorted({*instance["selected_years"], *selected_years}, reverse=True)
        instance["selected_microdados_types"] = sorted(
            {*instance["selected_microdados_types"], *selected_microdados_types},
            # Known types keep their catalog order; unknown ones sort after them by name.
            key=lambda item: (PNP_MICRODADOS_TYPES.index(item) if item in PNP_MICRODADOS_TYPES else 999, item),
        )
        instance["selected_downloads"] = _normalize_pnp_selected_downloads(
            [*instance["selected_downloads"], *selected_downloads]
        )
        instance["selected_endpoints"] = sorted({*instance["selected_endpoints"], *selected_endpoints})

        # Normalize stored keys the same way as incoming keys so that a
        # whitespace-padded key is not appended a second time.
        known_endpoint_keys = {
            str(item.get("endpoint_key") or "").strip() for item in instance["endpoint_tables"]
        }
        for endpoint_table in endpoint_tables:
            endpoint_key = str(endpoint_table.get("endpoint_key") or "").strip()
            if endpoint_key and endpoint_key not in known_endpoint_keys:
                instance["endpoint_tables"].append(endpoint_table)
                known_endpoint_keys.add(endpoint_key)

        instance["endpoints"].append(
            {
                "id": row.get("id"),
                "endpoint_key": row.get("endpoint_key"),
                "page_url": row.get("page_url"),
                "is_active": row.get("is_active"),
                "selected_years": selected_years,
                "selected_microdados_types": selected_microdados_types,
                "selected_downloads": selected_downloads,
                "source_label": PNP_POWERBI_SOURCE_LABEL,
                "source_group": PNP_POWERBI_GROUP_LABEL,
                "source_path": "powerbi_microdados",
            }
        )

    for instance in grouped.values():
        instance["endpoint_tables"].sort(key=lambda item: str(item.get("endpoint_key") or ""))
        # Rows may lack an endpoint_key; coerce to str so None and str never get compared.
        instance["endpoints"].sort(key=lambda item: str(item.get("endpoint_key") or ""))
        instance["download_count"] = len(instance["selected_downloads"])
        # Without explicit downloads, the selection is the years x types grid.
        instance["selection_count"] = instance["download_count"] or (
            len(instance["selected_years"]) * len(instance["selected_microdados_types"])
        )
        instance["source_count"] = instance["selection_count"]

    return sorted(grouped.values(), key=lambda item: item["instance_name"].lower())
|
|
1229
|
+
|
|
1230
|
+
|
|
1231
|
+
def _load_pnp_instance_rows(instance_key: str, *, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Fetch the repository rows for one PNP instance.

    Raises:
        HTTPException: 404 when the repository raises ``LookupError``.
    """
    try:
        instance_rows = pnp_instance_repository.load_instance_rows(
            _db_connect,
            instance_key,
            include_deleted=include_deleted,
        )
    except LookupError as missing:
        raise HTTPException(status_code=404, detail="PNP instance not found") from missing
    return instance_rows
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
def _load_all_pnp_rows(*, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Return every PNP row from the repository, forwarding ``include_deleted``."""
    all_rows = pnp_instance_repository.load_all_rows(
        _db_connect,
        include_deleted=include_deleted,
    )
    return all_rows
|
|
1244
|
+
|
|
1245
|
+
|
|
1246
|
+
def _load_pnp_connection(connection_key: str) -> dict[str, Any]:
    """Load one PNP connection and return it in grouped form.

    Raises:
        HTTPException: 404 when the repository reports the connection as
            missing, or when grouping the loaded row yields no entry.
    """
    try:
        connection_row = pnp_instance_repository.load_connection(_db_connect, connection_key)
    except pnp_instance_repository.PnpConnectionNotFoundError as missing:
        raise HTTPException(status_code=404, detail="PNP connection not found") from missing

    matches = _group_pnp_connections([connection_row], include_virtual=False)
    if not matches:
        raise HTTPException(status_code=404, detail="PNP connection not found")
    return matches[0]
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
def _connection_health_snapshot() -> dict[str, str]:
    """Describe connector validation state from the in-memory catalog cache.

    A dict stored under ``_PNP_CATALOG_CACHE["value"]`` yields a "validated"
    snapshot using the cached ``page_url`` (or the default report URL);
    anything else yields a "pending" snapshot with the default report URL.
    """
    catalog = _PNP_CATALOG_CACHE.get("value")
    if not isinstance(catalog, dict):
        return {
            "validation_status": "pending",
            "validation_message": "A validação online ainda nao foi executada nesta sessão da API.",
            "page_url": DEFAULT_PNP_POWERBI_REPORT_URL,
        }

    return {
        "validation_status": "validated",
        "validation_message": "Conector PNP validado a partir do catalogo em cache.",
        "page_url": str(catalog.get("page_url") or DEFAULT_PNP_POWERBI_REPORT_URL),
    }
|
|
1272
|
+
|
|
1273
|
+
|
|
1274
|
+
def _enrich_connections_with_health(connections: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return copies of ``connections`` stamped with the current health snapshot.

    The validation status and message always come from the snapshot; the
    connection's own ``page_url`` wins over the snapshot's when it is truthy.
    The input dicts are not mutated.
    """
    health = _connection_health_snapshot()
    return [
        {
            **entry,
            "validation_status": health["validation_status"],
            "validation_message": health["validation_message"],
            "page_url": entry.get("page_url") or health["page_url"],
        }
        for entry in connections
    ]
|
|
1287
|
+
|
|
1288
|
+
|
|
1289
|
+
def _load_pnp_instance(instance_key: str) -> dict[str, Any]:
    """Load and group the rows of one PNP instance, returning the first group.

    Raises:
        HTTPException: 404 when grouping produces no instance (the row loader
            may also raise its own 404).
    """
    instance_rows = _load_pnp_instance_rows(instance_key)
    instances = _group_pnp_instances(instance_rows)
    if instances:
        return instances[0]
    raise HTTPException(status_code=404, detail="PNP instance not found")
|
|
1294
|
+
|
|
1295
|
+
|
|
1296
|
+
def _delete_pnp_instance(instance_key: str) -> dict[str, Any]:
    """Delete a PNP instance through the repository and return its result.

    Raises:
        HTTPException: 404 when the repository raises ``LookupError``.
    """
    try:
        outcome = pnp_instance_repository.delete_instance(_db_connect, instance_key=instance_key)
    except LookupError as missing:
        raise HTTPException(status_code=404, detail="PNP instance not found") from missing
    return outcome
|
|
1301
|
+
|
|
1302
|
+
|
|
1303
|
+
def _delete_pnp_connection(connection_key: str) -> dict[str, Any]:
    """Delete a PNP connection through the repository and return its result.

    Raises:
        HTTPException: 404 when the repository raises
            ``PnpConnectionNotFoundError``.
    """
    try:
        outcome = pnp_instance_repository.delete_connection(_db_connect, connection_key=connection_key)
    except pnp_instance_repository.PnpConnectionNotFoundError as missing:
        raise HTTPException(status_code=404, detail="PNP connection not found") from missing
    return outcome
|
|
1308
|
+
|
|
1309
|
+
|
|
1310
|
+
def _safe_parse_json_text(value: object) -> dict[str, Any]:
|
|
1311
|
+
if not isinstance(value, str) or not value.strip():
|
|
1312
|
+
return {}
|
|
1313
|
+
try:
|
|
1314
|
+
parsed = json.loads(value)
|
|
1315
|
+
except json.JSONDecodeError:
|
|
1316
|
+
return {}
|
|
1317
|
+
return parsed if isinstance(parsed, dict) else {}
|
|
1318
|
+
|
|
1319
|
+
|
|
1320
|
+
def _describe_pnp_diagnostic(item: dict[str, Any]) -> dict[str, Any]:
    """Classify one endpoint diagnostic into an operational status.

    Record counts are checked first (curated, then staging); only then is the
    reported ``status`` string considered. The result has the keys
    ``operational_status``, ``severity`` and ``message``.
    """

    def _verdict(operational_status: str, severity: str, message: str) -> dict[str, Any]:
        return {
            "operational_status": operational_status,
            "severity": severity,
            "message": message,
        }

    reported = str(item.get("status") or "missing").strip().lower()
    raw_total = _coerce_int(item.get("raw_record_count")) or 0
    staging_total = _coerce_int(item.get("staging_record_count")) or 0
    curated_total = _coerce_int(item.get("curated_record_count")) or 0

    if curated_total > 0:
        return _verdict("curated_ready", "ready", "A pipeline ja publicou o endpoint na camada curated.")

    if staging_total > 0:
        return _verdict("staging_ready", "ready", "O endpoint ja foi deduplicado e materializado em staging.")

    if reported in {"running", "queued"}:
        return _verdict("running", "pending", "O endpoint esta em processamento na execucao atual.")

    if reported in {"ok", "success", "cataloged"}:
        # A successful status with raw rows means data landed; without rows,
        # only the catalog was resolved.
        if raw_total > 0:
            return _verdict("raw_loaded", "ready", "Microdados públicos validados e persistidos em raw.")
        return _verdict("validated", "ready", "Catálogo público resolvido e pronto para ingestão.")

    if reported == "error":
        return _verdict("error", "danger", "A leitura dos microdados públicos falhou.")

    return _verdict("missing", "pending", "A fonte ainda nao produziu manifesto recente.")
|
|
1372
|
+
|
|
1373
|
+
|
|
1374
|
+
def _summarize_pnp_diagnostics(diagnostics: list[dict[str, Any]]) -> dict[str, Any]:
    """Count diagnostics per health bucket and find the latest update.

    Args:
        diagnostics: Diagnostic dicts carrying ``operational_status`` and
            ``updated_at``.

    Returns:
        A dict with ``total``, ``ready``, ``attention``, ``missing``,
        ``raw_loaded``, ``validated`` counters and ``last_updated_at``, which
        holds the original ``updated_at`` value of the most recent item (as
        ordered by ``_parse_iso_datetime``), or ``None``.
    """
    # Statuses that have their own dedicated counter in the summary.
    counted_statuses = {"raw_loaded", "validated"}
    # Every status _describe_pnp_diagnostic reports with severity "ready".
    # The staging/curated ones previously fell through to "attention", so a
    # fully published endpoint was reported as needing attention.
    ready_statuses = counted_statuses | {"staging_ready", "curated_ready"}

    summary: dict[str, Any] = {
        "total": len(diagnostics),
        "ready": 0,
        "attention": 0,
        "missing": 0,
        "raw_loaded": 0,
        "validated": 0,
        "last_updated_at": None,
    }

    latest_timestamp: datetime | None = None
    for item in diagnostics:
        operational_status = str(item.get("operational_status") or "missing")
        if operational_status in ready_statuses:
            summary["ready"] += 1
            if operational_status in counted_statuses:
                summary[operational_status] += 1
        elif operational_status == "missing":
            summary["missing"] += 1
        else:
            summary["attention"] += 1

        updated_at = _parse_iso_datetime(item.get("updated_at"))
        if updated_at and (latest_timestamp is None or updated_at > latest_timestamp):
            latest_timestamp = updated_at
            # Keep the value exactly as received rather than the parsed datetime.
            summary["last_updated_at"] = item.get("updated_at")

    return summary
|
|
1405
|
+
|
|
1406
|
+
|
|
1407
|
+
def _load_pnp_instance_diagnostics(instance_key: str) -> list[dict[str, Any]]:
    """Build one diagnostic record per active endpoint of a PNP instance.

    The query selects the instance's active endpoints and, for each one, the
    most recent run that has either a download or a catalog entry for the
    endpoint's ``tipo_microdados``, along with per-layer record counts for
    that run. Each row is then classified by ``_describe_pnp_diagnostic``.
    """
    query = """
            WITH pipeline_endpoints AS (
                SELECT
                    pe.instance_key,
                    pe.endpoint_key,
                    et.endpoint_name,
                    et.tipo_microdados
                FROM raw.pnp_pipeline_endpoints pe
                JOIN raw.pnp_endpoint_tables et
                  ON et.endpoint_key = pe.endpoint_key
                WHERE pe.instance_key = %s
                  AND pe.is_active = TRUE
                  AND et.is_active = TRUE
            ),
            endpoint_runs AS (
                SELECT
                    pe.endpoint_key,
                    pe.endpoint_name,
                    pe.tipo_microdados,
                    r.run_id,
                    r.status AS run_status,
                    r.started_at,
                    r.finished_at,
                    d.microdados_url AS source_url,
                    d.status AS download_status,
                    d.error_message AS download_error,
                    d.row_count_raw,
                    COALESCE(d.finished_at, d.started_at, r.finished_at, r.started_at) AS updated_at,
                    CASE pe.endpoint_key
                        WHEN 'matriculas' THEN (SELECT COUNT(*) FROM raw.pnp_matriculas_src src WHERE src.run_id = r.run_id)
                        WHEN 'eficiencia_academica' THEN (SELECT COUNT(*) FROM raw.pnp_eficiencia_academica_src src WHERE src.run_id = r.run_id)
                        WHEN 'servidores' THEN (SELECT COUNT(*) FROM raw.pnp_servidores_src src WHERE src.run_id = r.run_id)
                        WHEN 'financeiro' THEN (SELECT COUNT(*) FROM raw.pnp_financeiro_src src WHERE src.run_id = r.run_id)
                        ELSE 0
                    END AS raw_record_count,
                    CASE pe.endpoint_key
                        WHEN 'matriculas' THEN (SELECT COUNT(*) FROM staging.pnp_matriculas src WHERE src.run_id = r.run_id)
                        WHEN 'eficiencia_academica' THEN (SELECT COUNT(*) FROM staging.pnp_eficiencia_academica src WHERE src.run_id = r.run_id)
                        WHEN 'servidores' THEN (SELECT COUNT(*) FROM staging.pnp_servidores src WHERE src.run_id = r.run_id)
                        WHEN 'financeiro' THEN (SELECT COUNT(*) FROM staging.pnp_financeiro src WHERE src.run_id = r.run_id)
                        ELSE 0
                    END AS staging_record_count,
                    CASE pe.endpoint_key
                        WHEN 'matriculas' THEN (SELECT COUNT(*) FROM curated.vw_pnp_matriculas_perfil src WHERE src.run_id = r.run_id)
                        WHEN 'eficiencia_academica' THEN (SELECT COUNT(*) FROM curated.vw_pnp_eficiencia_situacao src WHERE src.run_id = r.run_id)
                        WHEN 'servidores' THEN (SELECT COUNT(*) FROM curated.vw_pnp_servidores_quadro src WHERE src.run_id = r.run_id)
                        WHEN 'financeiro' THEN (SELECT COUNT(*) FROM curated.vw_pnp_financeiro_execucao src WHERE src.run_id = r.run_id)
                        ELSE 0
                    END AS curated_record_count,
                    (
                        SELECT COUNT(*)
                        FROM raw.pnp_catalog_entries c
                        WHERE c.run_id = r.run_id
                          AND c.tipo_microdados = pe.tipo_microdados
                    ) AS catalog_entry_count,
                    ROW_NUMBER() OVER (
                        PARTITION BY pe.endpoint_key
                        ORDER BY COALESCE(d.finished_at, d.started_at, r.finished_at, r.started_at) DESC, r.run_id DESC
                    ) AS row_num
                FROM pipeline_endpoints pe
                JOIN raw.pnp_runs r
                  ON r.instance_key = pe.instance_key
                LEFT JOIN LATERAL (
                    SELECT
                        microdados_url,
                        status,
                        error_message,
                        row_count_raw,
                        started_at,
                        finished_at
                    FROM raw.pnp_downloads d
                    WHERE d.run_id = r.run_id
                      AND d.tipo_microdados = pe.tipo_microdados
                    ORDER BY COALESCE(d.finished_at, d.started_at) DESC, d.download_id DESC
                    LIMIT 1
                ) d ON TRUE
                WHERE d.microdados_url IS NOT NULL
                   OR EXISTS (
                        SELECT 1
                        FROM raw.pnp_catalog_entries c
                        WHERE c.run_id = r.run_id
                          AND c.tipo_microdados = pe.tipo_microdados
                   )
            )
            SELECT
                pe.endpoint_key,
                pe.endpoint_name,
                pe.tipo_microdados,
                er.run_id AS diagnostic_run_id,
                er.source_url,
                er.updated_at,
                er.run_status,
                er.download_status,
                er.download_error,
                er.row_count_raw,
                er.raw_record_count,
                er.staging_record_count,
                er.curated_record_count,
                er.catalog_entry_count
            FROM pipeline_endpoints pe
            LEFT JOIN endpoint_runs er
              ON er.endpoint_key = pe.endpoint_key
             AND er.row_num = 1
            ORDER BY pe.endpoint_key
            """

    with _db_connect() as conn, conn.cursor() as cur:
        cur.execute(query, (instance_key,))
        records = [dict(record) for record in cur.fetchall()]

    diagnostics: list[dict[str, Any]] = []
    for record in records:
        # Without a download status, fall back to whether the catalog listed the type.
        fallback_status = "cataloged" if _coerce_int(record.get("catalog_entry_count")) else "missing"
        microdados_type = record.get("tipo_microdados")
        run_succeeded = record.get("run_status") == "success"

        entry: dict[str, Any] = {
            "endpoint_key": record.get("endpoint_key"),
            "endpoint_name": record.get("endpoint_name"),
            "tipo_microdados": microdados_type,
            "ingestion_mode": "powerbi_microdados",
            "source_label": PNP_POWERBI_SOURCE_LABEL,
            "source_group": PNP_POWERBI_GROUP_LABEL,
            "source_path": "powerbi_microdados",
            "run_id": record.get("diagnostic_run_id"),
            "source_url": record.get("source_url"),
            "updated_at": record.get("updated_at"),
            "status": record.get("download_status") or fallback_status,
            "row_count": record.get("row_count_raw") or record.get("raw_record_count"),
            "selected_years": [],
            "selected_microdados_types": [microdados_type] if microdados_type else [],
            "downloads": [],
            "raw_run_id": record.get("diagnostic_run_id"),
            "raw_record_count": record.get("raw_record_count"),
            "staging_record_count": record.get("staging_record_count"),
            "curated_record_count": record.get("curated_record_count"),
            "raw_updated_at": record.get("updated_at"),
            # The download error is surfaced only when the run itself did not succeed.
            "error": None if run_succeeded else record.get("download_error"),
        }
        entry.update(_describe_pnp_diagnostic(entry))
        diagnostics.append(entry)

    return diagnostics
|
|
1548
|
+
|
|
1549
|
+
|
|
1550
|
+
def _build_pnp_runtime_event_message(task_id: str, status: str, details: dict[str, Any], error_message: str | None) -> str:
    """Pick the message shown for a run-step event.

    Precedence: the explicit ``error_message``, then ``details["error"]``,
    then the ``message`` registered for the task in ``PNP_RUNTIME_TASK_META``,
    and finally the status (or task id, or "unknown") with underscores
    replaced by spaces.
    """
    if error_message:
        return error_message

    if detail_error := details.get("error"):
        return str(detail_error)

    if registered_message := PNP_RUNTIME_TASK_META.get(task_id, {}).get("message"):
        return str(registered_message)

    fallback = status or task_id or "unknown"
    return str(fallback).replace("_", " ")
|
|
1559
|
+
|
|
1560
|
+
|
|
1561
|
+
def _load_pnp_instance_run_events(instance_key: str, limit: int = 12) -> list[dict[str, Any]]:
    """Return the most recent run-step events of a PNP instance.

    Steps with task id ``register_run`` are excluded by the query. Results are
    ordered newest first (by finish time, falling back to start time) and
    capped at ``limit`` rows.
    """
    query = """
            SELECT
                steps.run_id,
                steps.airflow_task_id,
                steps.status,
                steps.started_at,
                steps.finished_at,
                steps.records_affected,
                steps.error_message,
                steps.details_json
            FROM raw.pnp_run_steps steps
            JOIN raw.pnp_runs runs
              ON runs.run_id = steps.run_id
            WHERE runs.instance_key = %s
              AND steps.airflow_task_id <> 'register_run'
            ORDER BY COALESCE(steps.finished_at, steps.started_at) DESC NULLS LAST, steps.step_id DESC
            LIMIT %s
            """

    with _db_connect() as conn, conn.cursor() as cur:
        cur.execute(query, (instance_key, limit))
        step_rows = [dict(step) for step in cur.fetchall()]

    # Step status -> coarse UI state; anything not listed is "neutral".
    state_by_status = {
        "success": "success",
        "failed": "failed",
        "upstream_failed": "failed",
        "running": "pending",
        "queued": "pending",
    }

    events: list[dict[str, Any]] = []
    for step in step_rows:
        status = str(step.get("status") or "").strip()
        task_id = str(step.get("airflow_task_id") or "")
        task_meta = PNP_RUNTIME_TASK_META.get(task_id, {})
        details = dict(step.get("details_json") or {})
        started_at = step.get("started_at")
        finished_at = step.get("finished_at")
        affected = step.get("records_affected")
        step_error = step.get("error_message")

        event = {
            "run_id": step.get("run_id"),
            "status": status,
            "stage": task_meta.get("stage", task_id or "unknown"),
            "stage_label": task_meta.get("stage_label", str(task_id or status).replace("_", " ")),
            "state": state_by_status.get(status, "neutral"),
            "message": _build_pnp_runtime_event_message(task_id, status, details, step_error),
            "timestamp": finished_at or started_at,
            "started_at": started_at,
            "finished_at": finished_at,
            # The affected-row count is reported as "extracted" only for the extract task.
            "extracted_count": affected if task_id == "extract_raw" else None,
            "loaded_count": affected,
            "endpoint_count": _coerce_int(details.get("endpoint_count")),
            "asset_count": _coerce_int(details.get("asset_count")),
            "raw_count": _coerce_int(details.get("raw_count")) or _coerce_int(details.get("loaded_count")),
            "download_count": _coerce_int(details.get("download_count")) or _coerce_int(details.get("selected_download_count")),
            "error": step_error or details.get("error"),
        }
        events.append(event)

    return events
|
|
1621
|
+
|
|
1622
|
+
|
|
1623
|
+
def _build_pnp_ingestion_summary(run_events: list[dict[str, Any]]) -> dict[str, Any]:
|
|
1624
|
+
if not run_events:
|
|
1625
|
+
return {
|
|
1626
|
+
"status": "not_started",
|
|
1627
|
+
"message": "A instância ainda não gerou eventos recentes de extração ou validação.",
|
|
1628
|
+
"last_event_at": None,
|
|
1629
|
+
"latest_success_at": None,
|
|
1630
|
+
"latest_success_stage": None,
|
|
1631
|
+
"stages": {},
|
|
1632
|
+
}
|
|
1633
|
+
|
|
1634
|
+
latest_by_stage: dict[str, dict[str, Any]] = {}
|
|
1635
|
+
latest_success: dict[str, Any] | None = None
|
|
1636
|
+
latest_issue: dict[str, Any] | None = None
|
|
1637
|
+
latest_event = run_events[0]
|
|
1638
|
+
|
|
1639
|
+
for item in run_events:
|
|
1640
|
+
stage = str(item.get("stage") or "unknown")
|
|
1641
|
+
latest_by_stage.setdefault(stage, item)
|
|
1642
|
+
if latest_success is None and item.get("state") == "success":
|
|
1643
|
+
latest_success = item
|
|
1644
|
+
if latest_issue is None and item.get("state") == "failed":
|
|
1645
|
+
latest_issue = item
|
|
1646
|
+
|
|
1647
|
+
curated_event = latest_by_stage.get("build_curated_views")
|
|
1648
|
+
staging_event = latest_by_stage.get("materialize_staging")
|
|
1649
|
+
raw_event = latest_by_stage.get("extract_raw")
|
|
1650
|
+
|
|
1651
|
+
if latest_event.get("state") == "pending":
|
|
1652
|
+
status = "running"
|
|
1653
|
+
message = "A instância tem uma execucao ativa no momento."
|
|
1654
|
+
elif curated_event and curated_event.get("state") == "success":
|
|
1655
|
+
status = "curated_ready"
|
|
1656
|
+
message = "A instância já publicou dados para consumo em curated."
|
|
1657
|
+
elif staging_event and staging_event.get("state") == "success":
|
|
1658
|
+
status = "staging_ready"
|
|
1659
|
+
message = "A instância já deduplicou e materializou dados em staging."
|
|
1660
|
+
elif raw_event and raw_event.get("state") == "success":
|
|
1661
|
+
status = "raw_loaded"
|
|
1662
|
+
message = "A instância já carregou microdados na camada raw."
|
|
1663
|
+
elif latest_issue:
|
|
1664
|
+
status = "failed"
|
|
1665
|
+
message = str(latest_issue.get("message") or "Há uma falha operacional recente na instância.")
|
|
1666
|
+
else:
|
|
1667
|
+
status = "pending"
|
|
1668
|
+
message = "A instância tem atividade recente, mas ainda sem materialização consolidada."
|
|
1669
|
+
|
|
1670
|
+
return {
|
|
1671
|
+
"status": status,
|
|
1672
|
+
"message": message,
|
|
1673
|
+
"last_event_at": run_events[0].get("timestamp"),
|
|
1674
|
+
"latest_success_at": latest_success.get("timestamp") if latest_success else None,
|
|
1675
|
+
"latest_success_stage": latest_success.get("stage") if latest_success else None,
|
|
1676
|
+
"stages": latest_by_stage,
|
|
1677
|
+
}
|
|
1678
|
+
|
|
1679
|
+
|
|
1680
|
+
def _load_pnp_instance_integrations(instance_key: str, limit: int = 10) -> list[dict[str, Any]]:
    """List the latest runs of a PNP instance with per-run counters.

    Each row carries the run id, an ``integration_type`` derived from the run
    summary's ``operation`` field, timing, status, and asset / endpoint /
    record / staging / package counts. Rows are ordered newest first and
    capped at ``limit``.
    """
    query = """
            WITH endpoint_counts AS (
                SELECT
                    instance_key,
                    COUNT(*) AS endpoint_count
                FROM raw.pnp_pipeline_endpoints
                WHERE instance_key = %s
                  AND is_active = TRUE
                GROUP BY instance_key
            ),
            download_counts AS (
                SELECT
                    run_id,
                    COUNT(*) AS asset_count
                FROM raw.pnp_downloads
                GROUP BY run_id
            ),
            package_counts AS (
                SELECT
                    run_id,
                    COUNT(*) AS package_count
                FROM raw.pnp_run_packages
                GROUP BY run_id
            )
            SELECT
                runs.run_id,
                CASE
                    WHEN COALESCE(runs.run_summary_json->>'operation', 'sync') = 'validate' THEN 'source_validation'
                    ELSE 'pipeline_sync'
                END AS integration_type,
                runs.started_at,
                runs.finished_at,
                COALESCE(download_counts.asset_count, 0) AS asset_count,
                COALESCE(endpoint_counts.endpoint_count, 0) AS endpoint_count,
                runs.raw_record_count AS record_count,
                COALESCE(staging.deduplicated_record_count, 0) AS staging_record_count,
                COALESCE(package_counts.package_count, 0) AS package_count,
                runs.status
            FROM raw.pnp_runs runs
            LEFT JOIN endpoint_counts
              ON endpoint_counts.instance_key = runs.instance_key
            LEFT JOIN download_counts
              ON download_counts.run_id = runs.run_id
            LEFT JOIN package_counts
              ON package_counts.run_id = runs.run_id
            LEFT JOIN staging.pnp_ingestion_runs staging
              ON staging.run_id = runs.run_id
            WHERE runs.instance_key = %s
            ORDER BY COALESCE(runs.finished_at, runs.started_at) DESC NULLS LAST
            LIMIT %s
            """
    # The instance key is bound twice: once in the endpoint_counts CTE and
    # once in the outer filter on runs.
    bindings = (instance_key, instance_key, limit)

    with _db_connect() as conn, conn.cursor() as cur:
        cur.execute(query, bindings)
        fetched = cur.fetchall()
        return [dict(record) for record in fetched]
|
|
1737
|
+
|
|
1738
|
+
|
|
1739
|
+
def _persist_pnp_instance_settings(
    instance_key: str,
    *,
    schedule: str | None = None,
    is_active: bool | None = None,
) -> dict[str, Any]:
    """Store schedule / activation settings and return the reloaded instance.

    Raises:
        HTTPException: 404 when the repository raises ``LookupError`` (the
            reload may also raise its own 404).
    """
    new_settings = {"schedule": schedule, "is_active": is_active}
    try:
        pnp_instance_repository.update_instance_settings(
            _db_connect,
            instance_key=instance_key,
            **new_settings,
        )
    except LookupError as missing:
        raise HTTPException(status_code=404, detail="PNP instance not found") from missing

    return _load_pnp_instance(instance_key)
|
|
1755
|
+
|
|
1756
|
+
|
|
1757
|
+
def _airflow_request(method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Send an authenticated request to the Airflow API and return its JSON body.

    Args:
        method: HTTP method name.
        path: Path appended to ``settings.airflow_api_url`` (trailing slash on
            the base URL is removed first).
        payload: Optional JSON body.

    Returns:
        The decoded JSON response, or ``{}`` when the response has no body.

    Raises:
        HTTPException: 500 when the Airflow URL is not configured; 502 when
            the request itself fails or a successful response is not valid
            JSON; the upstream status code when Airflow answers with >= 400.
    """
    if not settings.airflow_api_url:
        raise HTTPException(status_code=500, detail="AIRFLOW_API_URL not configured")

    target_url = f"{settings.airflow_api_url.rstrip('/')}{path}"
    try:
        with httpx.Client(
            # Never wait less than 30 seconds, whatever the configured timeout.
            timeout=max(settings.nilo_timeout_seconds, 30.0),
            follow_redirects=True,
            auth=(settings.airflow_admin_user, settings.airflow_admin_password),
        ) as client:
            response = client.request(method, target_url, json=payload)
    except Exception as exc:
        # Boundary with an external service: any transport failure becomes a 502.
        raise HTTPException(status_code=502, detail=f"Airflow unavailable: {exc}") from exc

    if response.status_code >= 400:
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise HTTPException(status_code=response.status_code, detail=detail)

    if not response.content:
        return {}

    try:
        return response.json()
    except ValueError as exc:
        # A 2xx answer with a non-JSON body (e.g. an HTML page from a proxy)
        # is an upstream problem, not an internal error of this API.
        raise HTTPException(status_code=502, detail="Airflow returned a non-JSON response") from exc
|
|
1782
|
+
|
|
1783
|
+
|
|
1784
|
+
def _build_airflow_run_id(dag_id: str, instance_key: str, operation: str | None = None) -> str:
|
|
1785
|
+
timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
|
1786
|
+
suffix = operation.strip().lower() if isinstance(operation, str) and operation.strip() else "run"
|
|
1787
|
+
return f"{dag_id}__{suffix}__{timestamp}"
|
|
1788
|
+
|
|
1789
|
+
|
|
1790
|
+
def _build_pnp_instance_dag_id(instance: dict[str, Any]) -> str:
    """Resolve the DAG id of an instance through the DAG provisioner.

    The pipeline id is read from the instance itself, falling back to its
    ``request_params``; a blank id is passed on as ``None``.
    """
    params = dict(instance.get("request_params") or {})
    raw_pipeline_id = instance.get("pipeline_id") or params.get("pipeline_id") or ""
    pipeline_id = str(raw_pipeline_id).strip()
    return pnp_dag_provisioner.build_pipeline_dag_id(
        str(instance["instance_key"]),
        pipeline_id or None,
    )
|
|
1797
|
+
|
|
1798
|
+
|
|
1799
|
+
def _wait_for_airflow_dag(
    dag_id: str,
    *,
    timeout_seconds: float = 90.0,
    poll_interval_seconds: float = 1.0,
) -> None:
    """Poll Airflow until ``GET /api/v1/dags/<dag_id>`` answers 200.

    Connection failures and 404 responses are retried after
    ``poll_interval_seconds``; any other status aborts immediately.

    Args:
        dag_id: DAG identifier to look up.
        timeout_seconds: Overall polling budget (never shorter than one poll
            interval).
        poll_interval_seconds: Pause between attempts.

    Raises:
        HTTPException: 500 when ``settings.airflow_api_url`` is empty; the
            upstream status for unexpected responses; 502 once the deadline
            passes without a 200.
    """
    if not settings.airflow_api_url:
        raise HTTPException(status_code=500, detail="AIRFLOW_API_URL not configured")

    dag_url = f"{settings.airflow_api_url.rstrip('/')}/api/v1/dags/{dag_id}"
    give_up_at = monotonic() + max(timeout_seconds, poll_interval_seconds)
    pending_reason: str | None = None

    while monotonic() < give_up_at:
        try:
            with httpx.Client(
                timeout=max(settings.nilo_timeout_seconds, 30.0),
                follow_redirects=True,
                auth=(settings.airflow_admin_user, settings.airflow_admin_password),
            ) as client:
                response = client.get(dag_url)
        except Exception as exc:
            pending_reason = f"Airflow unavailable: {exc}"
        else:
            status = response.status_code
            if status == 200:
                return
            if status != 404:
                # Anything other than "not there yet" is surfaced to the caller as-is.
                try:
                    detail = response.json()
                except ValueError:
                    detail = response.text
                raise HTTPException(status_code=status, detail=detail)
            pending_reason = f"DAG {dag_id} ainda nao foi registrada no Airflow."

        # Both retryable outcomes (transport failure, 404) wait before the next poll.
        sleep(poll_interval_seconds)

    raise HTTPException(
        status_code=502,
        detail=pending_reason or f"Airflow nao registrou a DAG {dag_id} dentro do prazo esperado.",
    )
|
|
1843
|
+
|
|
1844
|
+
|
|
1845
|
+
def _trigger_pnp_airflow_dag(dag_id: str, instance_key: str, *, operation: str) -> dict[str, Any]:
    """Create a DAG run in Airflow for the given PNP instance.

    Args:
        dag_id: DAG to trigger.
        instance_key: Instance the run belongs to; forwarded in the run ``conf``.
        operation: Operation name, stored in ``conf`` and used in the run id.

    Returns:
        A dict with ``dag_id``, ``instance_key`` and the Airflow response under
        ``dag_run``.
    """
    # Called only for its side effects; the returned record is not used here.
    _load_pnp_instance(instance_key)

    run_conf = {
        "instance_key": instance_key,
        "operation": operation,
        "requested_by": f"api.{operation}",
    }
    request_body = {
        "dag_run_id": _build_airflow_run_id(dag_id, instance_key, operation),
        "conf": run_conf,
    }
    dag_run = _airflow_request("POST", f"/api/v1/dags/{dag_id}/dagRuns", request_body)

    return {"dag_id": dag_id, "instance_key": instance_key, "dag_run": dag_run}
|
|
1864
|
+
|
|
1865
|
+
|
|
1866
|
+
def _load_pnp_instance_dag_runs(instance_key: str, limit: int = 10) -> list[dict[str, Any]]:
    """Return the most recent Airflow DAG runs that belong to a PNP instance.

    Runs are fetched for the instance's DAG, newest first. A run is kept when
    its ``conf["instance_key"]`` matches or its run id contains
    ``__<instance_key>__``. Kept runs are sorted locally by their latest known
    timestamp and cut to ``limit``.

    Args:
        instance_key: Key of the PNP instance.
        limit: Maximum number of runs to return.

    Returns:
        A list of dicts with selected DAG-run fields, most recent first.
    """
    instance = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(instance)

    # Over-fetch, because rows that fail the instance filter are dropped below.
    request_limit = max(limit * 4, 20)
    # Ask Airflow for the newest runs explicitly. Without ``order_by`` the page
    # follows the server's default ordering, so the newest runs could fall
    # outside the fetched page once a DAG has more runs than ``request_limit``.
    response = _airflow_request(
        "GET",
        f"/api/v1/dags/{dag_id}/dagRuns?limit={request_limit}&order_by=-execution_date",
    )

    copied_fields = ("state", "run_type", "logical_date", "queued_at", "start_date", "end_date", "note")
    items: list[dict[str, Any]] = []
    for row in response.get("dag_runs") or []:
        conf = row.get("conf") or {}
        dag_run_id = str(row.get("dag_run_id") or "")
        if conf.get("instance_key") != instance_key and f"__{instance_key}__" not in dag_run_id:
            continue
        item: dict[str, Any] = {"dag_id": dag_id, "dag_run_id": dag_run_id}
        item.update((name, row.get(name)) for name in copied_fields)
        item["conf"] = conf
        items.append(item)

    def _recency(item: dict[str, Any]) -> Any:
        # First available timestamp, from most to least specific.
        return (
            item.get("end_date")
            or item.get("start_date")
            or item.get("queued_at")
            or item.get("logical_date")
            or ""
        )

    items.sort(key=_recency, reverse=True)
    return items[:limit]
|
|
1897
|
+
|
|
1898
|
+
|
|
1899
|
+
@app.get("/api/health/live")
def live() -> dict[str, str]:
    """Liveness probe: always answers with a static ``ok`` status."""
    body = {"status": "ok"}
    return body
|
|
1902
|
+
|
|
1903
|
+
|
|
1904
|
+
@app.get("/api/health/ready")
def ready() -> dict[str, str]:
    """Readiness probe: always answers with a static ``ready`` status."""
    body = {"status": "ready"}
    return body
|
|
1907
|
+
|
|
1908
|
+
|
|
1909
|
+
@app.post("/api/admin/login")
def admin_login(payload: AdminLoginRequest) -> dict[str, Any]:
    """Exchange a username and password for a token via ``_request_keycloak_token``.

    The username is stripped of surrounding whitespace; the password is
    forwarded unchanged.
    """
    token_form = {
        "grant_type": "password",
        "username": payload.username.strip(),
        "password": payload.password,
    }
    return _request_keycloak_token(token_form)
|
|
1918
|
+
|
|
1919
|
+
|
|
1920
|
+
@app.post("/api/admin/refresh")
def admin_refresh(payload: AdminRefreshRequest) -> dict[str, Any]:
    """Exchange a refresh token for a new token via ``_request_keycloak_token``."""
    token_form = {
        "grant_type": "refresh_token",
        "refresh_token": payload.refresh_token,
    }
    return _request_keycloak_token(token_form)
|
|
1928
|
+
|
|
1929
|
+
|
|
1930
|
+
@app.get("/api/admin/whoami")
def whoami(payload: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Echo back the value produced by the ``_require_admin`` dependency as ``claims``."""
    claims = payload
    return {"claims": claims}
|
|
1933
|
+
|
|
1934
|
+
|
|
1935
|
+
@app.get("/api/admin/settings/llm")
def get_admin_llm_settings(payload: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the caller's effective Vanna LLM settings and the provider status.

    Returns:
        A dict with ``config`` (public serialization of the settings) and
        ``status`` (result of ``_vanna_provider_status``).
    """
    config = _effective_vanna_llm_settings_for_user(payload)
    public_config = _serialize_vanna_llm_settings_public(config)
    provider_status = _vanna_provider_status(config)
    return {"config": public_config, "status": provider_status}
|
|
1942
|
+
|
|
1943
|
+
|
|
1944
|
+
@app.get("/api/admin/settings/llm/status")
def get_admin_llm_settings_status(payload: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return only the provider status for the caller's effective Vanna LLM settings."""
    config = _effective_vanna_llm_settings_for_user(payload)
    return _vanna_provider_status(config)
|
|
1947
|
+
|
|
1948
|
+
|
|
1949
|
+
@app.patch("/api/admin/settings/llm")
def update_admin_llm_settings(
    payload: AdminLlmSettingsUpdateRequest,
    admin_payload: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Persist new Vanna LLM settings and return them with the provider status.

    Args:
        payload: Requested settings changes.
        admin_payload: Value produced by the ``_require_admin`` dependency.

    Returns:
        A dict with ``config`` (public serialization of the stored settings)
        and ``status`` (result of ``_vanna_provider_status``).
    """
    config = _persist_vanna_llm_settings(payload, admin_payload)
    public_config = _serialize_vanna_llm_settings_public(config)
    provider_status = _vanna_provider_status(config)
    return {"config": public_config, "status": provider_status}
|
|
1959
|
+
|
|
1960
|
+
|
|
1961
|
+
@app.get("/api/admin/users")
def list_admin_users(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """List admin users as returned by ``_list_admin_users_with_metabase_state``."""
    users = _list_admin_users_with_metabase_state()
    return {"items": users}
|
|
1964
|
+
|
|
1965
|
+
|
|
1966
|
+
@app.post("/api/admin/users")
def create_admin_user(
    payload: AdminUserCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create an admin user in Keycloak and then in Metabase.

    If the Metabase step fails, the freshly created Keycloak user is deleted
    again (best effort) and a 502 is raised. A failed rollback is appended to
    the error detail.

    Args:
        payload: New user data; text fields are stripped before use.

    Returns:
        ``{"user": {...}}`` with the Keycloak record plus ``metabase_synced``
        and ``metabase_user_id``.

    Raises:
        HTTPException: 502 when the Metabase user cannot be created.
    """
    keycloak_client = _keycloak_admin_client()
    metabase_client = _metabase_admin_client()

    username = payload.username.strip()
    email = payload.email.strip()
    first_name = payload.first_name.strip()
    last_name = payload.last_name.strip()

    created = keycloak_client.create_admin_user(
        username=username,
        email=email,
        password=payload.password,
        first_name=first_name,
        last_name=last_name,
        enabled=payload.enabled,
    )

    try:
        metabase_user = metabase_client.create_admin_user(
            email=email,
            password=payload.password,
            first_name=first_name,
            last_name=last_name,
        )
    except Exception as exc:
        detail = str(getattr(exc, "detail", exc))
        # Undo the Keycloak half so the two systems do not drift apart.
        try:
            keycloak_client.delete_user(str(created["id"]))
        except Exception as rollback_exc:  # pragma: no cover - defensive rollback
            rollback_error = str(getattr(rollback_exc, "detail", rollback_exc))
            if rollback_error:
                detail = f"{detail}. Keycloak rollback failed: {rollback_error}"
        raise HTTPException(status_code=502, detail=detail) from exc

    user = {
        **created,
        "metabase_synced": True,
        "metabase_user_id": metabase_user.get("id"),
    }
    return {"user": user}
|
|
2011
|
+
|
|
2012
|
+
|
|
2013
|
+
@app.post("/api/admin/users/{user_id}/metabase-sync")
def sync_admin_user_metabase(
    user_id: str,
    req: AdminUserMetabaseSyncRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Ensure a Keycloak admin user also exists in Metabase.

    The user is matched in Metabase by email. When no match exists, a Metabase
    user is created with the password from ``req``.

    Args:
        user_id: Keycloak user id.
        req: Request body carrying the password for a newly created Metabase user.

    Returns:
        ``{"user": {...}, "created": bool}``, where ``created`` tells whether a
        Metabase user had to be created.

    Raises:
        HTTPException: 404 when the Keycloak user is missing; 409 when it has
            no email address.
    """
    keycloak_client = _keycloak_admin_client()
    metabase_client = _metabase_admin_client()

    target = keycloak_client.get_admin_user(user_id)
    if not target:
        raise HTTPException(status_code=404, detail="Admin user not found in Keycloak")

    email = str(target.get("email") or "").strip()
    if not email:
        raise HTTPException(status_code=409, detail="Admin user has no email for Metabase sync")

    def _synced(metabase_user_id: object, created: bool) -> dict[str, object]:
        # Shared response shape for the "already there" and "just created" paths.
        return {
            "user": {
                **target,
                "metabase_synced": True,
                "metabase_user_id": metabase_user_id,
            },
            "created": created,
        }

    existing = metabase_client.find_user_by_email(email)
    if existing:
        return _synced(existing.get("id"), False)

    metabase_user = metabase_client.create_admin_user(
        email=email,
        password=req.password,
        first_name=str(target.get("first_name") or ""),
        last_name=str(target.get("last_name") or ""),
    )
    return _synced(metabase_user.get("id"), True)
|
|
2054
|
+
|
|
2055
|
+
|
|
2056
|
+
@app.delete("/api/admin/users/{user_id}")
def delete_admin_user(
    user_id: str,
    payload: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Delete an admin user from Metabase (when present) and from Keycloak.

    Args:
        user_id: Keycloak user id to delete.
        payload: Value produced by ``_require_admin``; its ``sub`` entry is
            compared with ``user_id`` to block self-deletion.

    Returns:
        A dict with ``deleted``, ``user_id``, ``email`` and ``metabase_deleted``.
        ``metabase_deleted`` is True only when a Metabase delete call was made.

    Raises:
        HTTPException: 409 when the caller targets itself; 404 when the user
            does not exist in Keycloak.
    """
    if str(payload.get("sub") or "") == user_id:
        raise HTTPException(status_code=409, detail="The current admin user cannot delete itself")

    keycloak_client = _keycloak_admin_client()
    metabase_client = _metabase_admin_client()

    target = keycloak_client.get_admin_user(user_id)
    if not target:
        raise HTTPException(status_code=404, detail="Admin user not found in Keycloak")

    email = str(target.get("email") or "").strip()
    metabase_user = metabase_client.find_user_by_email(email) if email else None

    # Track the actual deletion: a matched record without an id is not deleted
    # and must not be reported as such.
    metabase_deleted = False
    if metabase_user and metabase_user.get("id") is not None:
        metabase_client.delete_user(metabase_user["id"])
        metabase_deleted = True

    keycloak_client.delete_user(user_id)
    return {
        "deleted": True,
        "user_id": user_id,
        "email": email,
        "metabase_deleted": metabase_deleted,
    }
|
|
2081
|
+
|
|
2082
|
+
|
|
2083
|
+
@app.get("/api/admin/connector-definitions/pnp")
def get_pnp_connector_definition(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Describe the PNP connector, including the selection catalog.

    The catalog comes from ``_load_pnp_powerbi_catalog_or_502``. Its
    ``page_url`` becomes ``powerbi_report_url``, and four of its entries are
    copied into ``selection_catalog``.
    """
    catalog = _load_pnp_powerbi_catalog_or_502()
    report_url = catalog["page_url"]

    catalog_keys = ("available_years", "available_microdados_types", "types_by_year", "items")
    selection_catalog = {key: catalog[key] for key in catalog_keys}

    return {
        "connector_id": "pnp",
        "internal_connector_id": PNP_INTERNAL_CONNECTOR_ID,
        "label": "Programa Nilo Pecanha",
        "ingestion_mode": "powerbi_microdados",
        "powerbi_report_url": report_url,
        "selection_catalog": selection_catalog,
        "sources": [],
    }
|
|
2100
|
+
|
|
2101
|
+
|
|
2102
|
+
@app.get("/api/admin/connections/pnp")
def list_pnp_connections(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """List all PNP connections, passed through ``_enrich_connections_with_health``."""
    rows = _load_all_pnp_rows()
    connections = _group_pnp_connections(rows)
    enriched = _enrich_connections_with_health(connections)
    return {"items": enriched}
|
|
2106
|
+
|
|
2107
|
+
|
|
2108
|
+
@app.get("/api/admin/connections/pnp/{connection_key}")
def get_pnp_connection(connection_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return one PNP connection together with the pipelines that reference it.

    Returns:
        A dict with ``connection`` (health-enriched record) and ``pipelines``
        (grouped instances whose ``connection_key`` matches).
    """
    loaded = _load_pnp_connection(connection_key)
    connection = _enrich_connections_with_health([loaded])[0]

    all_instances = _group_pnp_instances(_load_all_pnp_rows())
    pipelines = []
    for item in all_instances:
        if item.get("connection_key") == connection_key:
            pipelines.append(item)

    return {"connection": connection, "pipelines": pipelines}
|
|
2116
|
+
|
|
2117
|
+
|
|
2118
|
+
@app.post("/api/admin/connections/pnp")
def create_pnp_connection(
    payload: PnpConnectionCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create a PNP connection and return it enriched with health data.

    The connection key is derived from the connection name. The stored page
    URL is the catalog's ``page_url``, or ``DEFAULT_PNP_POWERBI_REPORT_URL``
    when the catalog has none.

    Raises:
        HTTPException: 409 when the database reports error code ``23505`` for
            the new key.
        psycopg2.Error: Any other database error is re-raised unchanged.
    """
    catalog = _load_pnp_powerbi_catalog_or_502()
    connection_key = _build_pnp_connection_key(payload.connection_name)

    try:
        pnp_instance_repository.create_connection(
            _db_connect,
            connection_key=connection_key,
            connection_name=payload.connection_name.strip(),
            page_url=str(catalog.get("page_url") or DEFAULT_PNP_POWERBI_REPORT_URL),
            is_active=payload.is_active,
        )
    except psycopg2.Error as exc:
        # 23505 is PostgreSQL's unique_violation, i.e. a duplicate key.
        if exc.pgcode != "23505":
            raise
        raise HTTPException(
            status_code=409,
            detail=f"PNP connection already exists for key: {connection_key}",
        ) from exc

    stored = _load_pnp_connection(connection_key)
    return _enrich_connections_with_health([stored])[0]
|
|
2140
|
+
|
|
2141
|
+
|
|
2142
|
+
@app.delete("/api/admin/connections/pnp/{connection_key}")
def delete_pnp_connection(
    connection_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Delete a PNP connection by delegating to ``_delete_pnp_connection``."""
    outcome = _delete_pnp_connection(connection_key)
    return outcome
|
|
2148
|
+
|
|
2149
|
+
|
|
2150
|
+
@app.get("/api/admin/pipelines/pnp")
def list_pnp_pipelines(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """List all PNP pipelines (grouped instances) under ``items``."""
    rows = _load_all_pnp_rows()
    pipelines = _group_pnp_instances(rows)
    return {"items": pipelines}
|
|
2153
|
+
|
|
2154
|
+
|
|
2155
|
+
@app.get("/api/admin/connectors/pnp/instances")
def list_pnp_instances(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Alias route: same payload as the ``/api/admin/pipelines/pnp`` listing."""
    listing = list_pnp_pipelines(_)
    return listing
|
|
2158
|
+
|
|
2159
|
+
|
|
2160
|
+
@app.get("/api/admin/connectors/pnp/instances/{instance_key}")
def get_pnp_instance(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the PNP instance record loaded by ``_load_pnp_instance``."""
    instance = _load_pnp_instance(instance_key)
    return instance
|
|
2163
|
+
|
|
2164
|
+
|
|
2165
|
+
@app.get("/api/admin/pipelines/pnp/{instance_key}")
def get_pnp_pipeline(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Pipeline-named route for fetching a PNP instance record by key."""
    pipeline = _load_pnp_instance(instance_key)
    return pipeline
|
|
2168
|
+
|
|
2169
|
+
|
|
2170
|
+
@app.get("/api/admin/connectors/pnp/instances/{instance_key}/admin-overview")
def get_pnp_instance_admin_overview(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Assemble the admin overview for one PNP instance.

    Returns:
        A dict with ``instance``, ``diagnostics``, ``diagnostics_summary``,
        ``run_events``, ``ingestion`` and ``integrations``.
    """
    instance = _load_pnp_instance(instance_key)
    diagnostics = _load_pnp_instance_diagnostics(instance_key)
    run_events = _load_pnp_instance_run_events(instance_key)
    integrations = _load_pnp_instance_integrations(instance_key)

    # Derived views are computed from the data loaded above.
    diagnostics_summary = _summarize_pnp_diagnostics(diagnostics)
    ingestion = _build_pnp_ingestion_summary(run_events)

    return {
        "instance": instance,
        "diagnostics": diagnostics,
        "diagnostics_summary": diagnostics_summary,
        "run_events": run_events,
        "ingestion": ingestion,
        "integrations": integrations,
    }
|
|
2187
|
+
|
|
2188
|
+
|
|
2189
|
+
@app.get("/api/admin/pipelines/pnp/{instance_key}/admin-overview")
def get_pnp_pipeline_admin_overview(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Alias route: delegates to ``get_pnp_instance_admin_overview``."""
    overview = get_pnp_instance_admin_overview(instance_key, _)
    return overview
|
|
2195
|
+
|
|
2196
|
+
|
|
2197
|
+
@app.get("/api/admin/connectors/pnp/instances/{instance_key}/dag-runs")
def list_pnp_instance_dag_runs(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """List the DAG runs of a PNP instance (default limit of the loader) under ``items``."""
    dag_runs = _load_pnp_instance_dag_runs(instance_key)
    return {"items": dag_runs}
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
@app.get("/api/admin/pipelines/pnp/{instance_key}/dag-runs")
def list_pnp_pipeline_dag_runs(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Pipeline-named route listing the DAG runs of a PNP instance under ``items``."""
    dag_runs = _load_pnp_instance_dag_runs(instance_key)
    return {"items": dag_runs}
|
|
2205
|
+
|
|
2206
|
+
|
|
2207
|
+
@app.post("/api/admin/connectors/pnp/instances/{instance_key}/operations/validate-sources")
def trigger_pnp_instance_validate_sources(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Trigger the instance's DAG with the ``validate`` operation.

    Returns:
        The dict produced by ``_trigger_pnp_airflow_dag``.
    """
    instance = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(instance)
    return _trigger_pnp_airflow_dag(dag_id, instance_key, operation="validate")
|
|
2218
|
+
|
|
2219
|
+
|
|
2220
|
+
@app.post("/api/admin/pipelines/pnp/{instance_key}/operations/validate-sources")
def trigger_pnp_pipeline_validate_sources(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Pipeline-named route that triggers the DAG with the ``validate`` operation.

    Returns:
        The dict produced by ``_trigger_pnp_airflow_dag``.
    """
    pipeline = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(pipeline)
    return _trigger_pnp_airflow_dag(dag_id, instance_key, operation="validate")
|
|
2231
|
+
|
|
2232
|
+
|
|
2233
|
+
@app.post("/api/admin/connectors/pnp/instances/{instance_key}/operations/full-sync")
def trigger_pnp_instance_full_sync(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Trigger the instance's DAG with the ``sync`` operation.

    Returns:
        The dict produced by ``_trigger_pnp_airflow_dag``.
    """
    instance = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(instance)
    return _trigger_pnp_airflow_dag(dag_id, instance_key, operation="sync")
|
|
2244
|
+
|
|
2245
|
+
|
|
2246
|
+
@app.post("/api/admin/pipelines/pnp/{instance_key}/operations/full-sync")
def trigger_pnp_pipeline_full_sync(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Pipeline-named route that triggers the DAG with the ``sync`` operation.

    Returns:
        The dict produced by ``_trigger_pnp_airflow_dag``.
    """
    pipeline = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(pipeline)
    return _trigger_pnp_airflow_dag(dag_id, instance_key, operation="sync")
|
|
2257
|
+
|
|
2258
|
+
|
|
2259
|
+
@app.delete("/api/admin/pipelines/pnp/instances/{instance_key}")
def delete_pnp_pipeline_instance(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Delete a PNP instance by delegating to ``_delete_pnp_instance``."""
    outcome = _delete_pnp_instance(instance_key)
    return outcome
|
|
2265
|
+
|
|
2266
|
+
|
|
2267
|
+
@app.delete("/api/admin/connections/pnp/instances/{instance_key}")
def delete_pnp_connection_instance(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Connections-prefixed route that deletes a PNP instance via ``_delete_pnp_instance``."""
    outcome = _delete_pnp_instance(instance_key)
    return outcome
|
|
2273
|
+
|
|
2274
|
+
|
|
2275
|
+
@app.post("/api/admin/pipelines/pnp")
def create_pnp_pipeline(
    payload: PnpPipelineCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create a PNP pipeline and wait for its DAG to appear in Airflow.

    Steps, in order: validate the selection against the Power BI catalog,
    resolve the selected downloads, load the target connection, store the
    instance, then poll Airflow for the DAG. If polling raises
    ``HTTPException``, the stored instance is deleted again.

    Returns:
        The stored instance record.

    Raises:
        HTTPException: 404 when the repository reports the connection missing;
            409 when the database reports error code ``23505`` for the new key;
            whatever ``_wait_for_airflow_dag`` raises.
        psycopg2.Error: Any other database error is re-raised unchanged.
    """
    catalog = _load_pnp_powerbi_catalog_or_502()
    selection = {
        "selected_years": payload.selected_years,
        "selected_microdados_types": payload.selected_microdados_types,
    }
    _validate_pnp_selection_against_catalog(catalog=catalog, **selection)
    selected_downloads = _resolve_pnp_selected_downloads(catalog=catalog, **selection)

    connection = _load_pnp_connection(payload.connection_key)
    instance_key = _build_pnp_pipeline_key(payload.pipeline_name)
    normalized_schedule = _normalize_pipeline_schedule(payload.schedule)

    try:
        pnp_instance_repository.create_instance(
            _db_connect,
            instance_key=instance_key,
            instance_name=payload.pipeline_name.strip(),
            connection_key=str(connection["connection_key"]),
            selected_years=payload.selected_years,
            selected_microdados_types=payload.selected_microdados_types,
            selected_downloads=selected_downloads,
            schedule=normalized_schedule,
            is_active=payload.is_active,
        )
    except pnp_instance_repository.PnpConnectionNotFoundError as exc:
        raise HTTPException(status_code=404, detail="PNP connection not found") from exc
    except psycopg2.Error as exc:
        # 23505 is PostgreSQL's unique_violation, i.e. a duplicate key.
        if exc.pgcode != "23505":
            raise
        raise HTTPException(
            status_code=409,
            detail=f"PNP instance already exists for key: {instance_key}",
        ) from exc

    instance = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(instance)
    try:
        _wait_for_airflow_dag(dag_id)
    except HTTPException:
        # Roll the record back so no instance is left without a registered DAG.
        pnp_instance_repository.delete_instance(_db_connect, instance_key=instance_key)
        raise

    return instance
|
|
2324
|
+
|
|
2325
|
+
|
|
2326
|
+
@app.post("/api/admin/connectors/pnp/instances")
def create_pnp_instance(
    payload: PnpInstanceCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create a PNP pipeline from an instance-style request.

    The pipeline is attached to the first existing connection. When there is
    none, a connection named ``"PNP Principal"`` is created first.

    Returns:
        The result of ``create_pnp_pipeline``.
    """
    existing_connections = _group_pnp_connections(_load_all_pnp_rows())

    if not existing_connections:
        bootstrap_request = PnpConnectionCreateRequest(
            connection_name="PNP Principal",
            is_active=payload.is_active,
        )
        connection = create_pnp_connection(bootstrap_request, _)
        connection_key = str(connection["connection_key"])
    else:
        connection_key = existing_connections[0]["connection_key"]

    pipeline_request = PnpPipelineCreateRequest(
        pipeline_name=payload.instance_name,
        connection_key=connection_key,
        selected_years=payload.selected_years,
        selected_microdados_types=payload.selected_microdados_types,
        schedule=payload.schedule,
        is_active=payload.is_active,
    )
    return create_pnp_pipeline(pipeline_request, _)
|
|
2353
|
+
|
|
2354
|
+
|
|
2355
|
+
@app.patch("/api/admin/connectors/pnp/instances/{instance_key}")
def update_pnp_instance(
    instance_key: str,
    payload: PnpInstanceUpdateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Update the schedule and/or active flag of a PNP instance.

    The schedule is normalized only when one is supplied; otherwise ``None`` is
    passed to ``_persist_pnp_instance_settings``.
    """
    # Called before any change is made; its return value is not needed here.
    _load_pnp_instance(instance_key)

    normalized_schedule = None
    if payload.schedule is not None:
        normalized_schedule = _normalize_pipeline_schedule(payload.schedule)

    return _persist_pnp_instance_settings(
        instance_key,
        schedule=normalized_schedule,
        is_active=payload.is_active,
    )
|
|
2368
|
+
|
|
2369
|
+
|
|
2370
|
+
@app.post("/api/embed/metabase-token")
def create_embed_token(req: EmbedRequest) -> dict[str, object]:
    """Return the signed Metabase payload for the requested dashboard and params."""
    signed = _signed_metabase_dashboard_payload(req.dashboard_id, req.params)
    return signed
|
|
2373
|
+
|
|
2374
|
+
|
|
2375
|
+
@app.get("/api/embed/metabase-default")
def get_default_embed_token() -> dict[str, object]:
    """Return the signed Metabase payload for the stored default dashboard, with no params."""
    default_dashboard_id = _read_metabase_default_dashboard_id()
    no_params: dict = {}
    return _signed_metabase_dashboard_payload(default_dashboard_id, no_params)
|
|
2379
|
+
|
|
2380
|
+
|
|
2381
|
+
@app.post("/api/admin/embed/metabase-default")
def set_default_embed_token(
    req: EmbedRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Store the default dashboard id, then return the signed payload for it."""
    dashboard_id = req.dashboard_id
    _write_metabase_default_dashboard_id(dashboard_id)
    return _signed_metabase_dashboard_payload(req.dashboard_id, req.params)
|
|
2388
|
+
|
|
2389
|
+
|
|
2390
|
+
@app.get("/api/admin/sql/catalog")
def get_admin_sql_catalog(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the admin SQL catalog under ``items``."""
    catalog_items = _admin_sql_catalog()
    return {"items": catalog_items}
|
|
2393
|
+
|
|
2394
|
+
|
|
2395
|
+
@app.post("/api/admin/sql/query")
def run_admin_sql_query(
    req: AdminSqlQueryRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Run a validated admin SQL statement and return a bounded result set.

    The statement goes through ``_validate_admin_sql`` and
    ``_bounded_admin_sql``. It is executed after ``BEGIN READ ONLY`` with a
    15-second statement timeout, and the transaction is rolled back afterwards.

    Returns:
        A dict with ``fields`` (column names), ``rows`` (at most
        ``req.max_rows``), ``row_count``, ``truncated`` (True when more than
        ``req.max_rows`` rows were available) and ``max_rows``.

    Raises:
        HTTPException: 400 with the database message on any ``psycopg2.Error``.
    """
    statement = _validate_admin_sql(req.sql)
    bounded_statement = _bounded_admin_sql(statement, req.max_rows)
    row_cap = req.max_rows

    try:
        with _db_connect() as conn, conn.cursor() as cur:
            # NOTE(review): if the connection runs with autocommit disabled, the
            # driver may already have opened a transaction, and this explicit
            # BEGIN READ ONLY might then have no effect — confirm against
            # `_db_connect`.
            cur.execute("BEGIN READ ONLY")
            cur.execute("SET LOCAL statement_timeout = '15s'")
            cur.execute(bounded_statement)

            columns = cur.description or []
            fields = [{"name": column[0]} for column in columns]

            # Fetch one extra row so truncation can be detected.
            if cur.description:
                rows = list(cur.fetchmany(row_cap + 1))
            else:
                rows = []
            truncated = len(rows) > row_cap
            if truncated:
                rows = rows[:row_cap]

            cur.execute("ROLLBACK")
    except psycopg2.Error as exc:
        raise HTTPException(status_code=400, detail=str(exc).strip()) from exc

    return {
        "fields": fields,
        "rows": rows,
        "row_count": len(rows),
        "truncated": truncated,
        "max_rows": req.max_rows,
    }
|
|
2424
|
+
|
|
2425
|
+
|
|
2426
|
+
@app.post("/api/vanna/ask")
async def ask(
    req: AskRequest,
    payload: dict[str, object] | None = Depends(verify_optional_bearer),
) -> dict[str, object]:
    """Forward a question to the Vanna service with the applicable LLM settings.

    The caller's effective settings are used when ``verify_optional_bearer``
    yields a truthy payload; otherwise the global settings apply.
    """
    if payload:
        config = _effective_vanna_llm_settings_for_user(payload)
    else:
        config = _effective_global_vanna_llm_settings()

    return await ask_vanna(
        settings.vanna_service_url,
        req.question,
        _vanna_llm_override_payload(config),
    )
|