@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,2432 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from threading import Lock
6
+ from datetime import datetime, timezone
7
+ from time import monotonic, sleep
8
+ from typing import Any
9
+
10
+ import httpx
11
+ import psycopg2
12
+ from croniter import croniter
13
+ from fastapi import Depends, FastAPI, HTTPException
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from pydantic import BaseModel, Field, model_validator
16
+ from psycopg2.extras import RealDictCursor
17
+
18
+ from .auth import require_admin, verify_optional_bearer
19
+ from .config import settings
20
+ from .keycloak_admin import KeycloakAdminClient
21
+ from .metabase_admin import MetabaseAdminClient
22
+ from .metabase_embed import build_signed_dashboard_url
23
+ from . import pnp_dag_provisioner, pnp_instance_repository
24
+ from .pnp_powerbi import DEFAULT_PNP_POWERBI_REPORT_URL, PNP_MICRODADOS_TYPES, load_public_microdados_catalog
25
+ from .vanna_client import ask_vanna
26
+
27
# Identifier string for the internal PNP connector.
PNP_INTERNAL_CONNECTOR_ID = "nilo_pecanha"
# Display labels (Portuguese) for the Power BI microdata group and source.
PNP_POWERBI_GROUP_LABEL = "Microdados Publicos"
PNP_POWERBI_SOURCE_LABEL = "Catalogo publico de microdados via Power BI"
# Entity-kind discriminators; presumably consumed by the instance repository — confirm.
PNP_CONNECTION_ENTITY = "connection"
PNP_PIPELINE_ENTITY = "pipeline"
# Keys used for rows in config.app_settings.
METABASE_DEFAULT_DASHBOARD_SETTING_KEY = "metabase.default_dashboard_id"
VANNA_LLM_SETTING_KEY = "vanna.llm_config"
# Prefix for per-user LLM setting keys; the suffix format is not visible here.
VANNA_USER_LLM_SETTING_PREFIX = "vanna.llm_config.user."
35
# Display metadata per pipeline task, keyed by task name. Each entry carries
# the stage identifier (equal to the key), a Portuguese stage label, and a
# Portuguese completion message. These strings are runtime data: do not
# normalise their spelling or accents here.
# NOTE(review): the keys look like Airflow task ids — confirm against the DAG factory.
PNP_RUNTIME_TASK_META = {
    "load_instance_config": {
        "stage": "load_instance_config",
        "stage_label": "Carregamento da configuração",
        "message": "A configuração da pipeline foi carregada.",
    },
    "resolve_powerbi_catalog": {
        "stage": "resolve_powerbi_catalog",
        "stage_label": "Resolução do catálogo",
        "message": "O catalogo Power BI foi resolvido.",
    },
    "extract_raw": {
        "stage": "extract_raw",
        "stage_label": "Extracao de microdados",
        "message": "A extração e a carga bruta dos microdados foram concluídas.",
    },
    "materialize_staging": {
        "stage": "materialize_staging",
        "stage_label": "Materializacao de staging",
        "message": "A staging deduplicada foi materializada.",
    },
    "build_curated_views": {
        "stage": "build_curated_views",
        "stage_label": "Publicacao de curated",
        "message": "As views e materialized views curadas foram publicadas.",
    },
    "run_quality_checks": {
        "stage": "run_quality_checks",
        "stage_label": "Checagens de qualidade",
        "message": "As checagens operacionais e de qualidade foram executadas.",
    },
    "finalize_run": {
        "stage": "finalize_run",
        "stage_label": "Encerramento da execução",
        "message": "A execucao da pipeline foi finalizada.",
    },
}
72
+
73
# Module-level cache slot for the PNP catalog: "value" starts empty and
# "loaded_at" starts at 0.0.
# NOTE(review): "loaded_at" is presumably a time.monotonic() reading — confirm in the loader.
_PNP_CATALOG_CACHE: dict[str, Any] = {"value": None, "loaded_at": 0.0}
# Lock intended to guard access to _PNP_CATALOG_CACHE.
_PNP_CATALOG_CACHE_LOCK = Lock()
75
+
76
+
77
+ def _coerce_int(value: object) -> int | None:
78
+ if isinstance(value, bool):
79
+ return int(value)
80
+ if isinstance(value, int):
81
+ return value
82
+ if isinstance(value, float):
83
+ return int(value)
84
+ if isinstance(value, str):
85
+ normalized = value.strip()
86
+ if normalized and normalized.lstrip("-").isdigit():
87
+ return int(normalized)
88
+ return None
89
+
90
+
91
+ def _parse_iso_datetime(value: object) -> datetime | None:
92
+ if isinstance(value, datetime):
93
+ if value.tzinfo is None:
94
+ return value.replace(tzinfo=timezone.utc)
95
+ return value.astimezone(timezone.utc)
96
+ if not isinstance(value, str) or not value.strip():
97
+ return None
98
+ candidate = value.strip().replace("Z", "+00:00")
99
+ try:
100
+ parsed = datetime.fromisoformat(candidate)
101
+ except ValueError:
102
+ return None
103
+ if parsed.tzinfo is None:
104
+ return parsed.replace(tzinfo=timezone.utc)
105
+ return parsed.astimezone(timezone.utc)
106
+
107
+
108
class EmbedRequest(BaseModel):
    """Request body naming a dashboard to embed plus optional parameters."""

    # Required dashboard id; must be >= 1.
    dashboard_id: int = Field(..., ge=1)
    # Free-form parameter mapping; defaults to an empty dict.
    params: dict[str, object] = Field(default_factory=dict)
111
+
112
+
113
class AskRequest(BaseModel):
    """Request body carrying a single natural-language question."""

    # Required, 3 to 1000 characters.
    question: str = Field(..., min_length=3, max_length=1000)
115
+
116
+
117
class AdminSqlQueryRequest(BaseModel):
    """Request body for an admin SQL query."""

    # SQL text, 1 to 100,000 characters.
    sql: str = Field(..., min_length=1, max_length=100_000)
    # Row cap requested by the caller: 1..5000, default 500.
    max_rows: int = Field(default=500, ge=1, le=5_000)
120
+
121
+
122
class AdminLoginRequest(BaseModel):
    """Request body carrying admin login credentials."""

    username: str = Field(..., min_length=1, max_length=120)
    password: str = Field(..., min_length=1, max_length=255)
125
+
126
+
127
class AdminRefreshRequest(BaseModel):
    """Request body carrying a refresh token."""

    # Required, 1 to 4096 characters.
    refresh_token: str = Field(..., min_length=1, max_length=4096)
129
+
130
+
131
class LlmProviderOllamaRequest(BaseModel):
    """Ollama provider settings: base URL and model name, both with defaults."""

    base_url: str = Field(default="http://ollama:11434", min_length=1, max_length=255)
    model: str = Field(default="sabia-7b", min_length=1, max_length=120)
134
+
135
+
136
class LlmProviderMaritacaRequest(BaseModel):
    """Maritaca provider settings: endpoint, model, timeout and API-key handling."""

    api_url: str = Field(default="https://chat.maritaca.ai/api/chat/completions", min_length=1, max_length=255)
    model: str = Field(default="sabia-4", min_length=1, max_length=120)
    # Timeout in seconds: 1..300, default 60.
    timeout_seconds: int = Field(default=60, ge=1, le=300)
    # Optional key, at most 4096 characters.
    api_key: str | None = Field(default=None, max_length=4096)
    # NOTE(review): presumably asks the handler to erase the stored key — confirm.
    clear_api_key: bool = False
142
+
143
+
144
class AdminLlmSettingsUpdateRequest(BaseModel):
    """Request body selecting an LLM provider and carrying both providers' settings."""

    # Must be exactly "ollama" or "maritaca".
    provider: str = Field(..., pattern="^(ollama|maritaca)$")
    # Both sections are required regardless of which provider is selected.
    ollama: LlmProviderOllamaRequest
    maritaca: LlmProviderMaritacaRequest
148
+
149
+
150
class AdminUserCreateRequest(BaseModel):
    """Request body describing a user account to create."""

    username: str = Field(..., min_length=3, max_length=120)
    # Only length is validated here; no e-mail format check is applied.
    email: str = Field(..., min_length=3, max_length=255)
    # Minimum 8 characters.
    password: str = Field(..., min_length=8, max_length=255)
    first_name: str = Field(default="", max_length=120)
    last_name: str = Field(default="", max_length=120)
    enabled: bool = True
157
+
158
+
159
class AdminUserMetabaseSyncRequest(BaseModel):
    """Request body carrying a password (8 to 255 characters) for a Metabase sync."""

    password: str = Field(..., min_length=8, max_length=255)
161
+
162
+
163
class PnpInstanceCreateRequest(BaseModel):
    """Request body for creating a PNP instance.

    After field validation, the year and microdata-type selections are
    trimmed, stripped of blanks, checked, and de-duplicated in input order.
    """

    instance_name: str = Field(..., min_length=3, max_length=120)
    selected_years: list[str] = Field(..., min_length=1)
    selected_microdados_types: list[str] = Field(..., min_length=1)
    schedule: str | None = Field(default=None, max_length=120)
    is_active: bool = False

    @model_validator(mode="after")
    def validate_sources(self) -> "PnpInstanceCreateRequest":
        """Normalise both selection lists; reject unsupported types or empty results."""
        trimmed_years = (entry.strip() for entry in self.selected_years if isinstance(entry, str))
        years = [entry for entry in trimmed_years if entry]

        trimmed_types = (entry.strip() for entry in self.selected_microdados_types)
        types = [entry for entry in trimmed_types if entry]
        # The first unsupported type wins, and is reported before any
        # "at least one" error below.
        for entry in types:
            if entry not in PNP_MICRODADOS_TYPES:
                raise ValueError(f"Unsupported PNP microdados type: {entry}")

        if not years:
            raise ValueError("At least one selected_years entry is required")
        if not types:
            raise ValueError("At least one selected_microdados_types entry is required")

        # dict.fromkeys removes duplicates while keeping first-seen order.
        self.selected_years = list(dict.fromkeys(years))
        self.selected_microdados_types = list(dict.fromkeys(types))
        return self
190
+
191
+
192
class PnpInstanceUpdateRequest(BaseModel):
    """Partial-update body for a PNP instance; both fields default to ``None``."""

    schedule: str | None = Field(default=None, max_length=120)
    is_active: bool | None = None
195
+
196
+
197
class PnpConnectionCreateRequest(BaseModel):
    """Request body for creating a PNP connection."""

    connection_name: str = Field(..., min_length=3, max_length=120)
    # Connections default to active.
    is_active: bool = True
200
+
201
+
202
class PnpPipelineCreateRequest(BaseModel):
    """Request body for creating a PNP pipeline bound to a connection key.

    After field validation, the year and microdata-type selections are
    trimmed, stripped of blanks, checked, and de-duplicated in input order.
    """

    pipeline_name: str = Field(..., min_length=3, max_length=120)
    connection_key: str = Field(..., min_length=3, max_length=120)
    selected_years: list[str] = Field(..., min_length=1)
    selected_microdados_types: list[str] = Field(..., min_length=1)
    schedule: str | None = Field(default=None, max_length=120)
    is_active: bool = True

    @model_validator(mode="after")
    def validate_sources(self) -> "PnpPipelineCreateRequest":
        """Normalise both selection lists; reject unsupported types or empty results."""
        trimmed_years = (entry.strip() for entry in self.selected_years if isinstance(entry, str))
        years = [entry for entry in trimmed_years if entry]

        trimmed_types = (entry.strip() for entry in self.selected_microdados_types)
        types = [entry for entry in trimmed_types if entry]
        # The first unsupported type wins, and is reported before any
        # "at least one" error below.
        for entry in types:
            if entry not in PNP_MICRODADOS_TYPES:
                raise ValueError(f"Unsupported PNP microdados type: {entry}")

        if not years:
            raise ValueError("At least one selected_years entry is required")
        if not types:
            raise ValueError("At least one selected_microdados_types entry is required")

        # dict.fromkeys removes duplicates while keeping first-seen order.
        self.selected_years = list(dict.fromkeys(years))
        self.selected_microdados_types = list(dict.fromkeys(types))
        return self
230
+
231
+
232
# FastAPI application object for this service.
app = FastAPI(title="dataif-api", version="0.4.0")

# settings.cors_allow_origins is a comma-separated string; blanks are dropped.
allowed_origins = [origin.strip() for origin in settings.cors_allow_origins.split(",") if origin.strip()]
# NOTE(review): credentials are allowed for every listed origin, with all
# methods and headers; confirm the configured list never contains "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
242
+
243
+
244
def _require_admin(payload: dict[str, object] | None = Depends(verify_optional_bearer)) -> dict[str, object]:
    """FastAPI dependency: run ``require_admin`` on the bearer payload and return it.

    Returns:
        The payload produced by ``verify_optional_bearer``, or an empty dict
        when that payload is ``None`` or empty.
    """
    require_admin(payload)
    if payload:
        return payload
    return {}
247
+
248
+
249
def _db_connect():
    """Open a psycopg2 connection to the warehouse using ``RealDictCursor``.

    Raises:
        HTTPException: 500 when ``settings.warehouse_dsn`` is empty.
    """
    dsn = settings.warehouse_dsn
    if dsn:
        return psycopg2.connect(dsn, cursor_factory=RealDictCursor)
    raise HTTPException(status_code=500, detail="WAREHOUSE_DSN not configured")
253
+
254
+
255
+ def _compact_sql(statement: str) -> str:
256
+ without_block_comments = re.sub(r"/\*.*?\*/", " ", statement, flags=re.DOTALL)
257
+ without_line_comments = re.sub(r"--.*?$", " ", without_block_comments, flags=re.MULTILINE)
258
+ return re.sub(r"\s+", " ", without_line_comments).strip()
259
+
260
+
261
def _validate_admin_sql(statement: str) -> str:
    """Validate admin-supplied SQL as a single read-only query.

    The statement is compacted (comments stripped, whitespace collapsed) and
    must then start with SELECT or WITH, contain no interior semicolon, and
    contain none of the blocked keywords. The keyword check is textual, so a
    blocked word inside a string literal or quoted identifier is rejected too.

    Args:
        statement: Raw SQL text from the caller.

    Returns:
        The compacted statement with trailing semicolons removed.

    Raises:
        HTTPException: 422 when the statement is empty, is not SELECT/WITH,
            holds more than one statement, or contains a blocked keyword.
    """
    compact = _compact_sql(statement)
    normalized = compact.lower()

    if not normalized:
        raise HTTPException(status_code=422, detail="SQL vazio.")
    if not normalized.startswith(("select", "with")):
        raise HTTPException(status_code=422, detail="Apenas SELECT ou WITH sao permitidos.")
    if ";" in compact.rstrip(";"):
        raise HTTPException(status_code=422, detail="Apenas uma instrucao SQL e permitida.")

    forbidden_keywords = (
        "insert",
        "update",
        "delete",
        "drop",
        "alter",
        "truncate",
        "create",
        "grant",
        "revoke",
        "copy",
        "call",
        "do",
        "execute",
        "vacuum",
        "analyze",
        "set",
        "reset",
        # "SELECT ... INTO new_table" creates a table and MERGE can modify
        # data from inside a WITH clause; both start with an allowed keyword.
        "into",
        "merge",
    )
    if re.search(r"\b(?:" + "|".join(forbidden_keywords) + r")\b", normalized):
        raise HTTPException(status_code=422, detail="A consulta contem palavra-chave nao permitida.")

    # NOTE(review): a keyword blocklist cannot stop side-effecting functions;
    # confirm the caller also executes this in a read-only transaction.
    return compact.rstrip(";")
296
+
297
+
298
+ def _bounded_admin_sql(statement: str, max_rows: int) -> str:
299
+ return f"SELECT * FROM ({statement}) AS dataif_admin_sql_result LIMIT {max_rows + 1}"
300
+
301
+
302
def _admin_sql_catalog() -> list[dict[str, object]]:
    """List tables, views and materialized views with their column names.

    Returns:
        One row per (relation, column) with keys ``schema_name``,
        ``relation_name``, ``relation_type`` and ``column_name``, ordered by
        schema, relation and column position. Relations without a match in
        ``information_schema.columns`` appear once with a NULL column name.
    """
    # NOTE(review): information_schema.columns may not cover materialized
    # views, in which case their rows carry a NULL column_name — confirm.
    conn = _db_connect()
    try:
        # psycopg2's ``with conn`` only ends the transaction; the connection
        # has to be closed explicitly or it stays open after this call.
        with conn, conn.cursor() as cur:
            cur.execute(
                """
                WITH relations AS (
                    SELECT
                        table_schema AS schema_name,
                        table_name AS relation_name,
                        CASE table_type
                            WHEN 'VIEW' THEN 'view'
                            ELSE 'table'
                        END AS relation_type
                    FROM information_schema.tables
                    WHERE table_schema NOT IN ('pg_catalog', 'information_schema')
                      AND table_type IN ('BASE TABLE', 'VIEW')
                    UNION ALL
                    SELECT
                        schemaname AS schema_name,
                        matviewname AS relation_name,
                        'materialized_view' AS relation_type
                    FROM pg_catalog.pg_matviews
                    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
                )
                SELECT
                    relations.schema_name,
                    relations.relation_name,
                    relations.relation_type,
                    columns.column_name
                FROM relations
                LEFT JOIN information_schema.columns AS columns
                    ON columns.table_schema = relations.schema_name
                    AND columns.table_name = relations.relation_name
                ORDER BY relations.schema_name, relations.relation_name, columns.ordinal_position NULLS LAST;
                """
            )
            return list(cur.fetchall())
    finally:
        conn.close()
338
+
339
+
340
def _metabase_dashboard_id_list() -> list[int]:
    """Parse ``settings.metabase_allowed_dashboard_ids`` into unique ints.

    The setting is a comma-separated string; blank entries are skipped and
    duplicates dropped while keeping first-seen order.

    Raises:
        HTTPException: 500 when an entry is not an integer or no ids remain.
    """
    # A dict serves as an insertion-ordered set.
    seen: dict[int, None] = {}
    for token in settings.metabase_allowed_dashboard_ids.split(","):
        text = token.strip()
        if not text:
            continue
        try:
            parsed = int(text)
        except ValueError as exc:
            raise HTTPException(status_code=500, detail="METABASE_ALLOWED_DASHBOARD_IDS is invalid") from exc
        seen.setdefault(parsed, None)
    if seen:
        return list(seen)
    raise HTTPException(status_code=500, detail="METABASE_ALLOWED_DASHBOARD_IDS is empty")
355
+
356
+
357
def _allowed_metabase_dashboard_ids() -> set[int]:
    """Return the configured allowed dashboard ids as a set."""
    return {*_metabase_dashboard_id_list()}
359
+
360
+
361
def _fallback_metabase_dashboard_id(allowed_ids: list[int]) -> int:
    """Pick the default dashboard id from configuration.

    Uses ``settings.metabase_default_dashboard_id`` when it coerces to an
    int, otherwise the first entry of ``allowed_ids``.

    Raises:
        HTTPException: 500 when the configured default is not in ``allowed_ids``.
    """
    permitted = set(allowed_ids)
    configured = _coerce_int(settings.metabase_default_dashboard_id)
    if configured is None:
        return allowed_ids[0]
    if configured in permitted:
        return configured
    raise HTTPException(status_code=500, detail="METABASE_DEFAULT_DASHBOARD_ID is not allowed")
369
+
370
+
371
def _validate_metabase_dashboard_id(dashboard_id: int) -> None:
    """Raise a 403 ``HTTPException`` unless ``dashboard_id`` is in the allowed set."""
    permitted = _allowed_metabase_dashboard_ids()
    if dashboard_id in permitted:
        return
    raise HTTPException(status_code=403, detail="Dashboard id is not allowed for public embed")
374
+
375
+
376
def _ensure_app_settings_table(conn) -> None:
    """Create ``config.app_settings`` on ``conn`` if it does not exist yet.

    Only the statement is executed; committing is left to the caller.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS config.app_settings (
            setting_key TEXT PRIMARY KEY,
            setting_value JSONB NOT NULL,
            metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
        )
        """
    with conn.cursor() as cursor:
        cursor.execute(ddl)
389
+
390
+
391
def _read_metabase_default_dashboard_id() -> int:
    """Return the default Metabase dashboard id.

    The value stored in ``config.app_settings`` is used when it coerces to an
    int and is in the allowed list; otherwise the configured fallback applies.
    The stored value may be an object with a ``dashboard_id`` key or a bare
    scalar.

    Raises:
        HTTPException: 500 when the allowed list is invalid or the database
            read fails.
    """
    allowed_ids = _metabase_dashboard_id_list()
    allowed = set(allowed_ids)
    try:
        conn = _db_connect()
        try:
            # ``with conn`` commits or rolls back but does not close; the
            # ``finally`` below releases the connection.
            with conn:
                _ensure_app_settings_table(conn)
                with conn.cursor() as cur:
                    cur.execute(
                        "SELECT setting_value FROM config.app_settings WHERE setting_key = %s",
                        (METABASE_DEFAULT_DASHBOARD_SETTING_KEY,),
                    )
                    row = cur.fetchone()
        finally:
            conn.close()
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to read default Metabase dashboard: {exc}") from exc

    if row:
        value = row["setting_value"]
        dashboard_id = _coerce_int(value.get("dashboard_id") if isinstance(value, dict) else value)
        if dashboard_id is not None and dashboard_id in allowed:
            return dashboard_id
    return _fallback_metabase_dashboard_id(allowed_ids)
414
+
415
+
416
def _write_metabase_default_dashboard_id(dashboard_id: int) -> None:
    """Upsert the default Metabase dashboard id into ``config.app_settings``.

    The id is stored as the JSON object ``{"dashboard_id": <id>}``.

    Raises:
        HTTPException: 403 when ``dashboard_id`` is not allowed; 500 when the
            database write fails.
    """
    _validate_metabase_dashboard_id(dashboard_id)
    try:
        conn = _db_connect()
        try:
            # Leaving ``with conn`` commits the upsert; the connection is
            # closed separately because psycopg2 does not close it there.
            with conn:
                _ensure_app_settings_table(conn)
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO config.app_settings (setting_key, setting_value)
                        VALUES (%s, %s::jsonb)
                        ON CONFLICT (setting_key) DO UPDATE
                        SET setting_value = EXCLUDED.setting_value,
                            updated_at = NOW()
                        """,
                        (
                            METABASE_DEFAULT_DASHBOARD_SETTING_KEY,
                            json.dumps({"dashboard_id": dashboard_id}),
                        ),
                    )
        finally:
            conn.close()
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to save default Metabase dashboard: {exc}") from exc
439
+
440
+
441
def _signed_metabase_dashboard_payload(dashboard_id: int, params: dict[str, object] | None = None) -> dict[str, object]:
    """Build the ``{"dashboard_id", "signed_url"}`` payload for an allowed dashboard.

    Raises:
        HTTPException: 403 when ``dashboard_id`` is not in the allowed set.
    """
    _validate_metabase_dashboard_id(dashboard_id)
    embed_params = params if params else {}
    url = build_signed_dashboard_url(
        site_url=settings.metabase_site_url,
        embed_secret=settings.metabase_embed_secret,
        dashboard_id=dashboard_id,
        params=embed_params,
    )
    return dict(dashboard_id=dashboard_id, signed_url=url)
450
+
451
+
452
def _read_app_setting(setting_key: str) -> dict[str, Any] | None:
    """Fetch one setting from ``config.app_settings``.

    Args:
        setting_key: Key of the row to read.

    Returns:
        The stored value when the row exists and the value is a dict,
        otherwise ``None``.

    Raises:
        HTTPException: re-raised unchanged when raised while reading; any
            other failure while reading is wrapped in a 500.
    """
    query = "SELECT setting_value FROM config.app_settings WHERE setting_key = %s"
    try:
        with _db_connect() as connection:
            _ensure_app_settings_table(connection)
            with connection.cursor() as cursor:
                cursor.execute(query, (setting_key,))
                record = cursor.fetchone()
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to read app setting {setting_key}: {exc}") from exc

    if record:
        stored = record["setting_value"]
        if isinstance(stored, dict):
            return stored
    return None
471
+
472
+
473
def _write_app_setting(setting_key: str, setting_value: dict[str, Any]) -> None:
    """Upsert one setting into ``config.app_settings`` as JSONB.

    Args:
        setting_key: Key of the row to insert or update.
        setting_value: Value serialised with ``json.dumps`` before storage.

    Raises:
        HTTPException: re-raised unchanged when raised while writing; any
            other failure (including serialisation) is wrapped in a 500.
    """
    upsert_sql = """
        INSERT INTO config.app_settings (setting_key, setting_value)
        VALUES (%s, %s::jsonb)
        ON CONFLICT (setting_key) DO UPDATE
        SET setting_value = EXCLUDED.setting_value,
        updated_at = NOW()
        """
    try:
        with _db_connect() as connection:
            _ensure_app_settings_table(connection)
            with connection.cursor() as cursor:
                # Serialisation stays inside the try so its failures are also reported as a 500.
                serialized = json.dumps(setting_value)
                cursor.execute(upsert_sql, (setting_key, serialized))
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to save app setting {setting_key}: {exc}") from exc
492
+
493
+
494
+ def _default_vanna_llm_settings() -> dict[str, Any]:
495
+ return {
496
+ "provider": "ollama",
497
+ "ollama": {
498
+ "base_url": "http://ollama:11434",
499
+ "model": "sabia-7b",
500
+ },
501
+ "maritaca": {
502
+ "api_url": "https://chat.maritaca.ai/api/chat/completions",
503
+ "api_key": "",
504
+ "model": "sabia-4",
505
+ "timeout_seconds": 60,
506
+ },
507
+ }
508
+
509
+
510
def _vanna_llm_settings_from_env() -> dict[str, Any]:
    """Build the LLM configuration from ``settings``, backed by the defaults.

    Each text field is stringified and stripped; an empty result falls back
    to the value from ``_default_vanna_llm_settings()``. The Maritaca API key
    is stringified without stripping and has no fallback. The timeout goes
    through ``_coerce_positive_int``.
    """
    defaults = _default_vanna_llm_settings()
    ollama_defaults = defaults["ollama"]
    maritaca_defaults = defaults["maritaca"]

    provider = str(settings.vanna_llm_provider).strip().lower() or defaults["provider"]
    ollama = {
        "base_url": str(settings.vanna_ollama_base_url).strip() or ollama_defaults["base_url"],
        "model": str(settings.vanna_ollama_model).strip() or ollama_defaults["model"],
    }
    maritaca = {
        "api_url": str(settings.vanna_maritaca_api_url).strip() or maritaca_defaults["api_url"],
        "api_key": str(settings.vanna_maritaca_api_key),
        "model": str(settings.vanna_maritaca_model).strip() or maritaca_defaults["model"],
        "timeout_seconds": _coerce_positive_int(
            settings.vanna_maritaca_timeout_seconds,
            maritaca_defaults["timeout_seconds"],
        ),
    }
    return {"provider": provider, "ollama": ollama, "maritaca": maritaca}
528
+
529
+
530
def _effective_vanna_llm_settings() -> dict[str, Any]:
    """Return the effective global LLM settings (alias of the global resolver)."""
    global_settings = _effective_global_vanna_llm_settings()
    return global_settings
532
+
533
+
534
def _effective_global_vanna_llm_settings() -> dict[str, Any]:
    """Merge the persisted global LLM settings over the environment settings.

    The environment configuration is the baseline. When a dict is persisted
    under ``VANNA_LLM_SETTING_KEY``, each truthy persisted field replaces the
    baseline value; empty or missing fields keep the baseline.
    """
    baseline = _vanna_llm_settings_from_env()
    stored = _read_app_setting(VANNA_LLM_SETTING_KEY)
    if not isinstance(stored, dict):
        return baseline

    def _section(name: str) -> dict[str, Any]:
        # Non-dict sections are ignored entirely.
        candidate = stored.get(name)
        return candidate if isinstance(candidate, dict) else {}

    def _text(section: dict[str, Any], key: str, fallback: Any) -> Any:
        # Persisted value wins when truthy; a whitespace-only result falls back too.
        return str(section.get(key) or fallback).strip() or fallback

    base_ollama = baseline["ollama"]
    base_maritaca = baseline["maritaca"]

    provider = str(stored.get("provider") or baseline["provider"]).strip().lower() or baseline["provider"]
    stored_ollama = _section("ollama")
    stored_maritaca = _section("maritaca")

    return {
        "provider": provider,
        "ollama": {
            "base_url": _text(stored_ollama, "base_url", base_ollama["base_url"]),
            "model": _text(stored_ollama, "model", base_ollama["model"]),
        },
        "maritaca": {
            "api_url": _text(stored_maritaca, "api_url", base_maritaca["api_url"]),
            # The API key is deliberately not stripped.
            "api_key": str(stored_maritaca.get("api_key") or base_maritaca["api_key"]),
            "model": _text(stored_maritaca, "model", base_maritaca["model"]),
            "timeout_seconds": _coerce_positive_int(
                stored_maritaca.get("timeout_seconds"),
                base_maritaca["timeout_seconds"],
            ),
        },
    }
560
+
561
+
562
+ def _user_vanna_llm_setting_key(payload: dict[str, object] | None) -> str | None:
563
+ if not payload:
564
+ return None
565
+ subject = str(payload.get("sub") or "").strip()
566
+ if subject:
567
+ return f"{VANNA_USER_LLM_SETTING_PREFIX}{subject}"
568
+
569
+ fallback = str(payload.get("preferred_username") or payload.get("email") or "").strip().lower()
570
+ if not fallback:
571
+ return None
572
+ safe_fallback = re.sub(r"[^a-z0-9_.@-]+", "_", fallback)
573
+ return f"{VANNA_USER_LLM_SETTING_PREFIX}{safe_fallback}"
574
+
575
+
576
def _read_user_vanna_llm_settings(payload: dict[str, object] | None) -> dict[str, Any] | None:
    """Load the personal LLM settings stored for the user in ``payload``.

    Returns ``None`` when no setting key can be derived from the payload or
    when the stored value is not a dict.
    """
    setting_key = _user_vanna_llm_setting_key(payload)
    if setting_key:
        stored = _read_app_setting(setting_key)
        if isinstance(stored, dict):
            return stored
    return None
582
+
583
+
584
def _effective_vanna_llm_settings_for_user(payload: dict[str, object] | None) -> dict[str, Any]:
    """Return the global LLM settings with the user's personal API key applied.

    A non-blank personal Maritaca API key replaces the global one. The result
    also carries ``_maritaca_api_key_scope`` set to ``"personal"``,
    ``"global"`` or ``"empty"`` to tell where the key came from.
    """
    config = _effective_global_vanna_llm_settings()
    has_global_key = bool(str(config["maritaca"].get("api_key") or "").strip())
    scope = "global" if has_global_key else "empty"

    personal = _read_user_vanna_llm_settings(payload)
    personal_maritaca = personal.get("maritaca") if isinstance(personal, dict) else None
    if not isinstance(personal_maritaca, dict):
        personal_maritaca = {}

    personal_key = str(personal_maritaca.get("api_key") or "")
    if personal_key.strip():
        # Build new dicts instead of mutating the global configuration in place.
        overridden_maritaca = {**config["maritaca"], "api_key": personal_key}
        config = {**config, "maritaca": overridden_maritaca}
        scope = "personal"

    config["_maritaca_api_key_scope"] = scope
    return config
602
+
603
+
604
def _serialize_vanna_llm_settings_public(config: dict[str, Any]) -> dict[str, Any]:
    """Return a client-safe view of the LLM settings.

    The raw Maritaca API key is never included; only its masked form and
    flags describing its presence and scope are exposed. When the config has
    no ``_maritaca_api_key_scope`` the scope is ``"configured"`` if a masked
    key exists, else ``"empty"``.
    """
    maritaca = config["maritaca"]
    raw_key = str(maritaca.get("api_key") or "")
    masked_key = _mask_secret(raw_key)
    default_scope = "configured" if masked_key else "empty"
    key_scope = str(config.get("_maritaca_api_key_scope") or default_scope)

    public_ollama = {
        "base_url": config["ollama"]["base_url"],
        "model": config["ollama"]["model"],
    }
    public_maritaca = {
        "api_url": maritaca["api_url"],
        "model": maritaca["model"],
        "timeout_seconds": maritaca["timeout_seconds"],
        "has_api_key": bool(raw_key.strip()),
        "api_key_scope": key_scope,
        "has_personal_api_key": key_scope == "personal",
        "masked_api_key": masked_key,
    }
    return {
        "provider": config["provider"],
        "ollama": public_ollama,
        "maritaca": public_maritaca,
    }
624
+
625
+
626
def _persist_vanna_llm_settings(
    payload: AdminLlmSettingsUpdateRequest,
    admin_payload: dict[str, object] | None = None,
) -> dict[str, Any]:
    """Persist the admin LLM settings and return the effective result.

    Provider, Ollama and non-secret Maritaca fields are always written to the
    global setting. A Maritaca API key change (``clear_api_key`` wins over a
    provided ``api_key``) is written to the admin's personal setting when a
    per-user key can be derived from ``admin_payload``; otherwise it is
    applied to the global setting.

    The global setting is written exactly once: the API key to store is
    resolved before the write rather than patched in by a second write.

    Args:
        payload: Validated update request.
        admin_payload: Token payload of the acting admin, used to derive the
            per-user setting key.

    Returns:
        The effective settings for the acting admin after the update.
    """
    current_global = _effective_global_vanna_llm_settings()
    user_setting_key = _user_vanna_llm_setting_key(admin_payload)

    # None means "leave the API key untouched".
    requested_api_key: str | None
    if payload.maritaca.clear_api_key:
        requested_api_key = ""
    elif payload.maritaca.api_key is not None:
        requested_api_key = payload.maritaca.api_key.strip()
    else:
        requested_api_key = None

    # Without a per-user key the change applies to the global API key.
    global_api_key = current_global["maritaca"]["api_key"]
    if requested_api_key is not None and not user_setting_key:
        global_api_key = requested_api_key

    next_global = {
        "provider": payload.provider.strip().lower(),
        "ollama": {
            "base_url": payload.ollama.base_url.strip(),
            "model": payload.ollama.model.strip(),
        },
        "maritaca": {
            "api_url": payload.maritaca.api_url.strip(),
            "model": payload.maritaca.model.strip(),
            "timeout_seconds": int(payload.maritaca.timeout_seconds),
            "api_key": global_api_key,
        },
    }
    _write_app_setting(VANNA_LLM_SETTING_KEY, next_global)

    if requested_api_key is not None and user_setting_key:
        _write_app_setting(user_setting_key, {"maritaca": {"api_key": requested_api_key}})

    return _effective_vanna_llm_settings_for_user(admin_payload)
660
+
661
+
662
+ def _vanna_llm_override_payload(config: dict[str, Any]) -> dict[str, object]:
663
+ return {
664
+ "provider": config["provider"],
665
+ "ollama": {
666
+ "base_url": config["ollama"]["base_url"],
667
+ "model": config["ollama"]["model"],
668
+ },
669
+ "maritaca": {
670
+ "api_url": config["maritaca"]["api_url"],
671
+ "api_key": config["maritaca"]["api_key"],
672
+ "model": config["maritaca"]["model"],
673
+ "timeout_seconds": config["maritaca"]["timeout_seconds"],
674
+ },
675
+ }
676
+
677
+
678
+ def _vanna_provider_status(config: dict[str, Any]) -> dict[str, Any]:
679
+ provider = str(config["provider"]).strip().lower()
680
+ if provider == "maritaca":
681
+ has_key = bool(str(config["maritaca"].get("api_key") or "").strip())
682
+ return {
683
+ "provider": provider,
684
+ "available": has_key,
685
+ "detail": "Maritaca API key configured" if has_key else "Maritaca API key is not configured",
686
+ }
687
+
688
+ target_url = f"{str(config['ollama']['base_url']).rstrip('/')}/api/tags"
689
+ try:
690
+ with httpx.Client(timeout=5, follow_redirects=True) as client:
691
+ response = client.get(target_url)
692
+ except httpx.RequestError as exc:
693
+ return {"provider": provider, "available": False, "detail": f"Ollama is not reachable: {exc}"}
694
+
695
+ available = response.status_code < 500
696
+ return {
697
+ "provider": provider,
698
+ "available": available,
699
+ "detail": f"Ollama responded with HTTP {response.status_code}" if available else f"Ollama returned HTTP {response.status_code}",
700
+ }
701
+
702
+
703
+ def _mask_secret(value: str) -> str:
704
+ secret = value.strip()
705
+ if not secret:
706
+ return ""
707
+ if len(secret) <= 6:
708
+ return "*" * len(secret)
709
+ return f"{secret[:3]}{'*' * max(len(secret) - 6, 1)}{secret[-3:]}"
710
+
711
+
712
+ def _coerce_positive_int(value: object, default: int) -> int:
713
+ if isinstance(value, bool):
714
+ return int(value) or default
715
+ if isinstance(value, int):
716
+ return value if value > 0 else default
717
+ if isinstance(value, float):
718
+ parsed = int(value)
719
+ return parsed if parsed > 0 else default
720
+ if isinstance(value, str):
721
+ normalized = value.strip()
722
+ if normalized.isdigit():
723
+ parsed = int(normalized)
724
+ return parsed if parsed > 0 else default
725
+ return default
726
+
727
+
728
def _keycloak_admin_client() -> KeycloakAdminClient:
    """Create a Keycloak admin client from the application settings.

    The timeout is ``settings.nilo_timeout_seconds`` with a floor of 30 seconds.
    """
    timeout_seconds = max(settings.nilo_timeout_seconds, 30.0)
    client = KeycloakAdminClient(
        base_url=settings.keycloak_url,
        realm=settings.keycloak_realm,
        admin_realm=settings.keycloak_admin_realm,
        admin_client_id=settings.keycloak_admin_client_id,
        admin_username=settings.keycloak_admin_username,
        admin_password=settings.keycloak_admin_password,
        timeout_seconds=timeout_seconds,
    )
    return client
738
+
739
+
740
def _metabase_admin_client() -> MetabaseAdminClient:
    """Create a Metabase admin client from the application settings.

    The timeout is ``settings.nilo_timeout_seconds`` with a floor of 30 seconds.
    """
    timeout_seconds = max(settings.nilo_timeout_seconds, 30.0)
    client = MetabaseAdminClient(
        base_url=settings.metabase_api_url,
        admin_email=settings.metabase_admin_email,
        admin_password=settings.metabase_admin_password,
        timeout_seconds=timeout_seconds,
    )
    return client
747
+
748
+
749
def _list_admin_users_with_metabase_state() -> list[dict[str, Any]]:
    """List Keycloak admin users annotated with their Metabase sync state.

    Users are matched to Metabase accounts by stripped, lower-cased email.
    Each returned item is the Keycloak user dict extended with
    ``metabase_synced`` and ``metabase_user_id``.
    """
    keycloak_users = _keycloak_admin_client().list_admin_users()

    # Index Metabase accounts by normalised email; accounts without an email are skipped.
    metabase_by_email: dict[str, Any] = {}
    for account in _metabase_admin_client().list_admin_users():
        email = str(account.get("email") or "").strip()
        if email:
            metabase_by_email[email.lower()] = account

    def _annotate(user: dict[str, Any]) -> dict[str, Any]:
        lookup_key = str(user.get("email") or "").strip().lower()
        match = metabase_by_email.get(lookup_key)
        return {
            **user,
            "metabase_synced": match is not None,
            "metabase_user_id": match.get("id") if match else None,
        }

    return [_annotate(user) for user in keycloak_users]
768
+
769
+
770
def _keycloak_openid_url(path: str) -> str:
    """Return the OpenID Connect URL for ``path`` in the configured realm."""
    base = settings.keycloak_url.rstrip("/")
    endpoint = path.lstrip("/")
    return f"{base}/realms/{settings.keycloak_realm}/protocol/openid-connect/{endpoint}"
772
+
773
+
774
def _request_keycloak_token(form_fields: dict[str, str]) -> dict[str, Any]:
    """Request a token from the Keycloak token endpoint.

    Args:
        form_fields: Grant-specific form fields. ``client_id`` is supplied
            first and may be overridden by an entry in ``form_fields``; the
            client secret is added afterwards when configured.

    Returns:
        The decoded token payload, guaranteed to be a dict with a truthy
        ``access_token``.

    Raises:
        HTTPException: 502 when Keycloak cannot be reached or returns an
            unusable success payload; 401 when Keycloak answers 400 or 401;
            otherwise Keycloak's own status for other error responses.
    """
    form = {"client_id": settings.keycloak_client_id, **form_fields}
    if settings.keycloak_client_secret:
        form["client_secret"] = settings.keycloak_client_secret

    try:
        timeout = max(settings.nilo_timeout_seconds, 30.0)
        with httpx.Client(timeout=timeout, follow_redirects=True) as client:
            response = client.post(
                _keycloak_openid_url("token"),
                headers={"Content-Type": "application/x-www-form-urlencoded"},
                data=form,
            )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Keycloak unavailable: {exc}") from exc

    upstream_status = response.status_code
    if upstream_status >= 400:
        detail = response.text
        try:
            error_body = response.json()
        except ValueError:
            error_body = None
        if isinstance(error_body, dict):
            # Prefer Keycloak's structured message over the raw response text.
            detail = str(error_body.get("error_description") or error_body.get("error") or detail)
        # Keycloak reports bad credentials as 400 as well; both map to 401.
        status_code = 401 if upstream_status in {400, 401} else upstream_status
        raise HTTPException(status_code=status_code, detail=f"Falha ao autenticar no Keycloak: {detail}")

    try:
        token_payload = response.json()
    except ValueError as exc:
        raise HTTPException(status_code=502, detail="Keycloak returned a non-JSON token payload") from exc

    if isinstance(token_payload, dict) and token_payload.get("access_token"):
        return token_payload
    raise HTTPException(status_code=502, detail="Keycloak returned an invalid token payload")
808
+
809
+
810
+ def _slugify_instance_name(value: str) -> str:
811
+ normalized = "".join(char.lower() if char.isalnum() else "_" for char in value.strip())
812
+ collapsed = "_".join(part for part in normalized.split("_") if part)
813
+ return collapsed[:80] or "pnp_instance"
814
+
815
+
816
def _build_pnp_instance_key(instance_name: str) -> str:
    """Return the instance key: ``pnp_`` followed by the slugified name."""
    slug = _slugify_instance_name(instance_name)
    return "pnp_" + slug
818
+
819
+
820
def _build_pnp_connection_key(connection_name: str) -> str:
    """Return the connection key: ``pnp_conn_`` followed by the slugified name."""
    slug = _slugify_instance_name(connection_name)
    return "pnp_conn_" + slug
822
+
823
+
824
def _build_pnp_pipeline_key(pipeline_name: str) -> str:
    """Return the pipeline key: ``pnp_pipe_`` followed by the slugified name."""
    slug = _slugify_instance_name(pipeline_name)
    return "pnp_pipe_" + slug
826
+
827
+
828
+ def _normalize_pipeline_schedule(schedule: str | None) -> str | None:
829
+ if schedule is None:
830
+ return None
831
+ normalized = schedule.strip()
832
+ if not normalized:
833
+ return None
834
+ if not croniter.is_valid(normalized):
835
+ raise HTTPException(status_code=422, detail="Invalid pipeline schedule cron expression")
836
+ return normalized
837
+
838
+
839
def _load_pnp_powerbi_catalog_or_502() -> dict[str, Any]:
    """Return the public PNP microdados catalog, served from the module cache when fresh.

    The cached catalog is returned while it is younger than
    ``settings.pnp_catalog_cache_ttl_seconds``. Otherwise the catalog is
    reloaded while holding ``_PNP_CATALOG_CACHE_LOCK`` and the cache is
    updated. If anything in the reload path fails, a previously cached
    catalog is returned even when it is stale.

    Raises:
        HTTPException: 502 when loading fails and no cached catalog exists.
    """
    # A TTL of zero or less disables cache hits: the freshness checks require ttl_seconds > 0.
    ttl_seconds = max(float(settings.pnp_catalog_cache_ttl_seconds), 0.0)
    cached_catalog = _PNP_CATALOG_CACHE.get("value")
    loaded_at = float(_PNP_CATALOG_CACHE.get("loaded_at") or 0.0)
    now = monotonic()

    # Fast path: serve a fresh cached catalog without taking the lock.
    if cached_catalog is not None and ttl_seconds > 0 and (now - loaded_at) < ttl_seconds:
        return cached_catalog

    try:
        with _PNP_CATALOG_CACHE_LOCK:
            # Re-read the cache under the lock; it may have been refreshed while waiting.
            cached_catalog = _PNP_CATALOG_CACHE.get("value")
            loaded_at = float(_PNP_CATALOG_CACHE.get("loaded_at") or 0.0)
            now = monotonic()

            if cached_catalog is not None and ttl_seconds > 0 and (now - loaded_at) < ttl_seconds:
                return cached_catalog

            # The load timeout is nilo_timeout_seconds with a floor of 30 seconds.
            catalog = load_public_microdados_catalog(timeout_seconds=max(float(settings.nilo_timeout_seconds), 30.0))
            _PNP_CATALOG_CACHE["value"] = catalog
            _PNP_CATALOG_CACHE["loaded_at"] = monotonic()
            return catalog
    except Exception as exc:
        # Best effort: prefer a stale catalog over failing the request.
        stale_catalog = _PNP_CATALOG_CACHE.get("value")
        if stale_catalog is not None:
            return stale_catalog
        raise HTTPException(status_code=502, detail=f"Falha ao consultar o catálogo público de microdados da PNP: {exc}") from exc
866
+
867
+
868
+ def _validate_pnp_selection_against_catalog(
869
+ *,
870
+ selected_years: list[str],
871
+ selected_microdados_types: list[str],
872
+ catalog: dict[str, Any],
873
+ ) -> None:
874
+ available_years = {str(item).strip() for item in (catalog.get("available_years") or []) if isinstance(item, str)}
875
+ missing_years = [item for item in selected_years if item not in available_years]
876
+ if missing_years:
877
+ raise HTTPException(
878
+ status_code=422,
879
+ detail=f"Anos indisponiveis no catalogo publico da PNP: {', '.join(missing_years)}",
880
+ )
881
+
882
+ types_by_year = {
883
+ str(year): {str(item).strip() for item in items if isinstance(item, str)}
884
+ for year, items in dict(catalog.get("types_by_year") or {}).items()
885
+ }
886
+ for year in selected_years:
887
+ missing_types = [item for item in selected_microdados_types if item not in types_by_year.get(year, set())]
888
+ if missing_types:
889
+ raise HTTPException(
890
+ status_code=422,
891
+ detail=f"Tipos de microdados indisponiveis para o ano {year}: {', '.join(missing_types)}",
892
+ )
893
+
894
+
895
+ def _normalize_pnp_selected_downloads(items: list[dict[str, Any]] | tuple[dict[str, Any], ...] | None) -> list[dict[str, str]]:
896
+ normalized: list[dict[str, str]] = []
897
+ seen: set[tuple[str, str, str]] = set()
898
+
899
+ for item in items or []:
900
+ if not isinstance(item, dict):
901
+ continue
902
+ ano_base = str(item.get("ano_base") or "").strip()
903
+ tipo_microdados = str(item.get("tipo_microdados") or "").strip()
904
+ microdados_url = str(item.get("microdados_url") or "").strip()
905
+ if not ano_base or not tipo_microdados or not microdados_url:
906
+ continue
907
+ key = (ano_base, tipo_microdados, microdados_url)
908
+ if key in seen:
909
+ continue
910
+ seen.add(key)
911
+ normalized.append(
912
+ {
913
+ "ano_base": ano_base,
914
+ "tipo_microdados": tipo_microdados,
915
+ "microdados_url": microdados_url,
916
+ }
917
+ )
918
+
919
+ return normalized
920
+
921
+
922
def _resolve_pnp_selected_downloads(
    *,
    selected_years: list[str],
    selected_microdados_types: list[str],
    catalog: dict[str, Any],
) -> list[dict[str, str]]:
    """Resolve the catalog download entries for a year/type selection.

    Catalog items matching a selected year and a selected type are normalised
    and sorted by catalog year order, then by position in
    ``PNP_MICRODADOS_TYPES``, then by URL.

    Raises:
        HTTPException: 422 when any selected (year, type) pair has no
            download entry in the catalog.
    """
    year_order = {str(year).strip(): position for position, year in enumerate(catalog.get("available_years") or [])}
    type_order = {name: position for position, name in enumerate(PNP_MICRODADOS_TYPES)}
    wanted_years = set(selected_years)
    wanted_types = set(selected_microdados_types)

    def _is_wanted(entry: object) -> bool:
        return (
            isinstance(entry, dict)
            and str(entry.get("ano_base") or "").strip() in wanted_years
            and str(entry.get("tipo_microdados") or "").strip() in wanted_types
        )

    def _sort_key(download: dict[str, str]) -> tuple[int, int, str]:
        # Years and types unknown to the ranking sort after the known ones.
        return (
            year_order.get(download["ano_base"], len(year_order)),
            type_order.get(download["tipo_microdados"], 999),
            download["microdados_url"],
        )

    candidates = [entry for entry in (catalog.get("items") or []) if _is_wanted(entry)]
    downloads = _normalize_pnp_selected_downloads(candidates)
    downloads.sort(key=_sort_key)

    wanted_pairs = {(year, microdados_type) for year in selected_years for microdados_type in selected_microdados_types}
    found_pairs = {(download["ano_base"], download["tipo_microdados"]) for download in downloads}
    missing_pairs = sorted(wanted_pairs - found_pairs)
    if missing_pairs:
        detail = ", ".join(f"{year} / {microdados_type}" for year, microdados_type in missing_pairs)
        raise HTTPException(
            status_code=422,
            detail=f"O catálogo público nao expôs links de download para o recorte selecionado: {detail}",
        )

    return downloads
961
+
962
+
963
def _build_pnp_connection_payload(
    connection_key: str,
    connection_name: str,
    page_url: str,
) -> dict[str, Any]:
    """Build the endpoint payload that represents a PNP connection.

    The endpoint key is ``<connection_key>__connection`` and the connection
    metadata travels in ``request_params``. The API, CSV and dictionary URLs
    are always ``None``.
    """
    request_params: dict[str, Any] = {
        "mode": "powerbi_microdados",
        "entity_type": PNP_CONNECTION_ENTITY,
        "connection_key": connection_key,
        "connection_name": connection_name,
        "selected_source_label": PNP_POWERBI_SOURCE_LABEL,
        "selected_source_group": PNP_POWERBI_GROUP_LABEL,
        "source_path": "powerbi_microdados",
    }
    payload: dict[str, Any] = {
        "endpoint_key": f"{connection_key}__connection",
        "description": f"{connection_name} - conexão PNP",
        "page_url": page_url,
    }
    # A connection row carries no data source URLs of its own.
    for unused_url in ("api_endpoint_url", "csv_url", "dictionary_url"):
        payload[unused_url] = None
    payload["request_params"] = request_params
    return payload
987
+
988
+
989
def _build_pnp_pipeline_payload(
    pipeline_key: str,
    pipeline_name: str,
    connection_key: str,
    connection_name: str,
    page_url: str,
    selected_years: list[str],
    selected_microdados_types: list[str],
    selected_downloads: list[dict[str, str]],
    schedule: str | None,
) -> dict[str, Any]:
    """Build the endpoint payload that represents a PNP pipeline.

    The pipeline key and name are also stored as ``instance_key`` and
    ``instance_name``. Selections are copied, downloads are normalised, and
    ``schedule`` is included (stripped) only when it is non-blank.
    """
    request_params: dict[str, Any] = {
        "mode": "powerbi_microdados",
        "entity_type": PNP_PIPELINE_ENTITY,
        "pipeline_key": pipeline_key,
        "pipeline_name": pipeline_name,
        "connection_key": connection_key,
        "connection_name": connection_name,
        "instance_key": pipeline_key,
        "instance_name": pipeline_name,
        "selected_years": list(selected_years),
        "selected_microdados_types": list(selected_microdados_types),
        "selected_downloads": _normalize_pnp_selected_downloads(selected_downloads),
        "selected_source_label": PNP_POWERBI_SOURCE_LABEL,
        "selected_source_group": PNP_POWERBI_GROUP_LABEL,
        "source_path": "powerbi_microdados",
    }
    cleaned_schedule = schedule.strip() if schedule else ""
    if cleaned_schedule:
        request_params["schedule"] = cleaned_schedule

    payload: dict[str, Any] = {
        "endpoint_key": f"{pipeline_key}__powerbi_microdados",
        "description": f"{pipeline_name} - {PNP_POWERBI_SOURCE_LABEL}",
        "page_url": page_url,
    }
    # A pipeline row carries no direct data source URLs; downloads live in request_params.
    for unused_url in ("api_endpoint_url", "csv_url", "dictionary_url"):
        payload[unused_url] = None
    payload["request_params"] = request_params
    return payload
1028
+
1029
+
1030
def _row_entity_type(request_params: dict[str, Any]) -> str:
    """Classify a stored row as a connection or a pipeline.

    An explicit ``entity_type`` wins when it is one of the two known values.
    Otherwise a row that carries selected years or types is treated as a
    pipeline, and anything else as a connection.
    """
    declared = str(request_params.get("entity_type") or "").strip().lower()
    if declared in {PNP_CONNECTION_ENTITY, PNP_PIPELINE_ENTITY}:
        return declared

    has_selection = request_params.get("selected_years") or request_params.get("selected_microdados_types")
    return PNP_PIPELINE_ENTITY if has_selection else PNP_CONNECTION_ENTITY
1037
+
1038
+
1039
+ def _is_deleted_row(request_params: dict[str, Any]) -> bool:
1040
+ return bool(request_params.get("deleted"))
1041
+
1042
+
1043
def _group_pnp_connections(
    rows: list[dict[str, Any]],
    *,
    include_deleted: bool = False,
    include_virtual: bool = True,
) -> list[dict[str, Any]]:
    """Group stored rows into connections with their attached pipelines.

    Only rows whose ``request_params["mode"]`` is ``powerbi_microdados`` are
    considered.

    Args:
        rows: Stored endpoint rows.
        include_deleted: Keep rows flagged as deleted.
        include_virtual: Also derive a connection from pipeline rows, falling
            back to their instance key and name when they carry no
            connection key or name.

    Returns:
        Connections sorted by lower-cased name, each with its pipelines
        sorted by lower-cased name and a matching ``pipeline_count``.
    """
    connections_by_key: dict[str, dict[str, Any]] = {}

    for row in rows:
        params = dict(row.get("request_params") or {})
        mode = str(params.get("mode") or "").strip().lower()
        if mode != "powerbi_microdados":
            continue
        if not include_deleted and _is_deleted_row(params):
            continue

        entity_type = _row_entity_type(params)
        is_connection_row = entity_type == PNP_CONNECTION_ENTITY
        if not is_connection_row and not include_virtual:
            continue

        if is_connection_row:
            key_source = params.get("connection_key")
            name_source = params.get("connection_name")
        else:
            # Pipeline rows may predate explicit connections; fall back to the instance identity.
            key_source = params.get("connection_key") or params.get("instance_key")
            name_source = params.get("connection_name") or params.get("instance_name")
        connection_key = str(key_source or "").strip()
        connection_name = str(name_source or connection_key).strip()
        if not connection_key:
            continue

        row_page_url = row.get("page_url")
        row_updated_at = row.get("updated_at")

        # The first row seen for a key seeds the connection; later rows only refine it.
        connection = connections_by_key.setdefault(
            connection_key,
            {
                "connection_key": connection_key,
                "connection_name": connection_name or connection_key,
                "connector_id": "pnp",
                "page_url": row_page_url,
                "is_active": False,
                "validation_status": "pending",
                "validation_message": "conexão sem validação recente.",
                "pipeline_count": 0,
                "pipelines": [],
                "created_at": row.get("created_at"),
                "updated_at": row_updated_at,
            },
        )

        connection["is_active"] = bool(connection["is_active"] or row.get("is_active"))
        if row_page_url:
            connection["page_url"] = row_page_url
        if row_updated_at and (connection["updated_at"] is None or row_updated_at > connection["updated_at"]):
            connection["updated_at"] = row_updated_at

        if entity_type != PNP_PIPELINE_ENTITY:
            continue

        pipeline_key = str(params.get("pipeline_key") or params.get("instance_key") or "").strip()
        if not pipeline_key:
            continue
        if any(existing["pipeline_key"] == pipeline_key for existing in connection["pipelines"]):
            continue

        pipeline_id = str(params.get("pipeline_id") or "").strip()
        pipeline_name = str(params.get("pipeline_name") or params.get("instance_name") or pipeline_key).strip()
        connection["pipelines"].append(
            {
                "pipeline_id": pipeline_id or None,
                "pipeline_key": pipeline_key,
                "pipeline_name": pipeline_name or pipeline_key,
            }
        )

    for connection in connections_by_key.values():
        connection["pipelines"].sort(key=lambda pipeline: pipeline["pipeline_name"].lower())
        connection["pipeline_count"] = len(connection["pipelines"])

    return sorted(connections_by_key.values(), key=lambda connection: connection["connection_name"].lower())
1114
+
1115
+
1116
def _group_pnp_instances(rows: list[dict[str, Any]], *, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Group stored pipeline rows into pipeline instances.

    Only rows in ``powerbi_microdados`` mode that classify as pipelines are
    considered. Rows sharing a pipeline (or instance) key are merged:
    selections are unioned, downloads and endpoint tables are de-duplicated,
    and every row is also listed under ``endpoints``.

    Args:
        rows: Stored endpoint rows.
        include_deleted: Keep rows flagged as deleted.

    Returns:
        Instances sorted by lower-cased name. ``selection_count`` and
        ``source_count`` equal the number of downloads when there are any,
        else the product of selected years and selected types.
    """

    def _clean_strings(values: object) -> list[str]:
        # Keep only non-blank string entries, stripped.
        return [str(item).strip() for item in (values or []) if isinstance(item, str) and item.strip()]

    grouped: dict[str, dict[str, Any]] = {}

    for row in rows:
        request_params = dict(row.get("request_params") or {})
        ingestion_mode = str(request_params.get("mode") or "").strip().lower()
        if ingestion_mode != "powerbi_microdados":
            continue
        if _row_entity_type(request_params) != PNP_PIPELINE_ENTITY:
            continue
        if not include_deleted and _is_deleted_row(request_params):
            continue

        instance_key = str(request_params.get("pipeline_key") or request_params.get("instance_key") or "").strip()
        if not instance_key:
            continue

        # The first row seen for a key seeds the instance; later rows only refine it.
        instance = grouped.setdefault(
            instance_key,
            {
                "pipeline_id": str(request_params.get("pipeline_id") or "").strip() or None,
                "instance_key": instance_key,
                "instance_name": str(request_params.get("pipeline_name") or request_params.get("instance_name") or instance_key),
                "connector_id": "pnp",
                "ingestion_mode": "powerbi_microdados",
                "connection_key": str(request_params.get("connection_key") or instance_key),
                "connection_name": str(
                    request_params.get("connection_name") or request_params.get("instance_name") or instance_key
                ),
                "schedule": request_params.get("schedule"),
                "is_active": False,
                "source_count": 0,
                "selection_count": 0,
                "download_count": 0,
                "selected_years": [],
                "selected_microdados_types": [],
                "selected_downloads": [],
                "selected_endpoints": [],
                "endpoint_tables": [],
                "endpoints": [],
                "created_at": row.get("created_at"),
                "updated_at": row.get("updated_at"),
            },
        )

        selected_years = _clean_strings(request_params.get("selected_years"))
        selected_microdados_types = _clean_strings(request_params.get("selected_microdados_types"))
        selected_downloads = _normalize_pnp_selected_downloads(request_params.get("selected_downloads"))
        selected_endpoints = _clean_strings(request_params.get("selected_endpoints"))
        endpoint_tables = [
            dict(item)
            for item in (request_params.get("endpoint_tables") or [])
            if isinstance(item, dict)
        ]

        instance["is_active"] = bool(instance["is_active"] or row.get("is_active"))
        if request_params.get("pipeline_id"):
            instance["pipeline_id"] = str(request_params.get("pipeline_id"))
        if request_params.get("schedule"):
            instance["schedule"] = request_params.get("schedule")
        if row.get("updated_at") and (instance["updated_at"] is None or row.get("updated_at") > instance["updated_at"]):
            instance["updated_at"] = row.get("updated_at")

        instance["selected_years"] = sorted({*instance["selected_years"], *selected_years}, reverse=True)
        # Known types keep their order from PNP_MICRODADOS_TYPES; unknown ones sort last, alphabetically.
        instance["selected_microdados_types"] = sorted(
            {*instance["selected_microdados_types"], *selected_microdados_types},
            key=lambda item: (PNP_MICRODADOS_TYPES.index(item) if item in PNP_MICRODADOS_TYPES else 999, item),
        )
        instance["selected_downloads"] = _normalize_pnp_selected_downloads([*instance["selected_downloads"], *selected_downloads])
        instance["selected_endpoints"] = sorted({*instance["selected_endpoints"], *selected_endpoints})

        existing_endpoint_keys = {str(item.get("endpoint_key") or "") for item in instance["endpoint_tables"]}
        for endpoint_table in endpoint_tables:
            endpoint_key = str(endpoint_table.get("endpoint_key") or "").strip()
            if endpoint_key and endpoint_key not in existing_endpoint_keys:
                instance["endpoint_tables"].append(endpoint_table)
                existing_endpoint_keys.add(endpoint_key)

        instance["endpoints"].append(
            {
                "id": row.get("id"),
                "endpoint_key": row.get("endpoint_key"),
                "page_url": row.get("page_url"),
                "is_active": row.get("is_active"),
                "selected_years": selected_years,
                "selected_microdados_types": selected_microdados_types,
                "selected_downloads": selected_downloads,
                "source_label": PNP_POWERBI_SOURCE_LABEL,
                "source_group": PNP_POWERBI_GROUP_LABEL,
                "source_path": "powerbi_microdados",
            }
        )

    for instance in grouped.values():
        instance["endpoint_tables"].sort(key=lambda item: str(item.get("endpoint_key") or ""))
        # A row without an endpoint_key stores None; coerce so mixed None/str keys sort without TypeError.
        instance["endpoints"].sort(key=lambda item: str(item.get("endpoint_key") or ""))
        instance["download_count"] = len(instance["selected_downloads"])
        instance["selection_count"] = instance["download_count"] or (
            len(instance["selected_years"]) * len(instance["selected_microdados_types"])
        )
        instance["source_count"] = instance["selection_count"]

    return sorted(grouped.values(), key=lambda item: item["instance_name"].lower())
1229
+
1230
+
1231
def _load_pnp_instance_rows(instance_key: str, *, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Load the stored rows of one PNP instance from the repository.

    Raises:
        HTTPException: 404 when the repository raises ``LookupError``.
    """
    try:
        rows = pnp_instance_repository.load_instance_rows(
            _db_connect,
            instance_key,
            include_deleted=include_deleted,
        )
    except LookupError as exc:
        raise HTTPException(status_code=404, detail="PNP instance not found") from exc
    return rows
1240
+
1241
+
1242
def _load_all_pnp_rows(*, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Load every stored PNP row from the repository."""
    rows = pnp_instance_repository.load_all_rows(_db_connect, include_deleted=include_deleted)
    return rows
1244
+
1245
+
1246
def _load_pnp_connection(connection_key: str) -> dict[str, Any]:
    """Load one PNP connection and return its grouped representation.

    Virtual connections derived from pipeline rows are not considered.

    Raises:
        HTTPException: 404 when the repository reports the connection as
            missing or when grouping the row yields no connection.
    """
    try:
        row = pnp_instance_repository.load_connection(_db_connect, connection_key)
    except pnp_instance_repository.PnpConnectionNotFoundError as exc:
        raise HTTPException(status_code=404, detail="PNP connection not found") from exc

    connections = _group_pnp_connections([row], include_virtual=False)
    if not connections:
        raise HTTPException(status_code=404, detail="PNP connection not found")
    return connections[0]
1256
+
1257
+
1258
def _connection_health_snapshot() -> dict[str, str]:
    """Describe the connector validation state based on the in-memory catalog cache.

    Returns a "validated" snapshot when ``_PNP_CATALOG_CACHE["value"]`` holds a
    dict, otherwise a "pending" snapshot pointing at the default report URL.
    """
    catalog = _PNP_CATALOG_CACHE.get("value")
    if not isinstance(catalog, dict):
        # Nothing cached yet: no validation result is available.
        return {
            "validation_status": "pending",
            "validation_message": "A validação online ainda nao foi executada nesta sessão da API.",
            "page_url": DEFAULT_PNP_POWERBI_REPORT_URL,
        }

    return {
        "validation_status": "validated",
        "validation_message": "Conector PNP validado a partir do catalogo em cache.",
        "page_url": str(catalog.get("page_url") or DEFAULT_PNP_POWERBI_REPORT_URL),
    }
1272
+
1273
+
1274
def _enrich_connections_with_health(connections: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return copies of ``connections`` annotated with the current health snapshot.

    A connection's own ``page_url`` wins; the snapshot URL is only a fallback.
    """
    health = _connection_health_snapshot()
    return [
        {
            **connection,
            "validation_status": health["validation_status"],
            "validation_message": health["validation_message"],
            "page_url": connection.get("page_url") or health["page_url"],
        }
        for connection in connections
    ]
1287
+
1288
+
1289
def _load_pnp_instance(instance_key: str) -> dict[str, Any]:
    """Load one PNP instance in grouped form, raising HTTP 404 when grouping yields nothing."""
    instances = _group_pnp_instances(_load_pnp_instance_rows(instance_key))
    if instances:
        return instances[0]
    raise HTTPException(status_code=404, detail="PNP instance not found")
1294
+
1295
+
1296
def _delete_pnp_instance(instance_key: str) -> dict[str, Any]:
    """Delete a PNP instance through the repository; an unknown key becomes HTTP 404."""
    try:
        outcome = pnp_instance_repository.delete_instance(_db_connect, instance_key=instance_key)
    except LookupError as exc:
        raise HTTPException(status_code=404, detail="PNP instance not found") from exc
    return outcome
1301
+
1302
+
1303
def _delete_pnp_connection(connection_key: str) -> dict[str, Any]:
    """Delete a PNP connection through the repository; an unknown key becomes HTTP 404."""
    try:
        outcome = pnp_instance_repository.delete_connection(_db_connect, connection_key=connection_key)
    except pnp_instance_repository.PnpConnectionNotFoundError as exc:
        raise HTTPException(status_code=404, detail="PNP connection not found") from exc
    return outcome
1308
+
1309
+
1310
+ def _safe_parse_json_text(value: object) -> dict[str, Any]:
1311
+ if not isinstance(value, str) or not value.strip():
1312
+ return {}
1313
+ try:
1314
+ parsed = json.loads(value)
1315
+ except json.JSONDecodeError:
1316
+ return {}
1317
+ return parsed if isinstance(parsed, dict) else {}
1318
+
1319
+
1320
def _describe_pnp_diagnostic(item: dict[str, Any]) -> dict[str, Any]:
    """Classify a diagnostic record into an operational status, severity and message.

    Record counts take precedence over the reported status: any curated rows
    mean "curated_ready", then any staging rows mean "staging_ready". Only
    after that is the normalized ``status`` text consulted.
    """

    def _verdict(operational_status: str, severity: str, message: str) -> dict[str, Any]:
        # Single place that shapes the returned classification.
        return {
            "operational_status": operational_status,
            "severity": severity,
            "message": message,
        }

    normalized_status = str(item.get("status") or "missing").strip().lower()
    raw_rows = _coerce_int(item.get("raw_record_count")) or 0
    staging_rows = _coerce_int(item.get("staging_record_count")) or 0
    curated_rows = _coerce_int(item.get("curated_record_count")) or 0

    if curated_rows > 0:
        return _verdict("curated_ready", "ready", "A pipeline ja publicou o endpoint na camada curated.")

    if staging_rows > 0:
        return _verdict("staging_ready", "ready", "O endpoint ja foi deduplicado e materializado em staging.")

    if normalized_status in {"running", "queued"}:
        return _verdict("running", "pending", "O endpoint esta em processamento na execucao atual.")

    if normalized_status in {"ok", "success", "cataloged"}:
        if raw_rows > 0:
            return _verdict("raw_loaded", "ready", "Microdados públicos validados e persistidos em raw.")
        return _verdict("validated", "ready", "Catálogo público resolvido e pronto para ingestão.")

    if normalized_status == "error":
        return _verdict("error", "danger", "A leitura dos microdados públicos falhou.")

    return _verdict("missing", "pending", "A fonte ainda nao produziu manifesto recente.")
1372
+
1373
+
1374
+ def _summarize_pnp_diagnostics(diagnostics: list[dict[str, Any]]) -> dict[str, Any]:
1375
+ summary = {
1376
+ "total": len(diagnostics),
1377
+ "ready": 0,
1378
+ "attention": 0,
1379
+ "missing": 0,
1380
+ "raw_loaded": 0,
1381
+ "validated": 0,
1382
+ "last_updated_at": None,
1383
+ }
1384
+
1385
+ latest_timestamp: datetime | None = None
1386
+ for item in diagnostics:
1387
+ operational_status = str(item.get("operational_status") or "missing")
1388
+ if operational_status == "raw_loaded":
1389
+ summary["raw_loaded"] += 1
1390
+ summary["ready"] += 1
1391
+ elif operational_status == "validated":
1392
+ summary["validated"] += 1
1393
+ summary["ready"] += 1
1394
+ elif operational_status == "missing":
1395
+ summary["missing"] += 1
1396
+ else:
1397
+ summary["attention"] += 1
1398
+
1399
+ updated_at = _parse_iso_datetime(item.get("updated_at"))
1400
+ if updated_at and (latest_timestamp is None or updated_at > latest_timestamp):
1401
+ latest_timestamp = updated_at
1402
+ summary["last_updated_at"] = item.get("updated_at")
1403
+
1404
+ return summary
1405
+
1406
+
1407
def _load_pnp_instance_diagnostics(instance_key: str) -> list[dict[str, Any]]:
    """Build one diagnostic record per active endpoint of a PNP instance.

    The query selects the instance's active endpoints, ranks every run of the
    instance per endpoint by most recent download/run timestamp, and keeps the
    top-ranked run (``row_num = 1``). A run only qualifies for an endpoint when
    it has a download or a catalog entry for that endpoint's ``tipo_microdados``.
    Endpoints without any qualifying run still appear, with NULL run columns,
    because of the final LEFT JOIN.

    Each row is then shaped into a diagnostic dict and enriched with the
    classification returned by ``_describe_pnp_diagnostic``.
    """
    with _db_connect() as conn, conn.cursor() as cur:
        # The raw/staging/curated counts are per-run COUNT(*) subqueries chosen
        # by endpoint key; unknown endpoint keys count as 0.
        cur.execute(
            """
            WITH pipeline_endpoints AS (
                SELECT
                    pe.instance_key,
                    pe.endpoint_key,
                    et.endpoint_name,
                    et.tipo_microdados
                FROM raw.pnp_pipeline_endpoints pe
                JOIN raw.pnp_endpoint_tables et
                    ON et.endpoint_key = pe.endpoint_key
                WHERE pe.instance_key = %s
                    AND pe.is_active = TRUE
                    AND et.is_active = TRUE
            ),
            endpoint_runs AS (
                SELECT
                    pe.endpoint_key,
                    pe.endpoint_name,
                    pe.tipo_microdados,
                    r.run_id,
                    r.status AS run_status,
                    r.started_at,
                    r.finished_at,
                    d.microdados_url AS source_url,
                    d.status AS download_status,
                    d.error_message AS download_error,
                    d.row_count_raw,
                    COALESCE(d.finished_at, d.started_at, r.finished_at, r.started_at) AS updated_at,
                    CASE pe.endpoint_key
                        WHEN 'matriculas' THEN (SELECT COUNT(*) FROM raw.pnp_matriculas_src src WHERE src.run_id = r.run_id)
                        WHEN 'eficiencia_academica' THEN (SELECT COUNT(*) FROM raw.pnp_eficiencia_academica_src src WHERE src.run_id = r.run_id)
                        WHEN 'servidores' THEN (SELECT COUNT(*) FROM raw.pnp_servidores_src src WHERE src.run_id = r.run_id)
                        WHEN 'financeiro' THEN (SELECT COUNT(*) FROM raw.pnp_financeiro_src src WHERE src.run_id = r.run_id)
                        ELSE 0
                    END AS raw_record_count,
                    CASE pe.endpoint_key
                        WHEN 'matriculas' THEN (SELECT COUNT(*) FROM staging.pnp_matriculas src WHERE src.run_id = r.run_id)
                        WHEN 'eficiencia_academica' THEN (SELECT COUNT(*) FROM staging.pnp_eficiencia_academica src WHERE src.run_id = r.run_id)
                        WHEN 'servidores' THEN (SELECT COUNT(*) FROM staging.pnp_servidores src WHERE src.run_id = r.run_id)
                        WHEN 'financeiro' THEN (SELECT COUNT(*) FROM staging.pnp_financeiro src WHERE src.run_id = r.run_id)
                        ELSE 0
                    END AS staging_record_count,
                    CASE pe.endpoint_key
                        WHEN 'matriculas' THEN (SELECT COUNT(*) FROM curated.vw_pnp_matriculas_perfil src WHERE src.run_id = r.run_id)
                        WHEN 'eficiencia_academica' THEN (SELECT COUNT(*) FROM curated.vw_pnp_eficiencia_situacao src WHERE src.run_id = r.run_id)
                        WHEN 'servidores' THEN (SELECT COUNT(*) FROM curated.vw_pnp_servidores_quadro src WHERE src.run_id = r.run_id)
                        WHEN 'financeiro' THEN (SELECT COUNT(*) FROM curated.vw_pnp_financeiro_execucao src WHERE src.run_id = r.run_id)
                        ELSE 0
                    END AS curated_record_count,
                    (
                        SELECT COUNT(*)
                        FROM raw.pnp_catalog_entries c
                        WHERE c.run_id = r.run_id
                            AND c.tipo_microdados = pe.tipo_microdados
                    ) AS catalog_entry_count,
                    ROW_NUMBER() OVER (
                        PARTITION BY pe.endpoint_key
                        ORDER BY COALESCE(d.finished_at, d.started_at, r.finished_at, r.started_at) DESC, r.run_id DESC
                    ) AS row_num
                FROM pipeline_endpoints pe
                JOIN raw.pnp_runs r
                    ON r.instance_key = pe.instance_key
                LEFT JOIN LATERAL (
                    SELECT
                        microdados_url,
                        status,
                        error_message,
                        row_count_raw,
                        started_at,
                        finished_at
                    FROM raw.pnp_downloads d
                    WHERE d.run_id = r.run_id
                        AND d.tipo_microdados = pe.tipo_microdados
                    ORDER BY COALESCE(d.finished_at, d.started_at) DESC, d.download_id DESC
                    LIMIT 1
                ) d ON TRUE
                WHERE d.microdados_url IS NOT NULL
                    OR EXISTS (
                        SELECT 1
                        FROM raw.pnp_catalog_entries c
                        WHERE c.run_id = r.run_id
                            AND c.tipo_microdados = pe.tipo_microdados
                    )
            )
            SELECT
                pe.endpoint_key,
                pe.endpoint_name,
                pe.tipo_microdados,
                er.run_id AS diagnostic_run_id,
                er.source_url,
                er.updated_at,
                er.run_status,
                er.download_status,
                er.download_error,
                er.row_count_raw,
                er.raw_record_count,
                er.staging_record_count,
                er.curated_record_count,
                er.catalog_entry_count
            FROM pipeline_endpoints pe
            LEFT JOIN endpoint_runs er
                ON er.endpoint_key = pe.endpoint_key
                AND er.row_num = 1
            ORDER BY pe.endpoint_key
            """,
            (instance_key,),
        )
        # NOTE(review): dict(row) presumes the cursor yields mapping-like rows
        # (e.g. a dict cursor configured in _db_connect) — confirm.
        rows = [dict(row) for row in cur.fetchall()]

    items: list[dict[str, Any]] = []
    for row in rows:
        diagnostic = {
            "endpoint_key": row.get("endpoint_key"),
            "endpoint_name": row.get("endpoint_name"),
            "tipo_microdados": row.get("tipo_microdados"),
            "ingestion_mode": "powerbi_microdados",
            "source_label": PNP_POWERBI_SOURCE_LABEL,
            "source_group": PNP_POWERBI_GROUP_LABEL,
            "source_path": "powerbi_microdados",
            "run_id": row.get("diagnostic_run_id"),
            "source_url": row.get("source_url"),
            "updated_at": row.get("updated_at"),
            # Prefer the download status; otherwise "cataloged" when catalog
            # entries exist for the run, else "missing".
            "status": row.get("download_status") or ("cataloged" if _coerce_int(row.get("catalog_entry_count")) else "missing"),
            # Reported download row count wins over the counted raw rows.
            "row_count": row.get("row_count_raw") or row.get("raw_record_count"),
            "selected_years": [],
            "selected_microdados_types": [row.get("tipo_microdados")] if row.get("tipo_microdados") else [],
            "downloads": [],
            "raw_run_id": row.get("diagnostic_run_id"),
            "raw_record_count": row.get("raw_record_count"),
            "staging_record_count": row.get("staging_record_count"),
            "curated_record_count": row.get("curated_record_count"),
            "raw_updated_at": row.get("updated_at"),
            # The download error is suppressed when the run itself succeeded.
            "error": row.get("download_error") if row.get("run_status") != "success" else None,
        }
        # Adds operational_status / severity / message to the record.
        diagnostic.update(_describe_pnp_diagnostic(diagnostic))
        items.append(diagnostic)

    return items
1548
+
1549
+
1550
+ def _build_pnp_runtime_event_message(task_id: str, status: str, details: dict[str, Any], error_message: str | None) -> str:
1551
+ if error_message:
1552
+ return error_message
1553
+ if details.get("error"):
1554
+ return str(details["error"])
1555
+ task_meta = PNP_RUNTIME_TASK_META.get(task_id, {})
1556
+ if task_meta.get("message"):
1557
+ return str(task_meta["message"])
1558
+ return str(status or task_id or "unknown").replace("_", " ")
1559
+
1560
+
1561
def _load_pnp_instance_run_events(instance_key: str, limit: int = 12) -> list[dict[str, Any]]:
    """Return the most recent run-step events of a PNP instance, newest first.

    Args:
        instance_key: Instance whose run steps are listed.
        limit: Maximum number of steps fetched.

    Returns:
        One dict per step with its status, a UI-oriented ``state``
        ("success", "failed", "pending" or "neutral"), a message, timestamps
        and the counters read from the step's ``details_json``.

    ``register_run`` steps are excluded by the query. ``details_json`` is
    accepted both as an already-decoded mapping and as JSON text. A string
    previously made ``dict(...)`` raise ``ValueError``.
    """
    with _db_connect() as conn, conn.cursor() as cur:
        cur.execute(
            """
            SELECT
                steps.run_id,
                steps.airflow_task_id,
                steps.status,
                steps.started_at,
                steps.finished_at,
                steps.records_affected,
                steps.error_message,
                steps.details_json
            FROM raw.pnp_run_steps steps
            JOIN raw.pnp_runs runs
                ON runs.run_id = steps.run_id
            WHERE runs.instance_key = %s
                AND steps.airflow_task_id <> 'register_run'
            ORDER BY COALESCE(steps.finished_at, steps.started_at) DESC NULLS LAST, steps.step_id DESC
            LIMIT %s
            """,
            (instance_key, limit),
        )
        rows = [dict(row) for row in cur.fetchall()]

    items: list[dict[str, Any]] = []
    for row in rows:
        status = str(row.get("status") or "").strip()
        task_id = str(row.get("airflow_task_id") or "")
        event_meta = PNP_RUNTIME_TASK_META.get(task_id, {})

        raw_details = row.get("details_json")
        if isinstance(raw_details, str):
            # Text columns / drivers without JSON adaptation hand back raw JSON.
            details = _safe_parse_json_text(raw_details)
        else:
            details = dict(raw_details or {})

        state = "neutral"
        if status == "success":
            state = "success"
        elif status in {"failed", "upstream_failed"}:
            state = "failed"
        elif status in {"running", "queued"}:
            state = "pending"

        items.append(
            {
                "run_id": row.get("run_id"),
                "status": status,
                "stage": event_meta.get("stage", task_id or "unknown"),
                "stage_label": event_meta.get("stage_label", str(task_id or status).replace("_", " ")),
                "state": state,
                "message": _build_pnp_runtime_event_message(task_id, status, details, row.get("error_message")),
                "timestamp": row.get("finished_at") or row.get("started_at"),
                "started_at": row.get("started_at"),
                "finished_at": row.get("finished_at"),
                # Only the extraction task reports an extracted count.
                "extracted_count": row.get("records_affected") if task_id == "extract_raw" else None,
                "loaded_count": row.get("records_affected"),
                "endpoint_count": _coerce_int(details.get("endpoint_count")),
                "asset_count": _coerce_int(details.get("asset_count")),
                "raw_count": _coerce_int(details.get("raw_count")) or _coerce_int(details.get("loaded_count")),
                "download_count": _coerce_int(details.get("download_count")) or _coerce_int(details.get("selected_download_count")),
                "error": row.get("error_message") or details.get("error"),
            }
        )

    return items
1621
+
1622
+
1623
+ def _build_pnp_ingestion_summary(run_events: list[dict[str, Any]]) -> dict[str, Any]:
1624
+ if not run_events:
1625
+ return {
1626
+ "status": "not_started",
1627
+ "message": "A instância ainda não gerou eventos recentes de extração ou validação.",
1628
+ "last_event_at": None,
1629
+ "latest_success_at": None,
1630
+ "latest_success_stage": None,
1631
+ "stages": {},
1632
+ }
1633
+
1634
+ latest_by_stage: dict[str, dict[str, Any]] = {}
1635
+ latest_success: dict[str, Any] | None = None
1636
+ latest_issue: dict[str, Any] | None = None
1637
+ latest_event = run_events[0]
1638
+
1639
+ for item in run_events:
1640
+ stage = str(item.get("stage") or "unknown")
1641
+ latest_by_stage.setdefault(stage, item)
1642
+ if latest_success is None and item.get("state") == "success":
1643
+ latest_success = item
1644
+ if latest_issue is None and item.get("state") == "failed":
1645
+ latest_issue = item
1646
+
1647
+ curated_event = latest_by_stage.get("build_curated_views")
1648
+ staging_event = latest_by_stage.get("materialize_staging")
1649
+ raw_event = latest_by_stage.get("extract_raw")
1650
+
1651
+ if latest_event.get("state") == "pending":
1652
+ status = "running"
1653
+ message = "A instância tem uma execucao ativa no momento."
1654
+ elif curated_event and curated_event.get("state") == "success":
1655
+ status = "curated_ready"
1656
+ message = "A instância já publicou dados para consumo em curated."
1657
+ elif staging_event and staging_event.get("state") == "success":
1658
+ status = "staging_ready"
1659
+ message = "A instância já deduplicou e materializou dados em staging."
1660
+ elif raw_event and raw_event.get("state") == "success":
1661
+ status = "raw_loaded"
1662
+ message = "A instância já carregou microdados na camada raw."
1663
+ elif latest_issue:
1664
+ status = "failed"
1665
+ message = str(latest_issue.get("message") or "Há uma falha operacional recente na instância.")
1666
+ else:
1667
+ status = "pending"
1668
+ message = "A instância tem atividade recente, mas ainda sem materialização consolidada."
1669
+
1670
+ return {
1671
+ "status": status,
1672
+ "message": message,
1673
+ "last_event_at": run_events[0].get("timestamp"),
1674
+ "latest_success_at": latest_success.get("timestamp") if latest_success else None,
1675
+ "latest_success_stage": latest_success.get("stage") if latest_success else None,
1676
+ "stages": latest_by_stage,
1677
+ }
1678
+
1679
+
1680
def _load_pnp_instance_integrations(instance_key: str, limit: int = 10) -> list[dict[str, Any]]:
    """List the latest runs of a PNP instance with per-run integration counters.

    Each returned row carries the run id, an ``integration_type``
    ("source_validation" when the run summary's ``operation`` is "validate",
    otherwise "pipeline_sync"), start/finish timestamps, the number of
    downloads and packages for the run, the instance's active endpoint count,
    the raw record count, the deduplicated staging record count and the run
    status. Rows are ordered by most recent finish/start time.
    """
    with _db_connect() as conn, conn.cursor() as cur:
        # instance_key is bound twice: once inside the endpoint_counts CTE and
        # once in the outer WHERE clause; limit feeds the final LIMIT.
        cur.execute(
            """
            WITH endpoint_counts AS (
                SELECT
                    instance_key,
                    COUNT(*) AS endpoint_count
                FROM raw.pnp_pipeline_endpoints
                WHERE instance_key = %s
                    AND is_active = TRUE
                GROUP BY instance_key
            ),
            download_counts AS (
                SELECT
                    run_id,
                    COUNT(*) AS asset_count
                FROM raw.pnp_downloads
                GROUP BY run_id
            ),
            package_counts AS (
                SELECT
                    run_id,
                    COUNT(*) AS package_count
                FROM raw.pnp_run_packages
                GROUP BY run_id
            )
            SELECT
                runs.run_id,
                CASE
                    WHEN COALESCE(runs.run_summary_json->>'operation', 'sync') = 'validate' THEN 'source_validation'
                    ELSE 'pipeline_sync'
                END AS integration_type,
                runs.started_at,
                runs.finished_at,
                COALESCE(download_counts.asset_count, 0) AS asset_count,
                COALESCE(endpoint_counts.endpoint_count, 0) AS endpoint_count,
                runs.raw_record_count AS record_count,
                COALESCE(staging.deduplicated_record_count, 0) AS staging_record_count,
                COALESCE(package_counts.package_count, 0) AS package_count,
                runs.status
            FROM raw.pnp_runs runs
            LEFT JOIN endpoint_counts
                ON endpoint_counts.instance_key = runs.instance_key
            LEFT JOIN download_counts
                ON download_counts.run_id = runs.run_id
            LEFT JOIN package_counts
                ON package_counts.run_id = runs.run_id
            LEFT JOIN staging.pnp_ingestion_runs staging
                ON staging.run_id = runs.run_id
            WHERE runs.instance_key = %s
            ORDER BY COALESCE(runs.finished_at, runs.started_at) DESC NULLS LAST
            LIMIT %s
            """,
            (instance_key, instance_key, limit),
        )
        # NOTE(review): dict(row) presumes mapping-like cursor rows — confirm
        # against the cursor factory used by _db_connect.
        return [dict(row) for row in cur.fetchall()]
1737
+
1738
+
1739
def _persist_pnp_instance_settings(
    instance_key: str,
    *,
    schedule: str | None = None,
    is_active: bool | None = None,
) -> dict[str, Any]:
    """Store schedule / activation settings for an instance and return it reloaded.

    A ``LookupError`` from the repository update is reported as HTTP 404.
    """
    try:
        pnp_instance_repository.update_instance_settings(
            _db_connect,
            instance_key=instance_key,
            schedule=schedule,
            is_active=is_active,
        )
    except LookupError as exc:
        raise HTTPException(status_code=404, detail="PNP instance not found") from exc
    else:
        # Re-read so the caller sees the persisted state.
        return _load_pnp_instance(instance_key)
1755
+
1756
+
1757
def _airflow_request(method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any]:
    """Send an authenticated request to the Airflow API and return its JSON body.

    Args:
        method: HTTP method.
        path: Path (and optional query string) appended to ``AIRFLOW_API_URL``.
        payload: Optional JSON body.

    Returns:
        The decoded JSON response, or ``{}`` when the response has no content.

    Raises:
        HTTPException: 500 when the Airflow URL is not configured; 502 when the
            request cannot be performed or a successful response is not JSON;
            the upstream status code when Airflow answers with >= 400.
    """
    if not settings.airflow_api_url:
        raise HTTPException(status_code=500, detail="AIRFLOW_API_URL not configured")

    target_url = f"{settings.airflow_api_url.rstrip('/')}{path}"
    try:
        with httpx.Client(
            timeout=max(settings.nilo_timeout_seconds, 30.0),
            follow_redirects=True,
            auth=(settings.airflow_admin_user, settings.airflow_admin_password),
        ) as client:
            response = client.request(method, target_url, json=payload)
    except Exception as exc:
        # Deliberately broad: any transport/client failure is an upstream outage.
        raise HTTPException(status_code=502, detail=f"Airflow unavailable: {exc}") from exc

    if response.status_code >= 400:
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise HTTPException(status_code=response.status_code, detail=detail)

    if not response.content:
        return {}
    try:
        return response.json()
    except ValueError as exc:
        # Redirects are followed, so a 2xx HTML page (e.g. a login form) can
        # land here; report it as a bad gateway instead of crashing.
        raise HTTPException(status_code=502, detail="Airflow returned a non-JSON response") from exc
1782
+
1783
+
1784
+ def _build_airflow_run_id(dag_id: str, instance_key: str, operation: str | None = None) -> str:
1785
+ timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
1786
+ suffix = operation.strip().lower() if isinstance(operation, str) and operation.strip() else "run"
1787
+ return f"{dag_id}__{suffix}__{timestamp}"
1788
+
1789
+
1790
def _build_pnp_instance_dag_id(instance: dict[str, Any]) -> str:
    """Derive the Airflow DAG id for an instance via ``pnp_dag_provisioner``.

    The pipeline id comes from the instance itself, falling back to its
    ``request_params``; a blank value is passed on as ``None``.
    """
    request_params = dict(instance.get("request_params") or {})
    raw_pipeline_id = instance.get("pipeline_id") or request_params.get("pipeline_id") or ""
    pipeline_id = str(raw_pipeline_id).strip() or None
    instance_key = str(instance["instance_key"])
    return pnp_dag_provisioner.build_pipeline_dag_id(instance_key, pipeline_id)
1797
+
1798
+
1799
def _wait_for_airflow_dag(
    dag_id: str,
    *,
    timeout_seconds: float = 90.0,
    poll_interval_seconds: float = 1.0,
) -> None:
    """Poll Airflow until ``dag_id`` is registered or the deadline passes.

    Connection failures and 404 answers are retried after
    ``poll_interval_seconds``. Any other non-200 status is raised right away
    with Airflow's own status code. Running out of time raises HTTP 502 with
    the last recorded reason.
    """
    if not settings.airflow_api_url:
        raise HTTPException(status_code=500, detail="AIRFLOW_API_URL not configured")

    base_url = settings.airflow_api_url.rstrip("/")
    target_url = f"{base_url}/api/v1/dags/{dag_id}"
    deadline = monotonic() + max(timeout_seconds, poll_interval_seconds)
    last_error: str | None = None

    while monotonic() < deadline:
        try:
            with httpx.Client(
                timeout=max(settings.nilo_timeout_seconds, 30.0),
                follow_redirects=True,
                auth=(settings.airflow_admin_user, settings.airflow_admin_password),
            ) as client:
                response = client.get(target_url)
        except Exception as exc:
            last_error = f"Airflow unavailable: {exc}"
        else:
            status_code = response.status_code
            if status_code == 200:
                return
            if status_code != 404:
                # Unexpected answer: surface it instead of retrying.
                try:
                    detail = response.json()
                except ValueError:
                    detail = response.text
                raise HTTPException(status_code=status_code, detail=detail)
            last_error = f"DAG {dag_id} ainda nao foi registrada no Airflow."

        # Reached only for retryable outcomes (transport error or 404).
        sleep(poll_interval_seconds)

    timeout_detail = last_error or f"Airflow nao registrou a DAG {dag_id} dentro do prazo esperado."
    raise HTTPException(status_code=502, detail=timeout_detail)
1843
+
1844
+
1845
def _trigger_pnp_airflow_dag(dag_id: str, instance_key: str, *, operation: str) -> dict[str, Any]:
    """Start a DAG run for an instance and return the DAG id, instance key and run payload.

    The instance is loaded first purely as an existence check (it raises 404
    when missing); its data is not used.
    """
    _load_pnp_instance(instance_key)

    run_request = {
        "dag_run_id": _build_airflow_run_id(dag_id, instance_key, operation),
        "conf": {
            "instance_key": instance_key,
            "operation": operation,
            "requested_by": f"api.{operation}",
        },
    }
    dag_run = _airflow_request("POST", f"/api/v1/dags/{dag_id}/dagRuns", run_request)

    return {
        "dag_id": dag_id,
        "instance_key": instance_key,
        "dag_run": dag_run,
    }
1864
+
1865
+
1866
def _load_pnp_instance_dag_runs(instance_key: str, limit: int = 10) -> list[dict[str, Any]]:
    """Return up to ``limit`` recent Airflow DAG runs that belong to an instance.

    Args:
        instance_key: Instance whose DAG runs are listed (404 when unknown).
        limit: Maximum number of runs returned.

    Returns:
        Run summaries sorted newest first by end / start / queued / logical date.

    The Airflow listing is requested newest first (``order_by=-execution_date``).
    Without an explicit order the API returns rows in ascending order, so a DAG
    with more than ``request_limit`` runs would only ever expose its oldest runs.
    """
    instance = _load_pnp_instance(instance_key)
    items: list[dict[str, Any]] = []
    # Over-fetch because runs not attributable to this instance are dropped below.
    request_limit = max(limit * 4, 20)
    dag_id = _build_pnp_instance_dag_id(instance)
    response = _airflow_request(
        "GET",
        f"/api/v1/dags/{dag_id}/dagRuns?limit={request_limit}&order_by=-execution_date",
    )
    for row in response.get("dag_runs") or []:
        conf = row.get("conf") or {}
        dag_run_id = str(row.get("dag_run_id") or "")
        # Keep a run when its conf names the instance or its id embeds the key.
        if conf.get("instance_key") != instance_key and f"__{instance_key}__" not in dag_run_id:
            continue
        items.append(
            {
                "dag_id": dag_id,
                "dag_run_id": dag_run_id,
                "state": row.get("state"),
                "run_type": row.get("run_type"),
                "logical_date": row.get("logical_date"),
                "queued_at": row.get("queued_at"),
                "start_date": row.get("start_date"),
                "end_date": row.get("end_date"),
                "note": row.get("note"),
                "conf": conf,
            }
        )

    items.sort(
        key=lambda item: item.get("end_date") or item.get("start_date") or item.get("queued_at") or item.get("logical_date") or "",
        reverse=True,
    )
    return items[:limit]
1897
+
1898
+
1899
@app.get("/api/health/live")
def live() -> dict[str, str]:
    """Liveness probe: always answers with a static "ok" status."""
    return dict(status="ok")
1902
+
1903
+
1904
@app.get("/api/health/ready")
def ready() -> dict[str, str]:
    """Readiness probe: always answers with a static "ready" status."""
    return dict(status="ready")
1907
+
1908
+
1909
@app.post("/api/admin/login")
def admin_login(payload: AdminLoginRequest) -> dict[str, Any]:
    """Exchange admin credentials for a Keycloak token using the password grant."""
    token_request = {
        "grant_type": "password",
        "username": payload.username.strip(),
        "password": payload.password,
    }
    return _request_keycloak_token(token_request)
1918
+
1919
+
1920
@app.post("/api/admin/refresh")
def admin_refresh(payload: AdminRefreshRequest) -> dict[str, Any]:
    """Exchange a refresh token for a new Keycloak token."""
    token_request = {
        "grant_type": "refresh_token",
        "refresh_token": payload.refresh_token,
    }
    return _request_keycloak_token(token_request)
1928
+
1929
+
1930
@app.get("/api/admin/whoami")
def whoami(payload: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Echo the claims resolved by the admin dependency."""
    return dict(claims=payload)
1933
+
1934
+
1935
@app.get("/api/admin/settings/llm")
def get_admin_llm_settings(payload: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the caller's effective LLM settings (public form) and provider status."""
    effective = _effective_vanna_llm_settings_for_user(payload)
    public_config = _serialize_vanna_llm_settings_public(effective)
    provider_status = _vanna_provider_status(effective)
    return {"config": public_config, "status": provider_status}
1942
+
1943
+
1944
@app.get("/api/admin/settings/llm/status")
def get_admin_llm_settings_status(payload: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return only the provider status for the caller's effective LLM settings."""
    effective = _effective_vanna_llm_settings_for_user(payload)
    return _vanna_provider_status(effective)
1947
+
1948
+
1949
@app.patch("/api/admin/settings/llm")
def update_admin_llm_settings(
    payload: AdminLlmSettingsUpdateRequest,
    admin_payload: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Persist LLM settings for the admin and return the stored config plus provider status."""
    stored = _persist_vanna_llm_settings(payload, admin_payload)
    public_config = _serialize_vanna_llm_settings_public(stored)
    provider_status = _vanna_provider_status(stored)
    return {"config": public_config, "status": provider_status}
1959
+
1960
+
1961
@app.get("/api/admin/users")
def list_admin_users(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """List admin users together with their Metabase synchronization state."""
    users = _list_admin_users_with_metabase_state()
    return {"items": users}
1964
+
1965
+
1966
@app.post("/api/admin/users")
def create_admin_user(
    payload: AdminUserCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create an admin user in Keycloak and mirror it in Metabase.

    If the Metabase creation fails, the freshly created Keycloak user is
    deleted again (best effort) and an HTTP 502 is raised. A failed rollback
    is appended to the error detail.
    """
    keycloak_client = _keycloak_admin_client()
    metabase_client = _metabase_admin_client()
    username = payload.username.strip()
    email = payload.email.strip()
    first_name = payload.first_name.strip()
    last_name = payload.last_name.strip()

    created = keycloak_client.create_admin_user(
        username=username,
        email=email,
        password=payload.password,
        first_name=first_name,
        last_name=last_name,
        enabled=payload.enabled,
    )

    try:
        metabase_user = metabase_client.create_admin_user(
            email=email,
            password=payload.password,
            first_name=first_name,
            last_name=last_name,
        )
    except Exception as exc:
        detail = str(getattr(exc, "detail", exc))
        # Undo the Keycloak side so the two systems do not drift apart.
        try:
            keycloak_client.delete_user(str(created["id"]))
        except Exception as rollback_exc:  # pragma: no cover - defensive rollback
            rollback_error = str(getattr(rollback_exc, "detail", rollback_exc))
            if rollback_error:
                detail = f"{detail}. Keycloak rollback failed: {rollback_error}"
        raise HTTPException(status_code=502, detail=detail) from exc

    user = {
        **created,
        "metabase_synced": True,
        "metabase_user_id": metabase_user.get("id"),
    }
    return {"user": user}
2011
+
2012
+
2013
@app.post("/api/admin/users/{user_id}/metabase-sync")
def sync_admin_user_metabase(
    user_id: str,
    req: AdminUserMetabaseSyncRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Ensure a Keycloak admin user also exists in Metabase.

    Returns the user with its Metabase id and a ``created`` flag telling
    whether a Metabase account had to be created. Raises 404 when the user is
    unknown to Keycloak and 409 when it has no email address.
    """
    keycloak_client = _keycloak_admin_client()
    metabase_client = _metabase_admin_client()

    target = keycloak_client.get_admin_user(user_id)
    if not target:
        raise HTTPException(status_code=404, detail="Admin user not found in Keycloak")

    email = str(target.get("email") or "").strip()
    if not email:
        raise HTTPException(status_code=409, detail="Admin user has no email for Metabase sync")

    existing = metabase_client.find_user_by_email(email)
    if existing:
        # Already mirrored: reuse the existing Metabase account.
        metabase_user_id = existing.get("id")
        was_created = False
    else:
        metabase_user = metabase_client.create_admin_user(
            email=email,
            password=req.password,
            first_name=str(target.get("first_name") or ""),
            last_name=str(target.get("last_name") or ""),
        )
        metabase_user_id = metabase_user.get("id")
        was_created = True

    return {
        "user": {
            **target,
            "metabase_synced": True,
            "metabase_user_id": metabase_user_id,
        },
        "created": was_created,
    }
2054
+
2055
+
2056
@app.delete("/api/admin/users/{user_id}")
def delete_admin_user(
    user_id: str,
    payload: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Delete an admin user from Metabase (when present) and from Keycloak.

    Raises:
        HTTPException: 409 when the caller tries to delete itself, 404 when the
            user does not exist in Keycloak.

    Returns:
        A summary whose ``metabase_deleted`` flag is True only when a Metabase
        delete was actually issued. Previously it was also True for a matching
        Metabase user that had no id and was therefore never deleted.
    """
    if str(payload.get("sub") or "") == user_id:
        raise HTTPException(status_code=409, detail="The current admin user cannot delete itself")
    keycloak_client = _keycloak_admin_client()
    metabase_client = _metabase_admin_client()
    target = keycloak_client.get_admin_user(user_id)
    if not target:
        raise HTTPException(status_code=404, detail="Admin user not found in Keycloak")

    email = str(target.get("email") or "").strip()
    # Without an email there is nothing to look up on the Metabase side.
    metabase_user = metabase_client.find_user_by_email(email) if email else None
    metabase_deleted = False
    if metabase_user and metabase_user.get("id") is not None:
        metabase_client.delete_user(metabase_user["id"])
        metabase_deleted = True

    keycloak_client.delete_user(user_id)
    return {
        "deleted": True,
        "user_id": user_id,
        "email": email,
        "metabase_deleted": metabase_deleted,
    }
2081
+
2082
+
2083
@app.get("/api/admin/connector-definitions/pnp")
def get_pnp_connector_definition(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Describe the PNP connector, including the selection catalog loaded from Power BI."""
    catalog = _load_pnp_powerbi_catalog_or_502()
    report_url = catalog["page_url"]
    catalog_fields = ("available_years", "available_microdados_types", "types_by_year", "items")
    selection_catalog = {field: catalog[field] for field in catalog_fields}

    return {
        "connector_id": "pnp",
        "internal_connector_id": PNP_INTERNAL_CONNECTOR_ID,
        "label": "Programa Nilo Pecanha",
        "ingestion_mode": "powerbi_microdados",
        "powerbi_report_url": report_url,
        "selection_catalog": selection_catalog,
        "sources": [],
    }
2100
+
2101
+
2102
@app.get("/api/admin/connections/pnp")
def list_pnp_connections(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """List all PNP connections, each annotated with the connector health snapshot."""
    connections = _group_pnp_connections(_load_all_pnp_rows())
    return {"items": _enrich_connections_with_health(connections)}
2106
+
2107
+
2108
@app.get("/api/admin/connections/pnp/{connection_key}")
def get_pnp_connection(connection_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return one PNP connection (health-enriched) together with the pipelines bound to it."""
    # The connection is loaded first so that a lookup failure surfaces before
    # the full row scan below.
    loaded = _load_pnp_connection(connection_key)
    enriched = _enrich_connections_with_health([loaded])[0]

    attached_pipelines = []
    for candidate in _group_pnp_instances(_load_all_pnp_rows()):
        if candidate.get("connection_key") == connection_key:
            attached_pipelines.append(candidate)

    return {"connection": enriched, "pipelines": attached_pipelines}
2116
+
2117
+
2118
@app.post("/api/admin/connections/pnp")
def create_pnp_connection(
    payload: PnpConnectionCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create a PNP connection and return it enriched with health data.

    The connection key is derived from the requested name. The stored page URL
    comes from the Power BI catalog, falling back to
    ``DEFAULT_PNP_POWERBI_REPORT_URL`` when the catalog has no usable value.

    Raises:
        HTTPException: 409 when the database reports SQLSTATE 23505
            (unique violation) for the derived key.
        psycopg2.Error: any other database error is re-raised unchanged.
    """
    catalog = _load_pnp_powerbi_catalog_or_502()
    connection_key = _build_pnp_connection_key(payload.connection_name)
    display_name = payload.connection_name.strip()
    report_url = str(catalog.get("page_url") or DEFAULT_PNP_POWERBI_REPORT_URL)

    try:
        pnp_instance_repository.create_connection(
            _db_connect,
            connection_key=connection_key,
            connection_name=display_name,
            page_url=report_url,
            is_active=payload.is_active,
        )
    except psycopg2.Error as exc:
        # Only a unique-constraint violation is translated; everything else propagates.
        if exc.pgcode != "23505":
            raise
        raise HTTPException(status_code=409, detail=f"PNP connection already exists for key: {connection_key}") from exc

    created = _load_pnp_connection(connection_key)
    return _enrich_connections_with_health([created])[0]
2140
+
2141
+
2142
@app.delete("/api/admin/connections/pnp/{connection_key}")
def delete_pnp_connection(
    connection_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Delete the PNP connection identified by ``connection_key`` via ``_delete_pnp_connection``."""
    outcome = _delete_pnp_connection(connection_key)
    return outcome
2148
+
2149
+
2150
@app.get("/api/admin/pipelines/pnp")
def list_pnp_pipelines(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return all PNP pipelines, grouped from the stored rows, under the ``items`` key."""
    stored_rows = _load_all_pnp_rows()
    return {"items": _group_pnp_instances(stored_rows)}
2153
+
2154
+
2155
@app.get("/api/admin/connectors/pnp/instances")
def list_pnp_instances(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the same listing as ``list_pnp_pipelines`` under the connector-instances path."""
    # Forward the already-resolved admin payload so the dependency is not re-evaluated.
    listing = list_pnp_pipelines(_)
    return listing
2158
+
2159
+
2160
@app.get("/api/admin/connectors/pnp/instances/{instance_key}")
def get_pnp_instance(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the PNP instance loaded by ``_load_pnp_instance`` for ``instance_key``."""
    instance = _load_pnp_instance(instance_key)
    return instance
2163
+
2164
+
2165
@app.get("/api/admin/pipelines/pnp/{instance_key}")
def get_pnp_pipeline(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the PNP pipeline for ``instance_key``; pipelines are loaded as instances."""
    pipeline = _load_pnp_instance(instance_key)
    return pipeline
2168
+
2169
+
2170
@app.get("/api/admin/connectors/pnp/instances/{instance_key}/admin-overview")
def get_pnp_instance_admin_overview(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Assemble the admin overview for one PNP instance.

    The response combines the instance itself, its diagnostics (raw and
    summarised), its run events, an ingestion summary built from those run
    events, and its integrations.
    """
    # Load order is kept as-is: the instance lookup runs before the other loaders.
    instance = _load_pnp_instance(instance_key)
    diagnostics = _load_pnp_instance_diagnostics(instance_key)
    run_events = _load_pnp_instance_run_events(instance_key)
    integrations = _load_pnp_instance_integrations(instance_key)

    overview: dict[str, object] = {"instance": instance, "diagnostics": diagnostics}
    overview["diagnostics_summary"] = _summarize_pnp_diagnostics(diagnostics)
    overview["run_events"] = run_events
    overview["ingestion"] = _build_pnp_ingestion_summary(run_events)
    overview["integrations"] = integrations
    return overview
2187
+
2188
+
2189
@app.get("/api/admin/pipelines/pnp/{instance_key}/admin-overview")
def get_pnp_pipeline_admin_overview(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Return the same overview as ``get_pnp_instance_admin_overview`` under the pipelines path."""
    overview = get_pnp_instance_admin_overview(instance_key, _)
    return overview
2195
+
2196
+
2197
@app.get("/api/admin/connectors/pnp/instances/{instance_key}/dag-runs")
def list_pnp_instance_dag_runs(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the DAG runs loaded for ``instance_key`` under the ``items`` key."""
    dag_runs = _load_pnp_instance_dag_runs(instance_key)
    return {"items": dag_runs}
2200
+
2201
+
2202
@app.get("/api/admin/pipelines/pnp/{instance_key}/dag-runs")
def list_pnp_pipeline_dag_runs(instance_key: str, _: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the same DAG-run listing as ``list_pnp_instance_dag_runs`` under the pipelines path."""
    # Delegate to the instance handler, passing the already-resolved admin payload.
    return list_pnp_instance_dag_runs(instance_key, _)
2205
+
2206
+
2207
@app.post("/api/admin/connectors/pnp/instances/{instance_key}/operations/validate-sources")
def trigger_pnp_instance_validate_sources(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Trigger the instance's Airflow DAG with ``operation="validate"``.

    The DAG id is derived from the loaded instance; the return value is
    whatever ``_trigger_pnp_airflow_dag`` produces.
    """
    instance = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(instance)
    return _trigger_pnp_airflow_dag(dag_id, instance_key, operation="validate")
2218
+
2219
+
2220
@app.post("/api/admin/pipelines/pnp/{instance_key}/operations/validate-sources")
def trigger_pnp_pipeline_validate_sources(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Trigger a ``validate`` run, same as ``trigger_pnp_instance_validate_sources``, under the pipelines path."""
    # Delegate to the instance handler, passing the already-resolved admin payload.
    return trigger_pnp_instance_validate_sources(instance_key, _)
2231
+
2232
+
2233
@app.post("/api/admin/connectors/pnp/instances/{instance_key}/operations/full-sync")
def trigger_pnp_instance_full_sync(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Trigger the instance's Airflow DAG with ``operation="sync"``.

    The DAG id is derived from the loaded instance; the return value is
    whatever ``_trigger_pnp_airflow_dag`` produces.
    """
    instance = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(instance)
    return _trigger_pnp_airflow_dag(dag_id, instance_key, operation="sync")
2244
+
2245
+
2246
@app.post("/api/admin/pipelines/pnp/{instance_key}/operations/full-sync")
def trigger_pnp_pipeline_full_sync(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Trigger a ``sync`` run, same as ``trigger_pnp_instance_full_sync``, under the pipelines path."""
    # Delegate to the instance handler, passing the already-resolved admin payload.
    return trigger_pnp_instance_full_sync(instance_key, _)
2257
+
2258
+
2259
@app.delete("/api/admin/pipelines/pnp/instances/{instance_key}")
def delete_pnp_pipeline_instance(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Delete the PNP instance identified by ``instance_key`` via ``_delete_pnp_instance``."""
    outcome = _delete_pnp_instance(instance_key)
    return outcome
2265
+
2266
+
2267
@app.delete("/api/admin/connections/pnp/instances/{instance_key}")
def delete_pnp_connection_instance(
    instance_key: str,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, Any]:
    """Delete a PNP instance under the connections path; same helper as the pipelines route."""
    outcome = _delete_pnp_instance(instance_key)
    return outcome
2273
+
2274
+
2275
@app.post("/api/admin/pipelines/pnp")
def create_pnp_pipeline(
    payload: PnpPipelineCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create a PNP pipeline (instance) on an existing connection and return it.

    Steps, in order: load the Power BI catalog, validate the requested
    years/types against it, resolve the downloads for that selection, load the
    target connection, persist the instance, then wait for its Airflow DAG.
    If the wait raises ``HTTPException`` the stored instance is deleted again
    before the error propagates.

    Raises:
        HTTPException: 404 when the repository reports the connection as
            missing; 409 on SQLSTATE 23505 (unique violation) for the derived
            instance key; or whatever ``_wait_for_airflow_dag`` raises.
        psycopg2.Error: any other database error is re-raised unchanged.
    """
    years = payload.selected_years
    microdados_types = payload.selected_microdados_types

    catalog = _load_pnp_powerbi_catalog_or_502()
    _validate_pnp_selection_against_catalog(
        selected_years=years,
        selected_microdados_types=microdados_types,
        catalog=catalog,
    )
    selected_downloads = _resolve_pnp_selected_downloads(
        selected_years=years,
        selected_microdados_types=microdados_types,
        catalog=catalog,
    )

    connection = _load_pnp_connection(payload.connection_key)
    instance_key = _build_pnp_pipeline_key(payload.pipeline_name)
    normalized_schedule = _normalize_pipeline_schedule(payload.schedule)

    try:
        pnp_instance_repository.create_instance(
            _db_connect,
            instance_key=instance_key,
            instance_name=payload.pipeline_name.strip(),
            connection_key=str(connection["connection_key"]),
            selected_years=years,
            selected_microdados_types=microdados_types,
            selected_downloads=selected_downloads,
            schedule=normalized_schedule,
            is_active=payload.is_active,
        )
    except pnp_instance_repository.PnpConnectionNotFoundError as exc:
        raise HTTPException(status_code=404, detail="PNP connection not found") from exc
    except psycopg2.Error as exc:
        # Only a unique-constraint violation is translated; everything else propagates.
        if exc.pgcode != "23505":
            raise
        raise HTTPException(status_code=409, detail=f"PNP instance already exists for key: {instance_key}") from exc

    instance = _load_pnp_instance(instance_key)
    dag_id = _build_pnp_instance_dag_id(instance)
    try:
        _wait_for_airflow_dag(dag_id)
    except HTTPException:
        # Compensate: remove the row just created so a failed DAG wait leaves nothing behind.
        pnp_instance_repository.delete_instance(_db_connect, instance_key=instance_key)
        raise
    else:
        return instance
2324
+
2325
+
2326
@app.post("/api/admin/connectors/pnp/instances")
def create_pnp_instance(
    payload: PnpInstanceCreateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Create a PNP instance without the caller naming a connection.

    The first connection returned by ``_group_pnp_connections`` is reused; when
    none exists, a connection named "PNP Principal" is created first. The
    actual creation is delegated to ``create_pnp_pipeline``.
    """
    existing_connections = _group_pnp_connections(_load_all_pnp_rows())
    if not existing_connections:
        bootstrap_request = PnpConnectionCreateRequest(connection_name="PNP Principal", is_active=payload.is_active)
        created_connection = create_pnp_connection(bootstrap_request, _)
        connection_key = str(created_connection["connection_key"])
    else:
        connection_key = existing_connections[0]["connection_key"]

    pipeline_request = PnpPipelineCreateRequest(
        pipeline_name=payload.instance_name,
        connection_key=connection_key,
        selected_years=payload.selected_years,
        selected_microdados_types=payload.selected_microdados_types,
        schedule=payload.schedule,
        is_active=payload.is_active,
    )
    return create_pnp_pipeline(pipeline_request, _)
2353
+
2354
+
2355
@app.patch("/api/admin/connectors/pnp/instances/{instance_key}")
def update_pnp_instance(
    instance_key: str,
    payload: PnpInstanceUpdateRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Update an instance's schedule and/or active flag.

    A ``schedule`` of ``None`` is passed through as ``None``; any other value
    is normalised with ``_normalize_pipeline_schedule`` first.
    """
    # Result intentionally discarded — presumably an existence check that
    # raises for an unknown key; confirm against _load_pnp_instance.
    _load_pnp_instance(instance_key)

    normalized_schedule = None
    if payload.schedule is not None:
        normalized_schedule = _normalize_pipeline_schedule(payload.schedule)

    return _persist_pnp_instance_settings(
        instance_key,
        schedule=normalized_schedule,
        is_active=payload.is_active,
    )
2368
+
2369
+
2370
@app.post("/api/embed/metabase-token")
def create_embed_token(req: EmbedRequest) -> dict[str, object]:
    """Return the signed Metabase embed payload for the requested dashboard and params."""
    # NOTE(review): this route declares no auth dependency, so any caller can
    # request a signed payload for any dashboard id — confirm this is intended.
    signed_payload = _signed_metabase_dashboard_payload(req.dashboard_id, req.params)
    return signed_payload
2373
+
2374
+
2375
@app.get("/api/embed/metabase-default")
def get_default_embed_token() -> dict[str, object]:
    """Return the signed Metabase embed payload for the stored default dashboard, with no params."""
    default_dashboard_id = _read_metabase_default_dashboard_id()
    no_params: dict = {}
    return _signed_metabase_dashboard_payload(default_dashboard_id, no_params)
2379
+
2380
+
2381
@app.post("/api/admin/embed/metabase-default")
def set_default_embed_token(
    req: EmbedRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Store ``req.dashboard_id`` as the default dashboard and return its signed embed payload."""
    chosen_dashboard_id = req.dashboard_id
    _write_metabase_default_dashboard_id(chosen_dashboard_id)
    return _signed_metabase_dashboard_payload(chosen_dashboard_id, req.params)
2388
+
2389
+
2390
@app.get("/api/admin/sql/catalog")
def get_admin_sql_catalog(_: dict[str, object] = Depends(_require_admin)) -> dict[str, object]:
    """Return the admin SQL catalog produced by ``_admin_sql_catalog`` under the ``items`` key."""
    catalog_entries = _admin_sql_catalog()
    return {"items": catalog_entries}
2393
+
2394
+
2395
@app.post("/api/admin/sql/query")
def run_admin_sql_query(
    req: AdminSqlQueryRequest,
    _: dict[str, object] = Depends(_require_admin),
) -> dict[str, object]:
    """Run an admin-supplied SQL statement in a read-only, time-limited transaction.

    The statement is first passed through ``_validate_admin_sql`` and
    ``_bounded_admin_sql``. At most ``req.max_rows`` rows are returned; one
    extra row is fetched solely to detect truncation. The transaction is
    always rolled back.

    Returns:
        A dict with ``fields`` (column names), ``rows``, ``row_count``,
        ``truncated`` and ``max_rows``.

    Raises:
        HTTPException: 400 carrying the database error text when psycopg2
            reports an error.
    """
    statement = _validate_admin_sql(req.sql)
    bounded_statement = _bounded_admin_sql(statement, req.max_rows)

    try:
        with _db_connect() as conn, conn.cursor() as cur:
            cur.execute("BEGIN READ ONLY")
            # If the driver has already opened a transaction (psycopg2 does so
            # implicitly when the connection is not in autocommit mode), the
            # BEGIN above is only a warning and does NOT make the transaction
            # read-only. SET TRANSACTION applies to the current transaction in
            # either case, so the read-only guard holds regardless of how
            # _db_connect configures the connection.
            cur.execute("SET TRANSACTION READ ONLY")
            cur.execute("SET LOCAL statement_timeout = '15s'")
            cur.execute(bounded_statement)
            description = cur.description or []
            fields = [{"name": column[0]} for column in description]
            # Fetch one row beyond the limit so truncation can be reported.
            rows = list(cur.fetchmany(req.max_rows + 1)) if description else []
            truncated = len(rows) > req.max_rows
            if truncated:
                rows = rows[: req.max_rows]
            cur.execute("ROLLBACK")
    except psycopg2.Error as exc:
        raise HTTPException(status_code=400, detail=str(exc).strip()) from exc

    return {
        "fields": fields,
        "rows": rows,
        "row_count": len(rows),
        "truncated": truncated,
        "max_rows": req.max_rows,
    }
2424
+
2425
+
2426
@app.post("/api/vanna/ask")
async def ask(
    req: AskRequest,
    payload: dict[str, object] | None = Depends(verify_optional_bearer),
) -> dict[str, object]:
    """Forward a question to the Vanna service with the applicable LLM settings.

    When the optional bearer dependency yields a truthy payload, the settings
    are resolved for that user; otherwise the global settings are used.
    """
    if payload:
        config = _effective_vanna_llm_settings_for_user(payload)
    else:
        config = _effective_global_vanna_llm_settings()
    return await ask_vanna(settings.vanna_service_url, req.question, _vanna_llm_override_payload(config))