@dataif/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/dataif.js +623 -0
- package/package.json +26 -0
- package/scripts/build-template.mjs +72 -0
- package/templates/dataif/README.md +157 -0
- package/templates/dataif/infra/.env.example +119 -0
- package/templates/dataif/infra/.env.stg.example +119 -0
- package/templates/dataif/infra/airflow/Dockerfile +11 -0
- package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
- package/templates/dataif/infra/airflow/requirements.txt +3 -0
- package/templates/dataif/infra/docker-compose.yml +306 -0
- package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
- package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
- package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
- package/templates/dataif/infra/keycloak/Dockerfile +4 -0
- package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
- package/templates/dataif/infra/ollama/Dockerfile +9 -0
- package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
- package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
- package/templates/dataif/infra/postgres/Dockerfile +4 -0
- package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
- package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
- package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
- package/templates/dataif/scripts/configure-env.sh +149 -0
- package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
- package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
- package/templates/dataif/scripts/deploy.sh +79 -0
- package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
- package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
- package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
- package/templates/dataif/scripts/publish-images.sh +31 -0
- package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
- package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
- package/templates/dataif/services/api/.dockerignore +18 -0
- package/templates/dataif/services/api/Dockerfile +12 -0
- package/templates/dataif/services/api/app/__init__.py +1 -0
- package/templates/dataif/services/api/app/auth.py +48 -0
- package/templates/dataif/services/api/app/config.py +59 -0
- package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
- package/templates/dataif/services/api/app/main.py +2432 -0
- package/templates/dataif/services/api/app/metabase_admin.py +191 -0
- package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
- package/templates/dataif/services/api/app/metabase_embed.py +15 -0
- package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
- package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
- package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
- package/templates/dataif/services/api/app/vanna_client.py +32 -0
- package/templates/dataif/services/api/requirements.txt +9 -0
- package/templates/dataif/services/vanna/.dockerignore +18 -0
- package/templates/dataif/services/vanna/Dockerfile +12 -0
- package/templates/dataif/services/vanna/app/config.py +57 -0
- package/templates/dataif/services/vanna/app/main.py +108 -0
- package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
- package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
- package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
- package/templates/dataif/services/vanna/requirements.txt +8 -0
- package/templates/dataif/services/web/.dockerignore +13 -0
- package/templates/dataif/services/web/Dockerfile +16 -0
- package/templates/dataif/services/web/index.html +12 -0
- package/templates/dataif/services/web/nginx.conf +74 -0
- package/templates/dataif/services/web/package-lock.json +4397 -0
- package/templates/dataif/services/web/package.json +32 -0
- package/templates/dataif/services/web/postcss.config.mjs +5 -0
- package/templates/dataif/services/web/src/App.jsx +2817 -0
- package/templates/dataif/services/web/src/adminAuth.js +245 -0
- package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
- package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
- package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
- package/templates/dataif/services/web/src/assets/if.svg +0 -0
- package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
- package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
- package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
- package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
- package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
- package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
- package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
- package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
- package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
- package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
- package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
- package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
- package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
- package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
- package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
- package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
- package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
- package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
- package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
- package/templates/dataif/services/web/src/main.jsx +14 -0
- package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
- package/templates/dataif/services/web/src/styles/globals.css +60 -0
- package/templates/dataif/services/web/src/styles/theme.css +1326 -0
- package/templates/dataif/services/web/src/styles/typography.css +430 -0
- package/templates/dataif/services/web/src/styles.css +1287 -0
- package/templates/dataif/services/web/src/utils/cx.ts +24 -0
- package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
- package/templates/dataif/services/web/vite.config.js +14 -0
- package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
- package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
- package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
- package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
- package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
- package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
- package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
- package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
- package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
- package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
- package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
- package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
- package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
- package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import json
|
|
5
|
+
import uuid
|
|
6
|
+
from typing import Any
|
|
7
|
+
from urllib.parse import parse_qs, unquote, urlparse
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
# Default report page queried by load_public_microdados_catalog. The `r` query
# parameter is a base64-encoded JSON object; its "k" and "t" fields are read by
# _decode_resource_descriptor_from_url and used as resource key and tenant id.
DEFAULT_PNP_POWERBI_REPORT_URL = (
    "https://app.powerbi.com/view?"
    "r=eyJrIjoiZDhkNGNiYzgtMjQ0My00OGVlLWJjNzYtZWQwYjI2OThhYWM1IiwidCI6IjllNjgyMzU5LWQxMjgtNGVkYi1iYjU4LTgyYjJhMTUzNDBmZiJ9"
)
# Display name of the report section that _find_microdados_section looks for.
MICRODADOS_SECTION_DISPLAY_NAME = "Microdados da PNP"
# queryRef values a visual must expose (as Rows / Columns / Values projections,
# or as prototype-query Select names) to be accepted as the catalog visual.
MICRODADOS_ROWS_QUERY_REF = "Microdados.Ano Base"
MICRODADOS_COLUMNS_QUERY_REF = "Microdados.Tipo de microdados"
MICRODADOS_VALUES_QUERY_REF = "Microdados.MicrodadosURL"
# Entity and property names used by _build_fallback_prototype_query.
MICRODADOS_ENTITY_NAME = "Microdados"
MICRODADOS_ANO_PROPERTY = "Ano Base"
MICRODADOS_TIPO_PROPERTY = "Tipo de microdados"
MICRODADOS_URL_PROPERTY = "MicrodadosURL"
# Known microdata types; the tuple order is the ranking applied when catalog
# items and per-year type lists are sorted.
PNP_MICRODADOS_TYPES = (
    "Eficiência Acadêmica",
    "Financeiro",
    "Matrículas",
    "Servidores",
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def load_public_microdados_catalog(
    *,
    timeout_seconds: float,
    page_url: str = DEFAULT_PNP_POWERBI_REPORT_URL,
) -> dict[str, Any]:
    """Load the microdata download catalog from a public Power BI report page.

    The function fetches ``page_url``, resolves the resource key / tenant id
    and the API host, downloads the report metadata, runs the catalog visual's
    query and decodes the response into a list of catalog items.

    Args:
        timeout_seconds: Timeout passed to the ``httpx.Client``.
        page_url: Public report URL to read.

    Returns:
        A dict with the keys ``page_url``, ``resource_key``,
        ``available_years`` (descending), ``available_microdados_types``
        (in ``PNP_MICRODADOS_TYPES`` order), ``types_by_year`` and ``items``
        (dicts with ``ano_base``, ``tipo_microdados``, ``microdados_url``).

    Raises:
        RuntimeError: If the page exposes no resource descriptor, routing
            returns no ``FixedClusterUri``, or no usable visual is found.
        httpx.HTTPError: Propagated from the HTTP calls / ``raise_for_status``.
    """
    with httpx.Client(timeout=timeout_seconds, follow_redirects=True) as client:
        html_response = client.get(page_url)
        html_response.raise_for_status()
        html = html_response.text

        # Prefer the descriptor embedded in the page; fall back to the URL token.
        resource_descriptor = _extract_resource_descriptor(html) or _decode_resource_descriptor_from_url(page_url)
        resource_key = str(resource_descriptor.get("k") or "").strip()
        tenant_id = str(resource_descriptor.get("t") or "").strip()
        if not resource_key or not tenant_id:
            raise RuntimeError("Power BI page did not expose the public resource descriptor")

        resolved_cluster_uri = _extract_resolved_cluster_uri(html)
        if not resolved_cluster_uri:
            # The page did not embed the cluster; ask the routing endpoint.
            route_response = client.get(
                f"https://api.powerbi.com/public/routing/cluster/{tenant_id}",
                headers=_powerbi_headers(resource_key),
            )
            route_response.raise_for_status()
            resolved_cluster_uri = str((route_response.json() or {}).get("FixedClusterUri") or "").strip()
            if not resolved_cluster_uri:
                raise RuntimeError("Power BI routing did not return FixedClusterUri")

        api_base_url = _build_apim_url(resolved_cluster_uri)
        metadata_response = client.get(
            f"{api_base_url}/public/reports/{resource_key}/modelsAndExploration?preferReadOnlySession=true",
            headers=_powerbi_headers(resource_key),
        )
        metadata_response.raise_for_status()
        metadata = metadata_response.json()
        visual = _find_microdados_visual(metadata)
        report = dict((metadata.get("exploration") or {}).get("report") or {})
        model = dict(report.get("model") or {})
        # First entry of "models" backs up fields missing from the report node.
        model_fallback = dict((metadata.get("models") or [{}])[0] or {})

        query_body = {
            "version": "1.0.0",
            "queries": [
                {
                    "Query": {
                        "Commands": [
                            {
                                "SemanticQueryDataShapeCommand": {
                                    "Query": dict((visual.get("singleVisual") or {}).get("prototypeQuery") or {}),
                                    "Binding": {
                                        "Primary": {"Groupings": [{"Projections": [0, 1, 2]}]},
                                        "DataReduction": {"DataVolume": 3, "Primary": {"Top": {"Count": 500}}},
                                        "Version": 1,
                                    },
                                    "ExecutionMetricsKind": 1,
                                }
                            }
                        ]
                    },
                    "ApplicationContext": {
                        "DatasetId": str(model.get("dbName") or model_fallback.get("dbName") or ""),
                        "Sources": [
                            {
                                "ReportId": str(report.get("objectId") or ""),
                                "VisualId": str(visual.get("name") or ""),
                            }
                        ],
                    },
                }
            ],
            "modelId": int(report.get("modelId") or model_fallback.get("id") or 0),
        }
        query_response = client.post(
            f"{api_base_url}/public/reports/querydata?synchronous=true",
            headers=_powerbi_headers(resource_key, json_request=True),
            json=query_body,
        )
        query_response.raise_for_status()
        items = _decode_microdados_catalog(query_response.json())

    unknown_rank = 999
    type_rank = {name: index for index, name in enumerate(PNP_MICRODADOS_TYPES)}

    def _year_sort_value(raw_year: Any) -> int:
        # isdecimal() (unlike isdigit()) only accepts characters int() can
        # parse, so odd values such as "²" sort last instead of crashing.
        text = str(raw_year)
        return -int(text) if text.isdecimal() else 0

    # Newest year first, then known-type order, then URL as a tie breaker.
    items.sort(
        key=lambda item: (
            _year_sort_value(item["ano_base"]),
            type_rank.get(item["tipo_microdados"], unknown_rank),
            item["microdados_url"],
        )
    )

    years = sorted({item["ano_base"] for item in items}, reverse=True)

    # Group the types per year in a single pass over the items.
    types_seen_by_year: dict[str, set[str]] = {}
    for item in items:
        types_seen_by_year.setdefault(item["ano_base"], set()).add(item["tipo_microdados"])
    by_year: dict[str, list[str]] = {
        year: sorted(
            types_seen_by_year[year],
            key=lambda name: (type_rank.get(name, unknown_rank), name),
        )
        for year in years
    }

    present_types = {row["tipo_microdados"] for row in items}
    return {
        "page_url": page_url,
        "resource_key": resource_key,
        "available_years": years,
        "available_microdados_types": [name for name in PNP_MICRODADOS_TYPES if name in present_types],
        "types_by_year": by_year,
        "items": items,
    }
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _powerbi_headers(resource_key: str, json_request: bool = False) -> dict[str, str]:
    """Return the headers sent with each Power BI API request.

    Every call generates new random ``ActivityId`` and ``RequestId`` values.
    ``Content-Type: application/json`` is appended only when ``json_request``
    is true.
    """
    content_type = {"Content-Type": "application/json"} if json_request else {}
    return {
        "Accept": "application/json",
        "ActivityId": str(uuid.uuid4()),
        "RequestId": str(uuid.uuid4()),
        "X-PowerBI-ResourceKey": resource_key,
        **content_type,
    }
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _extract_resource_descriptor(html: str) -> dict[str, str]:
    """Read the resource descriptor embedded in the report page's script.

    Looks for ``resourceDescriptor = JSON.parse('...');`` in ``html``,
    un-escapes the string literal and parses it as JSON.

    Returns:
        ``{"k": ..., "t": ...}`` with both values non-empty, or ``{}`` when
        the marker is missing, the payload cannot be parsed, or either field
        is absent. Returning an empty (falsy) dict in every failure case lets
        a caller fall back to another source with ``or``.
    """
    marker = "resourceDescriptor = JSON.parse('"
    start = html.find(marker)
    if start < 0:
        return {}
    start += len(marker)
    end = html.find("');", start)
    if end < 0:
        return {}

    try:
        # latin-1 + backslashreplace keeps non-ASCII characters intact through
        # the unicode_escape decode (a utf-8 round trip would mangle them).
        payload = html[start:end].encode("latin-1", "backslashreplace").decode("unicode_escape")
        descriptor = json.loads(payload)
    except ValueError:
        # Covers both a bad escape sequence and invalid JSON.
        return {}
    if not isinstance(descriptor, dict):
        return {}

    resource_key = str(descriptor.get("k") or "").strip()
    tenant_id = str(descriptor.get("t") or "").strip()
    if not resource_key or not tenant_id:
        return {}
    return {"k": resource_key, "t": tenant_id}
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _decode_resource_descriptor_from_url(page_url: str) -> dict[str, str]:
    """Decode the resource descriptor carried in the URL's ``r`` parameter.

    The parameter is treated as (possibly unpadded) URL-safe base64 of a
    UTF-8 JSON object.

    Returns:
        ``{"k": ..., "t": ...}`` with both values non-empty, or ``{}`` when
        the parameter is missing, cannot be decoded, is not a JSON object, or
        lacks either field.
    """
    encoded = parse_qs(urlparse(page_url).query).get("r", [])
    if not encoded:
        return {}

    token = unquote(encoded[0])
    padding = "=" * (-len(token) % 4)  # restore padding stripped from the token
    try:
        payload = json.loads(base64.urlsafe_b64decode(token + padding).decode("utf-8"))
    except ValueError:
        # binascii.Error, UnicodeDecodeError and JSONDecodeError all derive
        # from ValueError; a malformed token means "no descriptor here".
        return {}
    if not isinstance(payload, dict):
        return {}

    resource_key = str(payload.get("k") or "").strip()
    tenant_id = str(payload.get("t") or "").strip()
    if not resource_key or not tenant_id:
        return {}
    return {"k": resource_key, "t": tenant_id}
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _extract_resolved_cluster_uri(html: str) -> str:
    """Return the value of ``var resolvedClusterUri = '...';`` found in ``html``.

    The result is whitespace-stripped. An empty string is returned when the
    assignment is missing or its closing ``';`` cannot be found.
    """
    _, opener, remainder = html.partition("var resolvedClusterUri = '")
    if not opener:
        return ""
    value, closer, _ = remainder.partition("';")
    return value.strip() if closer else ""
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _build_apim_url(cluster_uri: str) -> str:
    """Derive the API base URL from a cluster URI.

    Only the first host label is rewritten: ``-redirect`` and ``global-`` are
    removed and ``-api`` is appended. The result has the form
    ``scheme://host`` (scheme defaults to ``https``); path, query and port
    are not carried over.

    Raises:
        RuntimeError: If ``cluster_uri`` has no hostname.
    """
    parts = urlparse(cluster_uri)
    host = parts.hostname
    if not host:
        raise RuntimeError("Invalid Power BI cluster uri")

    first_label, *other_labels = host.split(".")
    first_label = first_label.replace("-redirect", "").replace("global-", "") + "-api"
    api_host = ".".join([first_label, *other_labels])
    return f"{parts.scheme or 'https'}://{api_host}"
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _find_microdados_visual(metadata: dict[str, Any]) -> dict[str, Any]:
    """Pick the visual whose query yields the microdata catalog.

    Resolution order:
      1. a matching visual inside the preferred section;
      2. a synthetic fallback visual built from the preferred section;
      3. the first matching visual in any section.

    Raises:
        RuntimeError: If none of the above produces a visual. The message
            depends on whether the preferred section was present.
    """
    sections = list((metadata.get("exploration") or {}).get("sections") or [])
    preferred_section = _find_microdados_section(sections)

    if preferred_section is not None:
        for resolver in (_select_microdados_visual, _build_fallback_visual):
            candidate = resolver(preferred_section)
            if candidate is not None:
                return candidate

    # Lazily scan every section and stop at the first match.
    matches = (
        found
        for found in (_select_microdados_visual(section) for section in sections)
        if found is not None
    )
    first_match = next(matches, None)
    if first_match is not None:
        return first_match

    if preferred_section is not None:
        raise RuntimeError(
            "Power BI metadata exposed the Microdados da PNP section, "
            "but no compatible visual or fallback query context was found"
        )

    # List the non-empty section names to make the error actionable.
    section_names = {str(section.get("displayName") or "").strip() for section in sections}
    section_names.discard("")
    available_sections = ", ".join(sorted(section_names))
    message = "Power BI metadata did not expose the Microdados da PNP section"
    if available_sections:
        message += f"; available sections: {available_sections}"
    raise RuntimeError(message)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _find_microdados_section(sections: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return the first section whose stripped ``displayName`` equals
    ``MICRODADOS_SECTION_DISPLAY_NAME``, or ``None`` if there is none."""
    return next(
        (
            candidate
            for candidate in sections
            if str(candidate.get("displayName") or "").strip() == MICRODADOS_SECTION_DISPLAY_NAME
        ),
        None,
    )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _select_microdados_visual(section: dict[str, Any]) -> dict[str, Any] | None:
    """Return the first visual in ``section`` that matches the catalog layout.

    Each container's ``config`` is expected to be a JSON string. Containers
    with a missing, blank, non-string or unparsable config, or without a
    ``singleVisual`` entry, are skipped. Returns ``None`` if nothing matches.
    """
    for container in section.get("visualContainers") or []:
        raw_config = container.get("config")
        if not (isinstance(raw_config, str) and raw_config.strip()):
            continue

        try:
            visual = json.loads(raw_config)
        except json.JSONDecodeError:
            continue

        single_visual = dict(visual.get("singleVisual") or {})
        if single_visual and _visual_matches_microdados_catalog(
            single_visual,
            dict(single_visual.get("projections") or {}),
        ):
            return visual
    return None
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _visual_matches_microdados_catalog(single_visual: dict[str, Any], projections: dict[str, Any]) -> bool:
    """Tell whether a visual exposes the catalog's year / type / URL fields.

    A visual matches when either:
      * it is a ``pivotTable`` whose active Rows, Columns and Values
        projections are exactly the three expected query refs; or
      * its prototype query's ``Select`` names include all three query refs.
    """

    def active_refs(role: str) -> list[Any]:
        # A projection without an "active" key counts as active.
        return [entry.get("queryRef") for entry in projections.get(role) or [] if entry.get("active", True)]

    observed = (active_refs("Rows"), active_refs("Columns"), active_refs("Values"))
    expected = (
        [MICRODADOS_ROWS_QUERY_REF],
        [MICRODADOS_COLUMNS_QUERY_REF],
        [MICRODADOS_VALUES_QUERY_REF],
    )
    if single_visual.get("visualType") == "pivotTable" and observed == expected:
        return True

    prototype_query = dict(single_visual.get("prototypeQuery") or {})
    select_names = {
        str(entry.get("Name") or "").strip()
        for entry in prototype_query.get("Select") or []
        if isinstance(entry, dict)
    }
    required_names = {
        MICRODADOS_ROWS_QUERY_REF,
        MICRODADOS_COLUMNS_QUERY_REF,
        MICRODADOS_VALUES_QUERY_REF,
    }
    return required_names <= select_names
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _build_fallback_visual(section: dict[str, Any]) -> dict[str, Any] | None:
    """Build a synthetic visual for ``section`` when no real one matched.

    The visual is named after the first container's ``objectName``, else its
    ``id``, else ``"microdados_catalog"``, and carries the fallback prototype
    query. Returns ``None`` when the section has no visual containers.
    """
    containers = list(section.get("visualContainers") or [])
    if containers:
        first_container = containers[0]
        return {
            "name": str(first_container.get("objectName") or first_container.get("id") or "microdados_catalog"),
            "singleVisual": {
                "visualType": "microdados_catalog_fallback",
                "prototypeQuery": _build_fallback_prototype_query(),
            },
        }
    return None
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _build_fallback_prototype_query() -> dict[str, Any]:
    """Return a hand-built prototype query over the microdata entity.

    The query selects, in this order, the URL (as a ``Measure``), the base
    year and the microdata type (both as ``Column``), all from the entity
    aliased as ``m``. A new structure is built on every call.
    """
    source_alias = "m"

    def select_entry(kind: str, property_name: str, query_ref: str) -> dict[str, Any]:
        # Build fresh nested dicts each time so the entries share no state.
        return {
            kind: {
                "Expression": {"SourceRef": {"Source": source_alias}},
                "Property": property_name,
            },
            "Name": query_ref,
        }

    return {
        "Version": 2,
        "From": [{"Name": source_alias, "Entity": MICRODADOS_ENTITY_NAME, "Type": 0}],
        "Select": [
            select_entry("Measure", MICRODADOS_URL_PROPERTY, MICRODADOS_VALUES_QUERY_REF),
            select_entry("Column", MICRODADOS_ANO_PROPERTY, MICRODADOS_ROWS_QUERY_REF),
            select_entry("Column", MICRODADOS_TIPO_PROPERTY, MICRODADOS_COLUMNS_QUERY_REF),
        ],
    }
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _decode_microdados_catalog(payload: dict[str, Any]) -> list[dict[str, str]]:
    """Turn a querydata response into a list of unique catalog items.

    Walks ``results[].result.data.dsr.DS[]``, decodes each dataset's rows and
    reads the first three cells of every row as year, microdata type and
    download URL. Rows with fewer than three cells or with any blank field
    are dropped, and repeated (year, type, url) triples are emitted once, in
    first-seen order.
    """
    catalog: list[dict[str, str]] = []
    emitted: set[tuple[str, str, str]] = set()

    for result in payload.get("results") or []:
        data = dict((result.get("result") or {}).get("data") or {})
        datasets = dict(data.get("dsr") or {}).get("DS") or []
        for dataset in datasets:
            for row in _decode_dsr_rows(dict(dataset or {})):
                if len(row) < 3:
                    continue
                year, microdata_type, download_url = (str(cell or "").strip() for cell in row[:3])
                if not (year and microdata_type and download_url):
                    continue
                entry_key = (year, microdata_type, download_url)
                if entry_key in emitted:
                    continue
                emitted.add(entry_key)
                catalog.append(
                    {
                        "ano_base": year,
                        "tipo_microdados": microdata_type,
                        "microdados_url": download_url,
                    }
                )
    return catalog
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _decode_dsr_rows(dataset: dict[str, Any]) -> list[list[Any]]:
    """Expand every ``DM*`` member of a dataset's ``PH`` list into full rows.

    For each member, a row carrying an ``S`` list (re)declares the column
    schema and resets the carried-over values. Each row is then inflated
    against the previous row's raw values, and every cell is resolved through
    ``ValueDicts``. Non-dict rows, rows seen before any schema, and rows that
    inflate to ``None`` are skipped.
    """
    dictionaries = dict(dataset.get("ValueDicts") or {})
    decoded: list[list[Any]] = []

    for placeholder in dataset.get("PH") or []:
        if not isinstance(placeholder, dict):
            continue
        for member_name, member_rows in placeholder.items():
            is_data_member = (
                isinstance(member_name, str)
                and member_name.startswith("DM")
                and isinstance(member_rows, list)
            )
            if not is_data_member:
                continue

            columns: list[dict[str, Any]] = []
            carried: list[Any] = []
            for entry in member_rows:
                if not isinstance(entry, dict):
                    continue
                declared = entry.get("S")
                if isinstance(declared, list):
                    columns = [dict(column or {}) for column in declared]
                    carried = [None] * len(columns)
                if not columns:
                    continue
                inflated = _inflate_dsr_row(entry, columns, carried)
                if inflated is None:
                    continue
                # Carry the raw (unresolved) values forward for the next row.
                carried = list(inflated)
                decoded.append(
                    [_resolve_dsr_value(column, dictionaries, cell) for column, cell in zip(columns, inflated)]
                )
    return decoded
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _inflate_dsr_row(
    row: dict[str, Any],
    schema: list[dict[str, Any]],
    previous_values: list[Any],
) -> list[Any] | None:
    """Expand one compressed DSR row into one value per schema column.

    In the (undocumented) Power BI DSR encoding, ``C`` holds only the values
    that are actually transmitted, ``R`` is a bitmask of columns repeating the
    previous row's value and ``Ø`` is a bitmask of null columns; bit ``i``
    refers to column ``i``. The values in ``C`` fill the remaining columns
    from left to right.

    Args:
        row: Raw row object from the response.
        schema: Column descriptors; only their count and ``N`` names are used.
        previous_values: Raw values of the previous row (not modified).

    Returns:
        A new list with ``len(schema)`` values, or ``None`` when the row has
        neither a ``C`` list nor any value keyed by a schema column name.
    """
    compressed = row.get("C")
    if isinstance(compressed, list):
        width = len(schema)
        # Start from the previous row, padded/truncated to the schema width.
        values = (list(previous_values) + [None] * width)[:width]

        raw_repeat = row.get("R")
        raw_null = row.get("Ø")
        repeat_mask = raw_repeat if isinstance(raw_repeat, int) else 0
        null_mask = raw_null if isinstance(raw_null, int) else 0
        if not isinstance(raw_repeat, int) and not isinstance(raw_null, int) and len(compressed) < width:
            # No masks but too few values: keep the historical behaviour of
            # treating the leading columns as repeated (right-aligned fill).
            repeat_mask = (1 << (width - len(compressed))) - 1

        cursor = 0
        for index in range(width):
            bit = 1 << index
            if repeat_mask & bit:
                continue  # keep the value carried over from the previous row
            if null_mask & bit:
                values[index] = None
                continue
            if cursor >= len(compressed):
                break  # malformed row: fewer values than the masks announce
            values[index] = compressed[cursor]
            cursor += 1
        return values

    # Uncompressed form: values keyed by the schema column names.
    named_values = [row.get(str(column.get("N") or "")) for column in schema]
    if any(value is not None for value in named_values):
        return named_values
    return None
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _resolve_dsr_value(schema_item: dict[str, Any], value_dicts: dict[str, Any], raw_value: Any) -> Any:
|
|
427
|
+
dictionary_name = schema_item.get("DN")
|
|
428
|
+
if isinstance(raw_value, int) and isinstance(dictionary_name, str):
|
|
429
|
+
dictionary = value_dicts.get(dictionary_name)
|
|
430
|
+
if isinstance(dictionary, list) and 0 <= raw_value < len(dictionary):
|
|
431
|
+
raw_value = dictionary[raw_value]
|
|
432
|
+
|
|
433
|
+
if isinstance(raw_value, str):
|
|
434
|
+
cleaned = raw_value.strip()
|
|
435
|
+
if len(cleaned) >= 2 and cleaned[0] == cleaned[-1] and cleaned[0] in {"'", '"'}:
|
|
436
|
+
cleaned = cleaned[1:-1]
|
|
437
|
+
return cleaned
|
|
438
|
+
return raw_value
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
from fastapi import HTTPException
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
async def ask_vanna(
    vanna_service_url: str,
    question: str,
    llm_override: dict[str, object] | None = None,
) -> dict[str, object]:
    """POST a question to the Vanna service's ``/ask`` endpoint and return its JSON body.

    Args:
        vanna_service_url: Base URL of the Vanna service; a trailing slash is tolerated.
        question: The natural-language question to forward.
        llm_override: Optional override forwarded as ``llm_override``; omitted
            from the payload when empty or ``None``.

    Returns:
        The decoded JSON body of a successful response.

    Raises:
        HTTPException: 503 when the request cannot be sent or completed;
            the upstream status code when the service answers with >= 400;
            502 when a successful response does not contain valid JSON.
    """
    payload: dict[str, object] = {"question": question}
    if llm_override:
        payload["llm_override"] = llm_override

    # Avoid producing ".../​/ask" when the configured base URL ends with "/".
    endpoint = f"{vanna_service_url.rstrip('/')}/ask"

    try:
        async with httpx.AsyncClient(timeout=60) as client:
            response = await client.post(endpoint, json=payload)
    except httpx.RequestError as exc:
        raise HTTPException(status_code=503, detail=f"Vanna service unavailable: {exc}") from exc

    if response.status_code >= 400:
        try:
            body = response.json()
        except ValueError:
            body = None
        # Only a JSON object can carry a "detail" key; any other body shape
        # (list, string, invalid JSON) falls back to the raw response text.
        detail = body.get("detail", response.text) if isinstance(body, dict) else response.text
        raise HTTPException(status_code=response.status_code, detail=detail)

    try:
        return response.json()
    except ValueError as exc:
        raise HTTPException(status_code=502, detail="Vanna service returned a non-JSON response") from exc
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Image for the Vanna FastAPI service.
FROM python:3.11-slim

# Do not write .pyc files; keep stdout/stderr unbuffered.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# Dependencies are copied and installed before the application code so this
# layer is only rebuilt when requirements.txt changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY app /app/app

# Serve app.main:app with uvicorn on all interfaces, port 9000.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "9000"]
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
from pydantic_settings import BaseSettings
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Settings(BaseSettings):
    """Configuration of the Vanna service, loaded from the environment and ``.env``."""

    vanna_host: str = Field(default="0.0.0.0", alias="VANNA_HOST")
    vanna_port: int = Field(default=9000, alias="VANNA_PORT")
    vanna_dsn: str = Field(..., alias="VANNA_DSN")  # required, no default
    vanna_llm_provider: str = Field(default="ollama", alias="VANNA_LLM_PROVIDER")
    vanna_ollama_base_url: str = Field(default="http://ollama:11434", alias="VANNA_OLLAMA_BASE_URL")
    vanna_ollama_model: str = Field(default="sabia-7b", alias="VANNA_OLLAMA_MODEL")
    vanna_maritaca_api_url: str = Field(
        default="https://chat.maritaca.ai/api/chat/completions", alias="VANNA_MARITACA_API_URL"
    )
    vanna_maritaca_api_key: str = Field(default="", alias="VANNA_MARITACA_API_KEY")
    vanna_maritaca_model: str = Field(default="sabia-4", alias="VANNA_MARITACA_MODEL")
    vanna_maritaca_timeout_seconds: int = Field(default=60, alias="VANNA_MARITACA_TIMEOUT_SECONDS")
    vanna_vectorstore_path: str = Field(default="/data/vanna/chroma", alias="VANNA_VECTORSTORE_PATH")
    vanna_auto_train: bool = Field(default=True, alias="VANNA_AUTO_TRAIN")
    vanna_allowed_schema: str = Field(default="curated", alias="VANNA_ALLOWED_SCHEMA")
    allowed_curated_views: str = Field(default="", alias="ALLOWED_CURATED_VIEWS")
    vanna_max_rows: int = Field(default=200, alias="VANNA_MAX_ROWS")

    def effective_allowed_schema(self) -> str:
        """Return the lower-cased schema name that queries are limited to.

        The configured ``vanna_allowed_schema`` wins whenever it is non-blank.
        Because that field defaults to ``"curated"``, the legacy derivation
        from ``allowed_curated_views`` is only reached when the value is blank
        and the ``VANNA_ALLOWED_SCHEMA`` environment variable is unset or empty.
        """
        explicit = self.vanna_allowed_schema.strip().lower()
        if explicit:
            return explicit
        if os.getenv("VANNA_ALLOWED_SCHEMA"):
            # Set in the environment but blank after stripping.
            return "curated"

        # Legacy setting: comma-separated "schema.view" names. Use their
        # schema only when every qualified entry agrees on a single one.
        prefixes: set[str] = set()
        for entry in self.allowed_curated_views.split(","):
            qualified = entry.strip()
            if "." in qualified:
                prefixes.add(qualified.split(".", 1)[0].lower())
        if len(prefixes) == 1:
            return prefixes.pop()
        return "curated"

    def model_name(self) -> str:
        """Return the model configured for the active provider (Ollama unless provider is ``maritaca``)."""
        active_provider = self.vanna_llm_provider.strip().lower()
        return self.vanna_maritaca_model if active_provider == "maritaca" else self.vanna_ollama_model

    class Config:
        env_file = ".env"
        case_sensitive = False


settings = Settings()
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from fastapi import FastAPI, HTTPException
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
from sqlalchemy import create_engine, text
|
|
9
|
+
|
|
10
|
+
from .config import settings
|
|
11
|
+
from .sql_guard import SQLGuard
|
|
12
|
+
from .vanna_engine import DataifVannaEngine
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Request body accepted by POST /ask.
class AskRequest(BaseModel):
    # Natural-language question; validated to be 3..1000 characters long.
    question: str = Field(..., min_length=3, max_length=1000)
    # Optional per-request override, passed to the engine as runtime_override.
    llm_override: dict[str, Any] | None = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _allowed_schema() -> str:
    """Return the schema name resolved by ``settings.effective_allowed_schema()``."""
    return settings.effective_allowed_schema()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _extract_sql(candidate: str) -> str:
|
|
25
|
+
fenced = re.search(r"```(?:sql)?\s*(.*?)```", candidate, flags=re.IGNORECASE | re.DOTALL)
|
|
26
|
+
return (fenced.group(1) if fenced else candidate).strip()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _fallback_sql(question: str) -> str:
|
|
30
|
+
lower = question.lower()
|
|
31
|
+
if "cat" in lower or "catálogo" in lower or "catalogo" in lower or "view" in lower:
|
|
32
|
+
return (
|
|
33
|
+
"SELECT relation_group, relation_name, relation_description "
|
|
34
|
+
"FROM curated.vw_pnp_vanna_catalogo ORDER BY relation_group, relation_name LIMIT 50"
|
|
35
|
+
)
|
|
36
|
+
if "total" in lower or "quant" in lower or "matricula" in lower or "matrícula" in lower:
|
|
37
|
+
return (
|
|
38
|
+
"SELECT ano, SUM(matriculas) AS total_matriculas "
|
|
39
|
+
"FROM curated.mv_pnp_dashboard_matriculas "
|
|
40
|
+
"GROUP BY ano ORDER BY ano DESC LIMIT 50"
|
|
41
|
+
)
|
|
42
|
+
if "indicador" in lower or "resumo" in lower or "média" in lower or "media" in lower:
|
|
43
|
+
return (
|
|
44
|
+
"SELECT dominio, indicador, ano, COUNT(*) AS registros, AVG(valor) AS media_valor "
|
|
45
|
+
"FROM curated.vw_pnp_vanna_resumo "
|
|
46
|
+
"GROUP BY dominio, indicador, ano ORDER BY ano DESC, dominio, indicador LIMIT 50"
|
|
47
|
+
)
|
|
48
|
+
return (
|
|
49
|
+
"SELECT run_id, instance_key, dominio, indicador, ano, instituicao, regiao, uf, municipio, valor "
|
|
50
|
+
"FROM curated.vw_pnp_vanna_resumo "
|
|
51
|
+
"ORDER BY ano DESC NULLS LAST, dominio, indicador LIMIT 50"
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Module-level wiring: everything below runs once, when this module is imported.
app = FastAPI(title="dataif-vanna", version="0.1.0")
# pool_pre_ping=True makes the pool test each connection before handing it out.
engine = create_engine(settings.vanna_dsn, pool_pre_ping=True)
# Resolved once here; the same value is reported by /health and /train.
allowed_schema = _allowed_schema()
# The guard is given a one-element set holding that schema.
guard = SQLGuard({allowed_schema})
vanna_engine = DataifVannaEngine(settings, engine, allowed_schema)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@app.get("/health")
def health() -> dict[str, object]:
    """Report service status together with the active LLM runtime and its availability."""
    runtime = vanna_engine.runtime_config()
    report: dict[str, object] = {"status": "ok"}
    report["llm_provider"] = runtime.provider
    report["model"] = runtime.model_name()
    report["allowed_schema"] = allowed_schema
    report["llm_available"] = vanna_engine.is_llm_available()
    report["llm_status"] = vanna_engine.provider_status()
    return report
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@app.post("/train")
def train() -> dict[str, object]:
    """Force a training run of the Vanna engine.

    Any failure raised by the engine is reported as HTTP 503 with the error text.
    """
    try:
        vanna_engine.train_once(force=True)
    except Exception as exc:
        message = f"Vanna unavailable for training: {exc}"
        raise HTTPException(status_code=503, detail=message) from exc
    else:
        return {"status": "ok", "allowed_schema": allowed_schema}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@app.post("/ask")
def ask(req: AskRequest) -> dict[str, Any]:
    """Generate SQL for a question, pass it through the guard, run it and return the rows.

    SQL comes from the Vanna engine; if generation fails for any reason a
    canned fallback query is used instead and the failure text is reported in
    ``generation_mode``. The statement is then passed through
    ``guard.enforce_limit`` and executed inside a transaction that is always
    rolled back.

    Returns:
        A dict with the question, the executed SQL, the rows (at most
        ``settings.vanna_max_rows``), their count and the generation mode.

    Raises:
        HTTPException: 400 when the guard raises ``ValueError`` or when the
            database rejects the statement itself.
    """
    # Function-scope import keeps this block self-contained; sqlalchemy is
    # already a dependency of this module.
    from sqlalchemy.exc import DataError, ProgrammingError

    generation_mode = "vanna"
    try:
        sql = _extract_sql(vanna_engine.generate_sql(req.question, runtime_override=req.llm_override))
    except Exception as exc:
        # Deliberate best effort: degrade to a canned query and surface why.
        generation_mode = f"fallback: {exc}"
        sql = _fallback_sql(req.question)

    try:
        sql = guard.enforce_limit(sql, settings.vanna_max_rows)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    try:
        with engine.connect() as conn:
            # This endpoint only reads. Roll back unconditionally so that a
            # statement with side effects that slipped past the guard is
            # never committed (engine.begin() would commit on success).
            transaction = conn.begin()
            try:
                rows = conn.execute(text(sql)).fetchmany(settings.vanna_max_rows)
            finally:
                transaction.rollback()
    except (ProgrammingError, DataError) as exc:
        # Errors caused by the generated statement itself (unknown column,
        # bad cast, ...) are reported to the caller instead of a bare 500.
        # Connectivity and other operational errors still propagate.
        raise HTTPException(status_code=400, detail=f"Generated SQL could not be executed: {exc}") from exc

    items = [dict(row._mapping) for row in rows]
    return {
        "question": req.question,
        "sql": sql,
        "rows": items,
        "row_count": len(items),
        "generation_mode": generation_mode,
    }
|