@dataif/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/dataif.js +623 -0
- package/package.json +26 -0
- package/scripts/build-template.mjs +72 -0
- package/templates/dataif/README.md +157 -0
- package/templates/dataif/infra/.env.example +119 -0
- package/templates/dataif/infra/.env.stg.example +119 -0
- package/templates/dataif/infra/airflow/Dockerfile +11 -0
- package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
- package/templates/dataif/infra/airflow/requirements.txt +3 -0
- package/templates/dataif/infra/docker-compose.yml +306 -0
- package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
- package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
- package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
- package/templates/dataif/infra/keycloak/Dockerfile +4 -0
- package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
- package/templates/dataif/infra/ollama/Dockerfile +9 -0
- package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
- package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
- package/templates/dataif/infra/postgres/Dockerfile +4 -0
- package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
- package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
- package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
- package/templates/dataif/scripts/configure-env.sh +149 -0
- package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
- package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
- package/templates/dataif/scripts/deploy.sh +79 -0
- package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
- package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
- package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
- package/templates/dataif/scripts/publish-images.sh +31 -0
- package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
- package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
- package/templates/dataif/services/api/.dockerignore +18 -0
- package/templates/dataif/services/api/Dockerfile +12 -0
- package/templates/dataif/services/api/app/__init__.py +1 -0
- package/templates/dataif/services/api/app/auth.py +48 -0
- package/templates/dataif/services/api/app/config.py +59 -0
- package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
- package/templates/dataif/services/api/app/main.py +2432 -0
- package/templates/dataif/services/api/app/metabase_admin.py +191 -0
- package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
- package/templates/dataif/services/api/app/metabase_embed.py +15 -0
- package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
- package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
- package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
- package/templates/dataif/services/api/app/vanna_client.py +32 -0
- package/templates/dataif/services/api/requirements.txt +9 -0
- package/templates/dataif/services/vanna/.dockerignore +18 -0
- package/templates/dataif/services/vanna/Dockerfile +12 -0
- package/templates/dataif/services/vanna/app/config.py +57 -0
- package/templates/dataif/services/vanna/app/main.py +108 -0
- package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
- package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
- package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
- package/templates/dataif/services/vanna/requirements.txt +8 -0
- package/templates/dataif/services/web/.dockerignore +13 -0
- package/templates/dataif/services/web/Dockerfile +16 -0
- package/templates/dataif/services/web/index.html +12 -0
- package/templates/dataif/services/web/nginx.conf +74 -0
- package/templates/dataif/services/web/package-lock.json +4397 -0
- package/templates/dataif/services/web/package.json +32 -0
- package/templates/dataif/services/web/postcss.config.mjs +5 -0
- package/templates/dataif/services/web/src/App.jsx +2817 -0
- package/templates/dataif/services/web/src/adminAuth.js +245 -0
- package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
- package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
- package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
- package/templates/dataif/services/web/src/assets/if.svg +0 -0
- package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
- package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
- package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
- package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
- package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
- package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
- package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
- package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
- package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
- package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
- package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
- package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
- package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
- package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
- package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
- package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
- package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
- package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
- package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
- package/templates/dataif/services/web/src/main.jsx +14 -0
- package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
- package/templates/dataif/services/web/src/styles/globals.css +60 -0
- package/templates/dataif/services/web/src/styles/theme.css +1326 -0
- package/templates/dataif/services/web/src/styles/typography.css +430 -0
- package/templates/dataif/services/web/src/styles.css +1287 -0
- package/templates/dataif/services/web/src/utils/cx.ts +24 -0
- package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
- package/templates/dataif/services/web/vite.config.js +14 -0
- package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
- package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
- package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
- package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
- package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
- package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
- package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
- package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
- package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
- package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
- package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
- package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
- package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
- package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
import unicodedata
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class FieldMapping:
    """Pair one destination column with the source headers that may feed it."""

    # Name of the destination column.
    column_name: str
    # Accepted header spellings for this column; the first entry is the primary one.
    source_headers: tuple[str, ...]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class DomainSpec:
    """Describe one microdados domain: its label, key, raw table and field mappings."""

    label: str
    domain_key: str
    raw_table_name: str
    field_mappings: tuple[FieldMapping, ...]

    @property
    def raw_column_names(self) -> tuple[str, ...]:
        """Return the destination column name of every mapping, in mapping order."""
        names = [mapping.column_name for mapping in self.field_mappings]
        return tuple(names)

    @property
    def source_headers(self) -> tuple[str, ...]:
        """Return the first source header of every mapping, in mapping order."""
        primary_headers = [mapping.source_headers[0] for mapping in self.field_mappings]
        return tuple(primary_headers)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Column/header pairs for the academic layout. Each pair is
# (destination column, accepted source header spellings).
ACADEMIC_FIELD_MAPPINGS = tuple(
    FieldMapping(column_name, source_headers)
    for column_name, source_headers in (
        ("ano", ("Ano",)),
        ("carga_horaria", ("Carga Horaria",)),
        ("carga_horaria_minima", ("Carga Horaria Mínima",)),
        ("categoria_da_situacao", ("Categoria da Situação",)),
        ("co_inst", ("Co Inst",)),
        ("cod_unidade", ("Cod Unidade",)),
        ("cor_raca", ("Cor / Raça",)),
        ("codigo_da_matricula", ("Código da Matricula", "Código da Matrícula")),
        ("codigo_da_unidade_de_ensino_sistec", ("Código da Unidade de Ensino - SISTEC",)),
        ("codigo_do_ciclo_matricula", ("Código do Ciclo Matricula",)),
        ("codigo_do_municipio_com_dv", ("Código do Município com DV",)),
        ("data_de_fim_previsto_do_ciclo", ("Data de Fim Previsto do Ciclo",)),
        ("data_de_inicio_do_ciclo", ("Data de Inicio do Ciclo",)),
        ("data_de_ocorrencia_da_matricula", ("Data de Ocorrencia da Matricula",)),
        ("eixo_tecnologico", ("Eixo Tecnológico",)),
        ("faixa_etaria", ("Faixa Etária",)),
        ("fator_esforco_curso", ("Fator Esforço Curso",)),
        ("fonte_de_financiamento", ("Fonte de Financiamento",)),
        ("forma_de_ingresso", ("Forma de ingresso",)),
        ("habilitacao", ("Habilitação",)),
        ("idade", ("Idade",)),
        ("instituicao", ("Instituição",)),
        ("matricula_atendida", ("Matrícula Atendida",)),
        ("modalidade_de_ensino", ("Modalidade de Ensino",)),
        ("municipio", ("Município",)),
        ("mes_de_ocorrencia_da_situacao", ("Mês De Ocorrência da Situação",)),
        ("nome_de_curso", ("Nome de Curso",)),
        ("regiao", ("Região",)),
        ("renda_familiar", ("Renda Familiar",)),
        ("sexo", ("Sexo",)),
        ("situacao_de_matricula", ("Situação de Matrícula",)),
        ("subeixo_tecnologico", ("Subeixo Tecnológico",)),
        ("tipo_de_curso", ("Tipo de Curso",)),
        ("tipo_de_oferta", ("Tipo de Oferta",)),
        ("total_de_inscritos", ("Total de Inscritos",)),
        ("turno", ("Turno",)),
        ("uf", ("UF",)),
        ("unidade_de_ensino", ("Unidade de Ensino",)),
        ("vagas_extraordinarias_ac", ("Vagas Extraordinárias AC",)),
        ("vagas_extraordinarias_l1", ("Vagas Extraordinárias l1",)),
        ("vagas_extraordinarias_l10", ("Vagas Extraordinárias l10",)),
        ("vagas_extraordinarias_l13", ("Vagas Extraordinárias l13",)),
        ("vagas_extraordinarias_l14", ("Vagas Extraordinárias l14",)),
        ("vagas_extraordinarias_l2", ("Vagas Extraordinárias l2",)),
        ("vagas_extraordinarias_l5", ("Vagas Extraordinárias l5",)),
        ("vagas_extraordinarias_l6", ("Vagas Extraordinárias l6",)),
        ("vagas_extraordinarias_l9", ("Vagas Extraordinárias l9",)),
        ("vagas_extraordinarias_lb_ppi", ("Vagas Extraordinárias LB_PPI",)),
        ("vagas_extraordinarias_lb_q", ("Vagas Extraordinárias LB_Q",)),
        ("vagas_extraordinarias_lb_pcd", ("Vagas Extraordinárias LB_PCD",)),
        ("vagas_extraordinarias_lb_ep", ("Vagas Extraordinárias LB_EP",)),
        ("vagas_extraordinarias_li_ppi", ("Vagas Extraordinárias LI_PPI",)),
        ("vagas_extraordinarias_li_q", ("Vagas Extraordinárias LI_Q",)),
        ("vagas_extraordinarias_li_pcd", ("Vagas Extraordinárias LI_PCD",)),
        ("vagas_extraordinarias_li_ep", ("Vagas Extraordinárias LI_EP",)),
        ("vagas_ofertadas", ("Vagas Ofertadas",)),
        ("vagas_regulares_ac", ("Vagas Regulares AC",)),
        ("vagas_regulares_l1", ("Vagas Regulares l1",)),
        ("vagas_regulares_l10", ("Vagas Regulares l10",)),
        ("vagas_regulares_l13", ("Vagas Regulares l13",)),
        ("vagas_regulares_l14", ("Vagas Regulares l14",)),
        ("vagas_regulares_l2", ("Vagas Regulares l2",)),
        ("vagas_regulares_l5", ("Vagas Regulares l5",)),
        ("vagas_regulares_l6", ("Vagas Regulares l6",)),
        ("vagas_regulares_l9", ("Vagas Regulares l9",)),
        ("vagas_regulares_lb_ppi", ("Vagas Regulares LB_PPI",)),
        ("vagas_regulares_lb_q", ("Vagas Regulares LB_Q",)),
        ("vagas_regulares_lb_pcd", ("Vagas Regulares LB_PCD",)),
        ("vagas_regulares_lb_ep", ("Vagas Regulares LB_EP",)),
        ("vagas_regulares_li_ppi", ("Vagas Regulares LI_PPI",)),
        ("vagas_regulares_li_q", ("Vagas Regulares LI_Q",)),
        ("vagas_regulares_li_pcd", ("Vagas Regulares LI_PCD",)),
        ("vagas_regulares_li_ep", ("Vagas Regulares LI_EP",)),
    )
)
|
|
108
|
+
|
|
109
|
+
# Column/header pairs for the financial layout. Each pair is
# (destination column, accepted source header spellings).
FINANCEIRO_FIELD_MAPPINGS = tuple(
    FieldMapping(column_name, source_headers)
    for column_name, source_headers in (
        ("uo", ("UO",)),
        ("nome_uo", ("nomeUO",)),
        ("cod_acao", ("codAcao",)),
        ("nome_acao", ("nomeAcao",)),
        ("grupo_despesa", ("GrupoDespesa",)),
        ("liquidacoes_totais", ("liquidacoesTotais",)),
    )
)
|
|
117
|
+
|
|
118
|
+
# Column/header pairs for the staff ("Servidores") layout. Each pair is
# (destination column, accepted source header spellings).
SERVIDORES_FIELD_MAPPINGS = tuple(
    FieldMapping(column_name, source_headers)
    for column_name, source_headers in (
        ("classe", ("Classe",)),
        ("cod_unidade", ("Cod_Unidade",)),
        ("codigo_da_unidade_de_ensino_sistec", ("Código_da_Unidade_de_Ensino___SISTEC",)),
        ("codigo_municipio_com_dv", ("Código_Municipio_com_DV",)),
        ("instituicao", ("Instituição",)),
        ("jornada_de_trabalho", ("Jornada_de_Trabalho",)),
        ("matricula", ("Matrícula",)),
        ("municipio", ("Município",)),
        ("regiao", ("Região",)),
        ("rsc", ("RSC",)),
        ("titulacao", ("Titulação",)),
        ("unidade_de_lotacao", ("Unidade_de_Lotação",)),
        ("vinculo_carreira", ("Vinculo_Carreira",)),
        ("vinculo_contrato", ("Vinculo_Contrato",)),
        ("vinculo_professor", ("Vinculo_Professor",)),
        ("numero_de_registros", ("Número_de_registros",)),
    )
)
|
|
136
|
+
|
|
137
|
+
# Registry of the supported microdados domains, keyed by each spec's label.
DOMAIN_SPECS = {
    spec.label: spec
    for spec in (
        DomainSpec(
            label="Matrículas",
            domain_key="matriculas",
            raw_table_name="pnp_matriculas_src",
            field_mappings=ACADEMIC_FIELD_MAPPINGS,
        ),
        DomainSpec(
            label="Eficiência Acadêmica",
            domain_key="eficiencia_academica",
            raw_table_name="pnp_eficiencia_academica_src",
            field_mappings=ACADEMIC_FIELD_MAPPINGS,
        ),
        DomainSpec(
            label="Financeiro",
            domain_key="financeiro",
            raw_table_name="pnp_financeiro_src",
            field_mappings=FINANCEIRO_FIELD_MAPPINGS,
        ),
        DomainSpec(
            label="Servidores",
            domain_key="servidores",
            raw_table_name="pnp_servidores_src",
            field_mappings=SERVIDORES_FIELD_MAPPINGS,
        ),
    )
}
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _to_int(value: Any) -> int | None:
|
|
166
|
+
if value is None:
|
|
167
|
+
return None
|
|
168
|
+
try:
|
|
169
|
+
return int(str(value).strip())
|
|
170
|
+
except (TypeError, ValueError):
|
|
171
|
+
return None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _to_float(value: Any) -> float | None:
|
|
175
|
+
if value is None:
|
|
176
|
+
return None
|
|
177
|
+
try:
|
|
178
|
+
text = str(value).strip().replace(".", "").replace(",", ".")
|
|
179
|
+
return float(text)
|
|
180
|
+
except (TypeError, ValueError):
|
|
181
|
+
return None
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _first_present(payload: dict[str, Any], *keys: str) -> Any:
    """Return the first value among *keys* that is truthy or a numeric zero."""
    for key in keys:
        value = payload.get(key)
        # A plain `or` chain would skip 0 / 0.0, which are legitimate ids,
        # years and measurements; every other falsy value counts as missing.
        if value or (isinstance(value, (int, float)) and not isinstance(value, bool)):
            return value
    return None


def normalize_record(
    payload: dict[str, Any],
    source_url: str,
    run_id: str,
    endpoint_id: int = 0,
    endpoint_key: str = "default",
    source_kind: str = "api",
) -> dict[str, Any]:
    """Build a flat raw record, including a content hash, from one source payload.

    Args:
        payload: Source row; alternative key spellings are tried in order
            for each derived field.
        source_url: URL the payload came from; part of the hash input.
        run_id: Identifier of the current run; copied to the output but not
            part of the hash.
        endpoint_id: Endpoint identifier; part of the hash input.
        endpoint_key: Endpoint key, also used as the dataset fallback.
        source_kind: Copied to the output unchanged.

    Returns:
        A dict with the run/endpoint metadata, the derived fields
        (``source_record_id``, ``dataset``, ``entidade``, ``ano``,
        ``indicador``, ``valor``), ``payload_hash`` and the original
        ``payload``.

    Raises:
        TypeError: If ``payload`` holds values ``json.dumps`` cannot serialize.
    """
    record_id = _first_present(payload, "id", "_id", "codigo", "cod", "uuid")
    source_record_id = "" if record_id is None else str(record_id)

    # The hash input is unchanged: endpoint_id, source_url and payload,
    # serialized with sorted keys so key order does not affect the digest.
    serialized = json.dumps(
        {
            "endpoint_id": endpoint_id,
            "source_url": source_url,
            "payload": payload,
        },
        sort_keys=True,
        ensure_ascii=True,
    )
    payload_hash = hashlib.sha256(serialized.encode("utf-8")).hexdigest()

    return {
        "run_id": run_id,
        "endpoint_id": endpoint_id,
        "endpoint_key": endpoint_key,
        "source_kind": source_kind,
        "source_url": source_url,
        "source_record_id": source_record_id or None,
        "dataset": str(payload.get("dataset") or payload.get("base") or endpoint_key or "nilo_pecanha"),
        "entidade": str(
            payload.get("entidade")
            or payload.get("instituicao")
            or payload.get("municipio")
            or payload.get("nome")
            or ""
        )
        or None,
        "ano": _to_int(_first_present(payload, "ano", "year")),
        "indicador": str(payload.get("indicador") or payload.get("metric") or payload.get("tipo") or "") or None,
        # A zero "valor" must survive: `payload.get("valor") or ...` turned it into None.
        "valor": _to_float(_first_present(payload, "valor", "value")),
        "payload_hash": payload_hash,
        "payload": payload,
    }
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def normalize_column_name(value: str) -> str:
    """Turn a header into a lowercase ASCII identifier joined by underscores.

    Characters are NFKD-decomposed and anything non-ASCII is dropped, the
    text is lowercased, and every run of characters outside ``a-z0-9``
    becomes a single underscore with none at either end.

    Returns:
        The normalized name, or ``"coluna"`` when nothing usable remains.
    """
    decomposed = unicodedata.normalize("NFKD", value)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    pieces = re.split(r"[^a-z0-9]+", ascii_only.lower().strip())
    slug = "_".join(piece for piece in pieces if piece)
    return slug if slug else "coluna"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def domain_spec_for_label(label: str) -> DomainSpec:
    """Look up the ``DomainSpec`` registered in ``DOMAIN_SPECS`` under *label*.

    Raises:
        KeyError: If *label* is not a registered domain; the message names
            the offending label and the lookup error is chained as the cause.
    """
    try:
        spec = DOMAIN_SPECS[label]
    except KeyError as missing:
        raise KeyError(f"Unsupported PNP microdados domain: {label}") from missing
    return spec
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _coalesce_payload_value(payload: dict[str, Any], headers: tuple[str, ...]) -> str | None:
|
|
251
|
+
for header in headers:
|
|
252
|
+
value = payload.get(header)
|
|
253
|
+
if value is None:
|
|
254
|
+
continue
|
|
255
|
+
if isinstance(value, str):
|
|
256
|
+
return value.strip() or None
|
|
257
|
+
return str(value)
|
|
258
|
+
return None
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def normalize_domain_record(
    payload: dict[str, Any],
    *,
    run_id: str,
    instance_key: str | None,
    source_url: str,
) -> dict[str, Any]:
    """Map one microdados payload onto the column layout of its domain.

    The domain is taken from ``payload["tipo_microdados"]``, falling back to
    ``payload["tipo"]``, and resolved with ``domain_spec_for_label``.

    Returns:
        A dict with run and source metadata, the domain key and raw table
        name, a ``record_hash`` taken from ``normalize_record`` and a
        ``field_values`` dict keyed by destination column name.

    Raises:
        KeyError: If the payload names no domain, or names an unsupported one.
    """

    def _clean_text(raw: Any) -> str | None:
        # Falsy -> None; otherwise the stripped string form (None if blank).
        return str(raw or "").strip() or None

    tipo_microdados = str(payload.get("tipo_microdados") or payload.get("tipo") or "").strip()
    if not tipo_microdados:
        raise KeyError("payload is missing tipo_microdados")

    domain_spec = domain_spec_for_label(tipo_microdados)

    field_values = {}
    for mapping in domain_spec.field_mappings:
        field_values[mapping.column_name] = _coalesce_payload_value(payload, mapping.source_headers)

    return {
        "run_id": run_id,
        "instance_key": instance_key,
        "tipo_microdados": tipo_microdados,
        "domain_key": domain_spec.domain_key,
        "raw_table_name": domain_spec.raw_table_name,
        "source_url": source_url,
        "source_record_id": _clean_text(payload.get("id")),
        "source_row_number": _to_int(payload.get("source_row_number")),
        "source_file_name": _clean_text(payload.get("source_file_name")),
        "source_file_sha256": _clean_text(payload.get("source_file_sha256")),
        "ano_base": _clean_text(payload.get("ano") or payload.get("Ano")),
        "record_hash": normalize_record(
            payload=payload,
            source_url=source_url,
            run_id=run_id,
        )["payload_hash"],
        "field_values": field_values,
    }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Pipeline jobs entrypoints."""
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import uuid
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
|
|
8
|
+
from dataif_pipelines.connectors.base.types import RunContext
|
|
9
|
+
from dataif_pipelines.connectors.nilo_pecanha.config import load_config
|
|
10
|
+
from dataif_pipelines.connectors.nilo_pecanha.connector import NiloPecanhaConnector
|
|
11
|
+
from dataif_pipelines.repositories import pnp_raw_repository
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _warehouse_dsn() -> str:
|
|
17
|
+
dsn = os.getenv("WAREHOUSE_DSN")
|
|
18
|
+
if not dsn:
|
|
19
|
+
raise RuntimeError("WAREHOUSE_DSN is required")
|
|
20
|
+
return dsn
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _record_extract_failure(
    *,
    run_id: str,
    instance_key: str | None,
    source: str,
    connector: NiloPecanhaConnector,
    exc: Exception,
) -> None:
    """Persist the failed state of a run without ever raising.

    Each bookkeeping step is guarded on its own, so a failure in one step
    is logged and does not prevent the next one from running.
    """
    error_message = str(exc)
    try:
        runtime = connector.runtime_stats()
    except Exception:
        logger.exception("Could not read connector runtime stats run_id=%s", run_id)
        runtime = {}

    details = {
        "source": source,
        "instance_key": instance_key,
        "runtime": runtime,
        "error": error_message,
    }
    try:
        pnp_raw_repository.finish_run(
            _warehouse_dsn(),
            run_id=run_id,
            status="failed",
            catalog_entry_count=0,
            selected_download_count=0,
            downloaded_file_count=int(runtime.get("download_count") or 0),
            raw_record_count=int(runtime.get("raw_domain_count") or 0),
            error_message=error_message,
            run_summary=details,
            finished_at=datetime.now(tz=UTC),
        )
    except Exception:
        logger.exception("Could not mark run as failed run_id=%s", run_id)

    try:
        pnp_raw_repository.mark_run_downloads_failed(
            _warehouse_dsn(),
            run_id=run_id,
            error_message=error_message,
        )
    except Exception:
        logger.exception("Could not mark run downloads as failed run_id=%s", run_id)


def run_extract_to_raw(instance_key: str | None = None) -> str:
    """Run the legacy extract-and-load-to-raw job and record its outcome.

    A new run id is generated and registered with status ``"running"``.
    The connector's ``extract_and_load_raw`` is used when it exists;
    otherwise the fetch / normalize / load_raw sequence is used. The run is
    then finished as ``"success"`` or ``"failed"`` through
    ``pnp_raw_repository``.

    Args:
        instance_key: Optional instance key passed to the connector and
            stored with the run.

    Returns:
        The generated run id.

    Raises:
        Exception: Whatever the extraction, checks or success bookkeeping
            raised. Errors in the failure bookkeeping are logged and never
            replace that original exception.
    """
    run_id = str(uuid.uuid4())
    config = load_config()
    started_at = datetime.now(tz=UTC)
    run_context = RunContext(run_id=run_id, started_at=started_at, source_url=config.endpoint)
    connector = NiloPecanhaConnector(dsn=_warehouse_dsn(), config=config)
    pnp_raw_repository.register_run_start(
        _warehouse_dsn(),
        run_id=run_id,
        instance_key=instance_key,
        status="running",
        trigger_mode="legacy_run_extract_to_raw",
        requested_by="nilo_pipeline.run_extract_to_raw",
        logical_date=started_at,
        started_at=started_at,
    )

    try:
        if hasattr(connector, "extract_and_load_raw"):
            loaded_count = connector.extract_and_load_raw(run_context, instance_key=instance_key)
            # Prefer the connector's own counter; fall back to the loaded count.
            extracted_count = int(connector.runtime_stats().get("raw_domain_count") or loaded_count)
        else:
            raw_records = connector.fetch(run_context, instance_key=instance_key)
            normalized_records = connector.normalize(raw_records, run_context)
            loaded_count = connector.load_raw(normalized_records, run_context)
            extracted_count = len(raw_records)
        checks = connector.post_load_checks(run_id)

        details = {
            "source": config.endpoint,
            "instance_key": instance_key,
            "runtime": connector.runtime_stats(),
            "checks": checks,
            "extracted_count": extracted_count,
            "loaded_count": loaded_count,
        }

        pnp_raw_repository.finish_run(
            _warehouse_dsn(),
            run_id=run_id,
            status="success",
            catalog_entry_count=int(checks.get("catalog_entry_count") or 0),
            selected_download_count=int(checks.get("run_selection_count") or 0),
            downloaded_file_count=int(checks.get("download_count") or 0),
            raw_record_count=int(checks.get("raw_count") or loaded_count),
            error_message=None,
            run_summary=details,
            finished_at=datetime.now(tz=UTC),
        )

        logger.info("Raw load completed run_id=%s extracted=%s loaded=%s", run_id, extracted_count, loaded_count)
        return run_id
    except Exception as exc:
        logger.exception("Raw load failed run_id=%s", run_id)
        # Bookkeeping is best-effort here: the original error is what the
        # caller must see, so it is re-raised whatever happens below.
        _record_extract_failure(
            run_id=run_id,
            instance_key=instance_key,
            source=config.endpoint,
            connector=connector,
            exc=exc,
        )
        raise
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def run_staging(run_id: str) -> int:
    """Reject the call: this legacy staging entry point always raises.

    Raises:
        RuntimeError: Unconditionally; the message points to the PNP
            workflow staging services.
    """
    message = "run_staging no longer supports the legacy nilo staging tables; use the PNP workflow staging services instead"
    raise RuntimeError(message)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def run_mart_and_curated(run_id: str) -> int:
    """Reject the call: this legacy mart/curated entry point always raises.

    Raises:
        RuntimeError: Unconditionally; the message points to the PNP
            workflow curated services.
    """
    message = "run_mart_and_curated no longer supports the legacy nilo mart tables; use the PNP workflow curated services instead"
    raise RuntimeError(message)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from .pnp_workflow import (
|
|
2
|
+
finalize_run,
|
|
3
|
+
load_instance_config,
|
|
4
|
+
materialize_staging,
|
|
5
|
+
register_pipeline_run,
|
|
6
|
+
resolve_pipeline_targets,
|
|
7
|
+
resolve_catalog,
|
|
8
|
+
run_quality_checks,
|
|
9
|
+
sync_raw,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"finalize_run",
|
|
14
|
+
"load_instance_config",
|
|
15
|
+
"materialize_staging",
|
|
16
|
+
"register_pipeline_run",
|
|
17
|
+
"resolve_pipeline_targets",
|
|
18
|
+
"resolve_catalog",
|
|
19
|
+
"run_quality_checks",
|
|
20
|
+
"sync_raw",
|
|
21
|
+
]
|