@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,296 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import re
6
+ import unicodedata
7
+ from dataclasses import dataclass
8
+ from typing import Any
9
+
10
+
11
@dataclass(frozen=True)
class FieldMapping:
    """Tie one destination column to the source headers that can feed it.

    Instances are frozen, so a mapping cannot be modified after creation.
    """

    # Name of the destination column.
    column_name: str
    # Candidate header names for the column; the first entry is treated as the
    # primary header by ``DomainSpec.source_headers``.
    source_headers: tuple[str, ...]
15
+
16
+
17
@dataclass(frozen=True)
class DomainSpec:
    """Describe one microdados domain and how its fields map to raw columns."""

    # Human-readable label of the domain.
    label: str
    # Short key identifying the domain.
    domain_key: str
    # Name of the raw table associated with the domain.
    raw_table_name: str
    # Column/header mappings for every field of the domain.
    field_mappings: tuple[FieldMapping, ...]

    @property
    def raw_column_names(self) -> tuple[str, ...]:
        """Return the destination column name of every mapping, in order."""
        names = [mapping.column_name for mapping in self.field_mappings]
        return tuple(names)

    @property
    def source_headers(self) -> tuple[str, ...]:
        """Return the first source header of every mapping, in order."""
        primary_headers = [mapping.source_headers[0] for mapping in self.field_mappings]
        return tuple(primary_headers)
31
+
32
+
33
# Column/header mappings shared by the "Matrículas" and "Eficiência Acadêmica"
# domain specs. Order matters: it defines the order of the derived column and
# header tuples.
ACADEMIC_FIELD_MAPPINGS = (
    # General attributes.
    FieldMapping("ano", ("Ano",)),
    FieldMapping("carga_horaria", ("Carga Horaria",)),
    FieldMapping("carga_horaria_minima", ("Carga Horaria Mínima",)),
    FieldMapping("categoria_da_situacao", ("Categoria da Situação",)),
    FieldMapping("co_inst", ("Co Inst",)),
    FieldMapping("cod_unidade", ("Cod Unidade",)),
    FieldMapping("cor_raca", ("Cor / Raça",)),
    # Two header spellings are accepted for this column.
    FieldMapping("codigo_da_matricula", ("Código da Matricula", "Código da Matrícula")),
    FieldMapping("codigo_da_unidade_de_ensino_sistec", ("Código da Unidade de Ensino - SISTEC",)),
    FieldMapping("codigo_do_ciclo_matricula", ("Código do Ciclo Matricula",)),
    FieldMapping("codigo_do_municipio_com_dv", ("Código do Município com DV",)),
    FieldMapping("data_de_fim_previsto_do_ciclo", ("Data de Fim Previsto do Ciclo",)),
    FieldMapping("data_de_inicio_do_ciclo", ("Data de Inicio do Ciclo",)),
    FieldMapping("data_de_ocorrencia_da_matricula", ("Data de Ocorrencia da Matricula",)),
    FieldMapping("eixo_tecnologico", ("Eixo Tecnológico",)),
    FieldMapping("faixa_etaria", ("Faixa Etária",)),
    FieldMapping("fator_esforco_curso", ("Fator Esforço Curso",)),
    FieldMapping("fonte_de_financiamento", ("Fonte de Financiamento",)),
    FieldMapping("forma_de_ingresso", ("Forma de ingresso",)),
    FieldMapping("habilitacao", ("Habilitação",)),
    FieldMapping("idade", ("Idade",)),
    FieldMapping("instituicao", ("Instituição",)),
    FieldMapping("matricula_atendida", ("Matrícula Atendida",)),
    FieldMapping("modalidade_de_ensino", ("Modalidade de Ensino",)),
    FieldMapping("municipio", ("Município",)),
    FieldMapping("mes_de_ocorrencia_da_situacao", ("Mês De Ocorrência da Situação",)),
    FieldMapping("nome_de_curso", ("Nome de Curso",)),
    FieldMapping("regiao", ("Região",)),
    FieldMapping("renda_familiar", ("Renda Familiar",)),
    FieldMapping("sexo", ("Sexo",)),
    FieldMapping("situacao_de_matricula", ("Situação de Matrícula",)),
    FieldMapping("subeixo_tecnologico", ("Subeixo Tecnológico",)),
    FieldMapping("tipo_de_curso", ("Tipo de Curso",)),
    FieldMapping("tipo_de_oferta", ("Tipo de Oferta",)),
    FieldMapping("total_de_inscritos", ("Total de Inscritos",)),
    FieldMapping("turno", ("Turno",)),
    FieldMapping("uf", ("UF",)),
    FieldMapping("unidade_de_ensino", ("Unidade de Ensino",)),
    # vagas_extraordinarias_* columns.
    FieldMapping("vagas_extraordinarias_ac", ("Vagas Extraordinárias AC",)),
    FieldMapping("vagas_extraordinarias_l1", ("Vagas Extraordinárias l1",)),
    FieldMapping("vagas_extraordinarias_l10", ("Vagas Extraordinárias l10",)),
    FieldMapping("vagas_extraordinarias_l13", ("Vagas Extraordinárias l13",)),
    FieldMapping("vagas_extraordinarias_l14", ("Vagas Extraordinárias l14",)),
    FieldMapping("vagas_extraordinarias_l2", ("Vagas Extraordinárias l2",)),
    FieldMapping("vagas_extraordinarias_l5", ("Vagas Extraordinárias l5",)),
    FieldMapping("vagas_extraordinarias_l6", ("Vagas Extraordinárias l6",)),
    FieldMapping("vagas_extraordinarias_l9", ("Vagas Extraordinárias l9",)),
    FieldMapping("vagas_extraordinarias_lb_ppi", ("Vagas Extraordinárias LB_PPI",)),
    FieldMapping("vagas_extraordinarias_lb_q", ("Vagas Extraordinárias LB_Q",)),
    FieldMapping("vagas_extraordinarias_lb_pcd", ("Vagas Extraordinárias LB_PCD",)),
    FieldMapping("vagas_extraordinarias_lb_ep", ("Vagas Extraordinárias LB_EP",)),
    FieldMapping("vagas_extraordinarias_li_ppi", ("Vagas Extraordinárias LI_PPI",)),
    FieldMapping("vagas_extraordinarias_li_q", ("Vagas Extraordinárias LI_Q",)),
    FieldMapping("vagas_extraordinarias_li_pcd", ("Vagas Extraordinárias LI_PCD",)),
    FieldMapping("vagas_extraordinarias_li_ep", ("Vagas Extraordinárias LI_EP",)),
    FieldMapping("vagas_ofertadas", ("Vagas Ofertadas",)),
    # vagas_regulares_* columns.
    FieldMapping("vagas_regulares_ac", ("Vagas Regulares AC",)),
    FieldMapping("vagas_regulares_l1", ("Vagas Regulares l1",)),
    FieldMapping("vagas_regulares_l10", ("Vagas Regulares l10",)),
    FieldMapping("vagas_regulares_l13", ("Vagas Regulares l13",)),
    FieldMapping("vagas_regulares_l14", ("Vagas Regulares l14",)),
    FieldMapping("vagas_regulares_l2", ("Vagas Regulares l2",)),
    FieldMapping("vagas_regulares_l5", ("Vagas Regulares l5",)),
    FieldMapping("vagas_regulares_l6", ("Vagas Regulares l6",)),
    FieldMapping("vagas_regulares_l9", ("Vagas Regulares l9",)),
    FieldMapping("vagas_regulares_lb_ppi", ("Vagas Regulares LB_PPI",)),
    FieldMapping("vagas_regulares_lb_q", ("Vagas Regulares LB_Q",)),
    FieldMapping("vagas_regulares_lb_pcd", ("Vagas Regulares LB_PCD",)),
    FieldMapping("vagas_regulares_lb_ep", ("Vagas Regulares LB_EP",)),
    FieldMapping("vagas_regulares_li_ppi", ("Vagas Regulares LI_PPI",)),
    FieldMapping("vagas_regulares_li_q", ("Vagas Regulares LI_Q",)),
    FieldMapping("vagas_regulares_li_pcd", ("Vagas Regulares LI_PCD",)),
    FieldMapping("vagas_regulares_li_ep", ("Vagas Regulares LI_EP",)),
)
108
+
109
# Column/header mappings used by the "Financeiro" domain spec.
FINANCEIRO_FIELD_MAPPINGS = (
    FieldMapping("uo", ("UO",)),
    FieldMapping("nome_uo", ("nomeUO",)),
    FieldMapping("cod_acao", ("codAcao",)),
    FieldMapping("nome_acao", ("nomeAcao",)),
    FieldMapping("grupo_despesa", ("GrupoDespesa",)),
    FieldMapping("liquidacoes_totais", ("liquidacoesTotais",)),
)
117
+
118
# Column/header mappings used by the "Servidores" domain spec. The source
# headers of this domain use underscores instead of spaces.
SERVIDORES_FIELD_MAPPINGS = (
    FieldMapping("classe", ("Classe",)),
    FieldMapping("cod_unidade", ("Cod_Unidade",)),
    FieldMapping("codigo_da_unidade_de_ensino_sistec", ("Código_da_Unidade_de_Ensino___SISTEC",)),
    FieldMapping("codigo_municipio_com_dv", ("Código_Municipio_com_DV",)),
    FieldMapping("instituicao", ("Instituição",)),
    FieldMapping("jornada_de_trabalho", ("Jornada_de_Trabalho",)),
    FieldMapping("matricula", ("Matrícula",)),
    FieldMapping("municipio", ("Município",)),
    FieldMapping("regiao", ("Região",)),
    FieldMapping("rsc", ("RSC",)),
    FieldMapping("titulacao", ("Titulação",)),
    FieldMapping("unidade_de_lotacao", ("Unidade_de_Lotação",)),
    FieldMapping("vinculo_carreira", ("Vinculo_Carreira",)),
    FieldMapping("vinculo_contrato", ("Vinculo_Contrato",)),
    FieldMapping("vinculo_professor", ("Vinculo_Professor",)),
    FieldMapping("numero_de_registros", ("Número_de_registros",)),
)
136
+
137
# Registry of the supported microdados domains, keyed by each spec's label.
# Building the keys from ``spec.label`` keeps key and label from drifting apart.
DOMAIN_SPECS = {
    spec.label: spec
    for spec in (
        DomainSpec(
            label="Matrículas",
            domain_key="matriculas",
            raw_table_name="pnp_matriculas_src",
            field_mappings=ACADEMIC_FIELD_MAPPINGS,
        ),
        DomainSpec(
            label="Eficiência Acadêmica",
            domain_key="eficiencia_academica",
            raw_table_name="pnp_eficiencia_academica_src",
            field_mappings=ACADEMIC_FIELD_MAPPINGS,
        ),
        DomainSpec(
            label="Financeiro",
            domain_key="financeiro",
            raw_table_name="pnp_financeiro_src",
            field_mappings=FINANCEIRO_FIELD_MAPPINGS,
        ),
        DomainSpec(
            label="Servidores",
            domain_key="servidores",
            raw_table_name="pnp_servidores_src",
            field_mappings=SERVIDORES_FIELD_MAPPINGS,
        ),
    )
}
163
+
164
+
165
+ def _to_int(value: Any) -> int | None:
166
+ if value is None:
167
+ return None
168
+ try:
169
+ return int(str(value).strip())
170
+ except (TypeError, ValueError):
171
+ return None
172
+
173
+
174
+ def _to_float(value: Any) -> float | None:
175
+ if value is None:
176
+ return None
177
+ try:
178
+ text = str(value).strip().replace(".", "").replace(",", ".")
179
+ return float(text)
180
+ except (TypeError, ValueError):
181
+ return None
182
+
183
+
184
def _first_usable(payload: dict[str, Any], *keys: str) -> Any:
    """Return the first truthy value under *keys*, also accepting numeric zero.

    A plain ``a or b`` chain discards ``0`` and ``0.0`` because they are falsy,
    which would turn a legitimate zero measurement into a missing value.
    """
    for key in keys:
        candidate = payload.get(key)
        is_number = isinstance(candidate, (int, float)) and not isinstance(candidate, bool)
        if candidate or is_number:
            return candidate
    return None


def normalize_record(
    payload: dict[str, Any],
    source_url: str,
    run_id: str,
    endpoint_id: int = 0,
    endpoint_key: str = "default",
    source_kind: str = "api",
) -> dict[str, Any]:
    """Build the generic normalized record for one source payload.

    Args:
        payload: Source record as a dict; it is stored unchanged under
            ``"payload"`` and must be JSON-serializable for hashing.
        source_url: URL the payload was obtained from.
        run_id: Identifier of the run producing the record.
        endpoint_id: Endpoint identifier; part of the hashed content.
        endpoint_key: Endpoint key; also the fallback for ``"dataset"``.
        source_kind: Kind of source the payload came from.

    Returns:
        A dict with the run/endpoint metadata, the fields derived from the
        payload (``source_record_id``, ``dataset``, ``entidade``, ``ano``,
        ``indicador``, ``valor``), the ``payload_hash`` and the payload itself.
        Derived text fields are ``None`` when no usable value is present.
    """
    source_record_id = str(
        payload.get("id")
        or payload.get("_id")
        or payload.get("codigo")
        or payload.get("cod")
        or payload.get("uuid")
        or ""
    )

    # The hash covers the endpoint id, the source URL and the payload with
    # sorted keys, so it does not depend on the payload's key order.
    serialized = json.dumps(
        {
            "endpoint_id": endpoint_id,
            "source_url": source_url,
            "payload": payload,
        },
        sort_keys=True,
        ensure_ascii=True,
    )
    payload_hash = hashlib.sha256(serialized.encode("utf-8")).hexdigest()

    return {
        "run_id": run_id,
        "endpoint_id": endpoint_id,
        "endpoint_key": endpoint_key,
        "source_kind": source_kind,
        "source_url": source_url,
        "source_record_id": source_record_id or None,
        "dataset": str(payload.get("dataset") or payload.get("base") or endpoint_key or "nilo_pecanha"),
        "entidade": str(
            payload.get("entidade")
            or payload.get("instituicao")
            or payload.get("municipio")
            or payload.get("nome")
            or ""
        )
        or None,
        # Numeric fields go through _first_usable so a zero is kept instead of
        # being skipped as falsy.
        "ano": _to_int(_first_usable(payload, "ano", "year")),
        "indicador": str(payload.get("indicador") or payload.get("metric") or payload.get("tipo") or "") or None,
        "valor": _to_float(_first_usable(payload, "valor", "value")),
        "payload_hash": payload_hash,
        "payload": payload,
    }
234
+
235
+
236
def normalize_column_name(value: str) -> str:
    """Turn an arbitrary header into a lowercase ASCII snake_case identifier.

    Accents are removed through NFKD decomposition, every run of characters
    outside ``a-z0-9`` becomes a single underscore, and leading/trailing
    underscores are trimmed. ``"coluna"`` is returned when nothing is left.
    """
    decomposed = unicodedata.normalize("NFKD", value)
    # Dropping non-ASCII bytes removes the combining marks left by NFKD.
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    candidate = ascii_only.lower().strip()
    candidate = re.sub(r"[^a-z0-9]+", "_", candidate)
    candidate = candidate.strip("_")
    if not candidate:
        return "coluna"
    return candidate
241
+
242
+
243
def domain_spec_for_label(label: str) -> DomainSpec:
    """Look up the ``DomainSpec`` registered under *label*.

    Raises:
        KeyError: If *label* is not a key of ``DOMAIN_SPECS``; the original
            lookup error is attached as the cause.
    """
    try:
        spec = DOMAIN_SPECS[label]
    except KeyError as missing:
        raise KeyError(f"Unsupported PNP microdados domain: {label}") from missing
    else:
        return spec
248
+
249
+
250
+ def _coalesce_payload_value(payload: dict[str, Any], headers: tuple[str, ...]) -> str | None:
251
+ for header in headers:
252
+ value = payload.get(header)
253
+ if value is None:
254
+ continue
255
+ if isinstance(value, str):
256
+ return value.strip() or None
257
+ return str(value)
258
+ return None
259
+
260
+
261
def normalize_domain_record(
    payload: dict[str, Any],
    *,
    run_id: str,
    instance_key: str | None,
    source_url: str,
) -> dict[str, Any]:
    """Build the domain-specific normalized record for one microdados payload.

    The domain is taken from ``payload["tipo_microdados"]`` (falling back to
    ``payload["tipo"]``) and resolved through ``domain_spec_for_label``. Each
    mapped column is filled from the payload via ``_coalesce_payload_value``.

    Returns:
        A dict with run/source metadata, the domain key and raw table name,
        ``record_hash`` (the ``payload_hash`` computed by ``normalize_record``)
        and ``field_values`` keyed by column name.

    Raises:
        KeyError: If the payload has no domain label, or the label is not
            supported.
    """

    def _text_or_none(raw: Any) -> str | None:
        # Falsy values and blank text both collapse to None.
        return str(raw or "").strip() or None

    raw_label = payload.get("tipo_microdados") or payload.get("tipo") or ""
    tipo_microdados = str(raw_label).strip()
    if tipo_microdados == "":
        raise KeyError("payload is missing tipo_microdados")

    domain_spec = domain_spec_for_label(tipo_microdados)

    field_values = {}
    for mapping in domain_spec.field_mappings:
        field_values[mapping.column_name] = _coalesce_payload_value(payload, mapping.source_headers)

    # Only the hash of the generic record is needed here.
    generic_record = normalize_record(
        payload=payload,
        source_url=source_url,
        run_id=run_id,
    )

    return {
        "run_id": run_id,
        "instance_key": instance_key,
        "tipo_microdados": tipo_microdados,
        "domain_key": domain_spec.domain_key,
        "raw_table_name": domain_spec.raw_table_name,
        "source_url": source_url,
        "source_record_id": _text_or_none(payload.get("id")),
        "source_row_number": _to_int(payload.get("source_row_number")),
        "source_file_name": _text_or_none(payload.get("source_file_name")),
        "source_file_sha256": _text_or_none(payload.get("source_file_sha256")),
        "ano_base": _text_or_none(payload.get("ano") or payload.get("Ano")),
        "record_hash": generic_record["payload_hash"],
        "field_values": field_values,
    }
@@ -0,0 +1 @@
1
+ """Pipeline jobs entrypoints."""
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import uuid
6
+ from datetime import UTC, datetime
7
+
8
+ from dataif_pipelines.connectors.base.types import RunContext
9
+ from dataif_pipelines.connectors.nilo_pecanha.config import load_config
10
+ from dataif_pipelines.connectors.nilo_pecanha.connector import NiloPecanhaConnector
11
+ from dataif_pipelines.repositories import pnp_raw_repository
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def _warehouse_dsn() -> str:
17
+ dsn = os.getenv("WAREHOUSE_DSN")
18
+ if not dsn:
19
+ raise RuntimeError("WAREHOUSE_DSN is required")
20
+ return dsn
21
+
22
+
23
def run_extract_to_raw(instance_key: str | None = None) -> str:
    """Run one extract-to-raw load and record its outcome.

    A new run id is generated and registered with status ``running``; the
    connector then extracts and loads the raw data, and the run is closed as
    ``success`` or ``failed`` through ``pnp_raw_repository``.

    Args:
        instance_key: Optional instance identifier, forwarded to the connector
            and stored with the run.

    Returns:
        The generated run id.

    Raises:
        RuntimeError: If ``WAREHOUSE_DSN`` is not configured.
        Exception: Whatever the extraction or load raised, re-raised after the
            failure bookkeeping has been attempted.
    """
    run_id = str(uuid.uuid4())
    config = load_config()
    started_at = datetime.now(tz=UTC)
    run_context = RunContext(run_id=run_id, started_at=started_at, source_url=config.endpoint)
    # Resolve the DSN once so every repository call of this run uses the same
    # warehouse, and so bookkeeping cannot fail on a late environment lookup.
    dsn = _warehouse_dsn()
    connector = NiloPecanhaConnector(dsn=dsn, config=config)
    pnp_raw_repository.register_run_start(
        dsn,
        run_id=run_id,
        instance_key=instance_key,
        status="running",
        trigger_mode="legacy_run_extract_to_raw",
        requested_by="nilo_pipeline.run_extract_to_raw",
        logical_date=started_at,
        started_at=started_at,
    )

    try:
        extracted_count = 0
        if hasattr(connector, "extract_and_load_raw"):
            # Combined path: the connector extracts and loads in one call.
            loaded_count = connector.extract_and_load_raw(run_context, instance_key=instance_key)
            extracted_count = int(connector.runtime_stats().get("raw_domain_count") or loaded_count)
        else:
            # Step-by-step path: fetch, normalize, then load.
            raw_records = connector.fetch(run_context, instance_key=instance_key)
            normalized_records = connector.normalize(raw_records, run_context)
            loaded_count = connector.load_raw(normalized_records, run_context)
            extracted_count = len(raw_records)
        checks = connector.post_load_checks(run_id)

        details = {
            "source": config.endpoint,
            "instance_key": instance_key,
            "runtime": connector.runtime_stats(),
            "checks": checks,
            "extracted_count": extracted_count,
            "loaded_count": loaded_count,
        }

        pnp_raw_repository.finish_run(
            dsn,
            run_id=run_id,
            status="success",
            catalog_entry_count=int(checks.get("catalog_entry_count") or 0),
            selected_download_count=int(checks.get("run_selection_count") or 0),
            downloaded_file_count=int(checks.get("download_count") or 0),
            raw_record_count=int(checks.get("raw_count") or loaded_count),
            error_message=None,
            run_summary=details,
            finished_at=datetime.now(tz=UTC),
        )

        logger.info("Raw load completed run_id=%s extracted=%s loaded=%s", run_id, extracted_count, loaded_count)
        return run_id
    except Exception as exc:
        # Failure bookkeeping is best-effort. Each step is guarded separately
        # so that an error while recording the failure neither hides the
        # root-cause exception nor prevents the remaining steps from running.
        error_message = str(exc)
        try:
            stats = connector.runtime_stats()
        except Exception:
            logger.exception("Could not read connector runtime stats for failed run_id=%s", run_id)
            stats = {}
        details = {
            "source": config.endpoint,
            "instance_key": instance_key,
            "runtime": stats,
            "error": error_message,
        }
        try:
            pnp_raw_repository.finish_run(
                dsn,
                run_id=run_id,
                status="failed",
                catalog_entry_count=0,
                selected_download_count=0,
                downloaded_file_count=int(stats.get("download_count") or 0),
                raw_record_count=int(stats.get("raw_domain_count") or 0),
                error_message=error_message,
                run_summary=details,
                finished_at=datetime.now(tz=UTC),
            )
        except Exception:
            logger.exception("Could not record failed status for run_id=%s", run_id)
        try:
            pnp_raw_repository.mark_run_downloads_failed(
                dsn,
                run_id=run_id,
                error_message=error_message,
            )
        except Exception:
            logger.exception("Could not mark downloads as failed for run_id=%s", run_id)
        # Re-raise the original extraction/load error.
        raise
101
+
102
+
103
def run_staging(run_id: str) -> int:
    """Reject the call: the legacy staging step is no longer available.

    Raises:
        RuntimeError: Always, pointing callers to the PNP workflow staging
            services.
    """
    message = (
        "run_staging no longer supports the legacy nilo staging tables; use the PNP workflow staging services instead"
    )
    raise RuntimeError(message)
107
+
108
+
109
def run_mart_and_curated(run_id: str) -> int:
    """Reject the call: the legacy mart/curated step is no longer available.

    Raises:
        RuntimeError: Always, pointing callers to the PNP workflow curated
            services.
    """
    message = (
        "run_mart_and_curated no longer supports the legacy nilo mart tables; use the PNP workflow curated services instead"
    )
    raise RuntimeError(message)
@@ -0,0 +1,21 @@
1
# Re-export the workflow entry points of ``pnp_workflow`` at package level.
from .pnp_workflow import (
    finalize_run,
    load_instance_config,
    materialize_staging,
    register_pipeline_run,
    resolve_catalog,
    resolve_pipeline_targets,
    run_quality_checks,
    sync_raw,
)

# Public API of the package; kept in the same alphabetical order as the import
# list above.
__all__ = [
    "finalize_run",
    "load_instance_config",
    "materialize_staging",
    "register_pipeline_run",
    "resolve_catalog",
    "resolve_pipeline_targets",
    "run_quality_checks",
    "sync_raw",
]