@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,83 @@
1
+ WITH selected_rows AS ( -- Stage eficiencia academica raw rows for one run and download, restricted to an active instance selection
2
+ SELECT
3
+ src.*,
4
+ ROW_NUMBER() OVER ( -- rank rows that share the same instance_key and record_hash
5
+ PARTITION BY src.instance_key, src.record_hash
6
+ ORDER BY src.raw_record_id DESC -- highest raw_record_id gets rank 1
7
+ ) AS dedup_rank
8
+ FROM raw.pnp_eficiencia_academica_src src
9
+ LEFT JOIN raw.pnp_downloads downloads ON downloads.download_id = src.download_id -- brings in microdados_url for the selection URL check below
10
+ JOIN raw.pnp_instance_selection selection -- inner join, so raw rows without a matching active selection are excluded
11
+ ON selection.instance_key = src.instance_key
12
+ AND selection.is_active = TRUE
13
+ AND selection.ano_base = src.ano_base
14
+ AND selection.tipo_microdados = src.tipo_microdados
15
+ AND (
16
+ selection.configured_microdados_url IS NULL -- a selection with no configured URL accepts any download
17
+ OR selection.configured_microdados_url = downloads.microdados_url
18
+ )
19
+ WHERE src.run_id = %(run_id)s -- run_id, instance_key and download_id are named parameters supplied by the caller
20
+ AND src.instance_key IS NOT DISTINCT FROM %(instance_key)s -- NULL-safe comparison, although the inner join on instance_key above never matches NULL keys
21
+ AND src.download_id = %(download_id)s
22
+ ),
23
+ deduplicated_rows AS (
24
+ SELECT *
25
+ FROM selected_rows
26
+ WHERE dedup_rank = 1 -- keep a single row per instance_key and record_hash
27
+ )
28
+ INSERT INTO staging.pnp_eficiencia_academica (
29
+ raw_record_id,
30
+ run_id,
31
+ instance_key,
32
+ ano,
33
+ instituicao,
34
+ regiao,
35
+ uf,
36
+ municipio,
37
+ sexo,
38
+ cor_raca,
39
+ renda_familiar,
40
+ faixa_etaria,
41
+ categoria_situacao,
42
+ situacao_matricula,
43
+ matricula_atendida,
44
+ processed_at
45
+ )
46
+ SELECT
47
+ deduplicated_rows.raw_record_id,
48
+ deduplicated_rows.run_id,
49
+ deduplicated_rows.instance_key,
50
+ CASE
51
+ WHEN NULLIF(deduplicated_rows.ano, '') ~ '^[0-9]{1,4}$' THEN deduplicated_rows.ano::INTEGER -- cast only when the text is 1 to 4 digits, otherwise store NULL
52
+ ELSE NULL
53
+ END AS ano,
54
+ NULLIF(deduplicated_rows.instituicao, '') AS instituicao, -- empty strings become NULL, same for the text columns below
55
+ NULLIF(deduplicated_rows.regiao, '') AS regiao,
56
+ NULLIF(deduplicated_rows.uf, '') AS uf,
57
+ NULLIF(deduplicated_rows.municipio, '') AS municipio,
58
+ NULLIF(deduplicated_rows.sexo, '') AS sexo,
59
+ NULLIF(deduplicated_rows.cor_raca, '') AS cor_raca,
60
+ NULLIF(deduplicated_rows.renda_familiar, '') AS renda_familiar,
61
+ NULLIF(deduplicated_rows.faixa_etaria, '') AS faixa_etaria,
62
+ NULLIF(deduplicated_rows.categoria_da_situacao, '') AS categoria_situacao, -- raw column categoria_da_situacao feeds staging column categoria_situacao
63
+ NULLIF(deduplicated_rows.situacao_de_matricula, '') AS situacao_matricula, -- raw column situacao_de_matricula feeds staging column situacao_matricula
64
+ NULLIF(deduplicated_rows.matricula_atendida, '') AS matricula_atendida,
65
+ NOW()
66
+ FROM deduplicated_rows
67
+ ON CONFLICT (raw_record_id) DO UPDATE -- a row already staged for this raw_record_id is overwritten with the new values
68
+ SET
69
+ run_id = EXCLUDED.run_id,
70
+ instance_key = EXCLUDED.instance_key,
71
+ ano = EXCLUDED.ano,
72
+ instituicao = EXCLUDED.instituicao,
73
+ regiao = EXCLUDED.regiao,
74
+ uf = EXCLUDED.uf,
75
+ municipio = EXCLUDED.municipio,
76
+ sexo = EXCLUDED.sexo,
77
+ cor_raca = EXCLUDED.cor_raca,
78
+ renda_familiar = EXCLUDED.renda_familiar,
79
+ faixa_etaria = EXCLUDED.faixa_etaria,
80
+ categoria_situacao = EXCLUDED.categoria_situacao,
81
+ situacao_matricula = EXCLUDED.situacao_matricula,
82
+ matricula_atendida = EXCLUDED.matricula_atendida,
83
+ processed_at = NOW();
@@ -0,0 +1,90 @@
1
+ WITH selected_rows AS ( -- Stage servidores raw rows for one run and download, restricted to an active instance selection
2
+ SELECT
3
+ src.*,
4
+ ROW_NUMBER() OVER ( -- rank rows that share the same instance_key and record_hash
5
+ PARTITION BY src.instance_key, src.record_hash
6
+ ORDER BY src.raw_record_id DESC -- highest raw_record_id gets rank 1
7
+ ) AS dedup_rank
8
+ FROM raw.pnp_servidores_src src
9
+ LEFT JOIN raw.pnp_downloads downloads ON downloads.download_id = src.download_id -- brings in microdados_url for the selection URL check below
10
+ JOIN raw.pnp_instance_selection selection -- inner join, so raw rows without a matching active selection are excluded
11
+ ON selection.instance_key = src.instance_key
12
+ AND selection.is_active = TRUE
13
+ AND selection.ano_base = src.ano_base
14
+ AND selection.tipo_microdados = src.tipo_microdados
15
+ AND (
16
+ selection.configured_microdados_url IS NULL -- a selection with no configured URL accepts any download
17
+ OR selection.configured_microdados_url = downloads.microdados_url
18
+ )
19
+ WHERE src.run_id = %(run_id)s -- run_id, instance_key and download_id are named parameters supplied by the caller
20
+ AND src.instance_key IS NOT DISTINCT FROM %(instance_key)s -- NULL-safe comparison, although the inner join on instance_key above never matches NULL keys
21
+ AND src.download_id = %(download_id)s
22
+ ),
23
+ deduplicated_rows AS (
24
+ SELECT *
25
+ FROM selected_rows
26
+ WHERE dedup_rank = 1 -- keep a single row per instance_key and record_hash
27
+ )
28
+ INSERT INTO staging.pnp_servidores (
29
+ raw_record_id,
30
+ run_id,
31
+ instance_key,
32
+ ano,
33
+ instituicao,
34
+ regiao,
35
+ uf,
36
+ municipio,
37
+ classe,
38
+ jornada_trabalho,
39
+ titulacao,
40
+ rsc,
41
+ vinculo_carreira,
42
+ vinculo_contrato,
43
+ vinculo_professor,
44
+ numero_registros,
45
+ processed_at
46
+ )
47
+ SELECT
48
+ deduplicated_rows.raw_record_id,
49
+ deduplicated_rows.run_id,
50
+ deduplicated_rows.instance_key,
51
+ CASE
52
+ WHEN NULLIF(deduplicated_rows.ano_base, '') ~ '^[0-9]{1,4}$' THEN deduplicated_rows.ano_base::INTEGER -- ano is parsed from ano_base, only when the text is 1 to 4 digits
53
+ ELSE NULL
54
+ END AS ano,
55
+ NULLIF(deduplicated_rows.instituicao, '') AS instituicao, -- empty strings become NULL, same for the text columns below
56
+ NULLIF(deduplicated_rows.regiao, '') AS regiao,
57
+ NULLIF(NULL::TEXT, '') AS uf, -- always NULL because no uf value is read from the raw row
58
+ NULLIF(deduplicated_rows.municipio, '') AS municipio,
59
+ NULLIF(deduplicated_rows.classe, '') AS classe,
60
+ NULLIF(deduplicated_rows.jornada_de_trabalho, '') AS jornada_trabalho, -- raw column jornada_de_trabalho feeds staging column jornada_trabalho
61
+ NULLIF(deduplicated_rows.titulacao, '') AS titulacao,
62
+ NULLIF(deduplicated_rows.rsc, '') AS rsc,
63
+ NULLIF(deduplicated_rows.vinculo_carreira, '') AS vinculo_carreira,
64
+ NULLIF(deduplicated_rows.vinculo_contrato, '') AS vinculo_contrato,
65
+ NULLIF(deduplicated_rows.vinculo_professor, '') AS vinculo_professor,
66
+ CASE
67
+ WHEN REPLACE(REPLACE(NULLIF(BTRIM(deduplicated_rows.numero_de_registros), ''), '.', ''), ',', '.') ~ '^-?[0-9]+(\.[0-9]+)?$' -- trim, remove dots, turn commas into dots, then accept only a plain decimal number (presumably input shaped like 1.234,56, TODO confirm against the raw files)
68
+ THEN REPLACE(REPLACE(NULLIF(BTRIM(deduplicated_rows.numero_de_registros), ''), '.', ''), ',', '.')::NUMERIC -- same normalisation repeated for the actual cast
69
+ ELSE NULL -- anything that fails the pattern is stored as NULL
70
+ END AS numero_registros,
71
+ NOW()
72
+ FROM deduplicated_rows
73
+ ON CONFLICT (raw_record_id) DO UPDATE -- a row already staged for this raw_record_id is overwritten with the new values
74
+ SET
75
+ run_id = EXCLUDED.run_id,
76
+ instance_key = EXCLUDED.instance_key,
77
+ ano = EXCLUDED.ano,
78
+ instituicao = EXCLUDED.instituicao,
79
+ regiao = EXCLUDED.regiao,
80
+ uf = EXCLUDED.uf,
81
+ municipio = EXCLUDED.municipio,
82
+ classe = EXCLUDED.classe,
83
+ jornada_trabalho = EXCLUDED.jornada_trabalho,
84
+ titulacao = EXCLUDED.titulacao,
85
+ rsc = EXCLUDED.rsc,
86
+ vinculo_carreira = EXCLUDED.vinculo_carreira,
87
+ vinculo_contrato = EXCLUDED.vinculo_contrato,
88
+ vinculo_professor = EXCLUDED.vinculo_professor,
89
+ numero_registros = EXCLUDED.numero_registros,
90
+ processed_at = NOW();
@@ -0,0 +1,72 @@
1
+ WITH selected_rows AS ( -- Stage financeiro raw rows for one run and download, restricted to an active instance selection
2
+ SELECT
3
+ src.*,
4
+ ROW_NUMBER() OVER ( -- rank rows that share the same instance_key and record_hash
5
+ PARTITION BY src.instance_key, src.record_hash
6
+ ORDER BY src.raw_record_id DESC -- highest raw_record_id gets rank 1
7
+ ) AS dedup_rank
8
+ FROM raw.pnp_financeiro_src src
9
+ LEFT JOIN raw.pnp_downloads downloads ON downloads.download_id = src.download_id -- brings in microdados_url for the selection URL check below
10
+ JOIN raw.pnp_instance_selection selection -- inner join, so raw rows without a matching active selection are excluded
11
+ ON selection.instance_key = src.instance_key
12
+ AND selection.is_active = TRUE
13
+ AND selection.ano_base = src.ano_base
14
+ AND selection.tipo_microdados = src.tipo_microdados
15
+ AND (
16
+ selection.configured_microdados_url IS NULL -- a selection with no configured URL accepts any download
17
+ OR selection.configured_microdados_url = downloads.microdados_url
18
+ )
19
+ WHERE src.run_id = %(run_id)s -- run_id, instance_key and download_id are named parameters supplied by the caller
20
+ AND src.instance_key IS NOT DISTINCT FROM %(instance_key)s -- NULL-safe comparison, although the inner join on instance_key above never matches NULL keys
21
+ AND src.download_id = %(download_id)s
22
+ ),
23
+ deduplicated_rows AS (
24
+ SELECT *
25
+ FROM selected_rows
26
+ WHERE dedup_rank = 1 -- keep a single row per instance_key and record_hash
27
+ )
28
+ INSERT INTO staging.pnp_financeiro (
29
+ raw_record_id,
30
+ run_id,
31
+ instance_key,
32
+ ano,
33
+ nome_uo,
34
+ uo,
35
+ cod_acao,
36
+ nome_acao,
37
+ grupo_despesa,
38
+ liquidacoes_totais,
39
+ processed_at
40
+ )
41
+ SELECT
42
+ deduplicated_rows.raw_record_id,
43
+ deduplicated_rows.run_id,
44
+ deduplicated_rows.instance_key,
45
+ CASE
46
+ WHEN NULLIF(deduplicated_rows.ano_base, '') ~ '^[0-9]{1,4}$' THEN deduplicated_rows.ano_base::INTEGER -- ano is parsed from ano_base, only when the text is 1 to 4 digits
47
+ ELSE NULL
48
+ END AS ano,
49
+ NULLIF(deduplicated_rows.nome_uo, '') AS nome_uo, -- empty strings become NULL, same for the text columns below
50
+ NULLIF(deduplicated_rows.uo, '') AS uo,
51
+ NULLIF(deduplicated_rows.cod_acao, '') AS cod_acao,
52
+ NULLIF(deduplicated_rows.nome_acao, '') AS nome_acao,
53
+ NULLIF(deduplicated_rows.grupo_despesa, '') AS grupo_despesa,
54
+ CASE
55
+ WHEN REPLACE(REPLACE(NULLIF(BTRIM(deduplicated_rows.liquidacoes_totais), ''), '.', ''), ',', '.') ~ '^-?[0-9]+(\.[0-9]+)?$' -- trim, remove dots, turn commas into dots, then accept only a plain decimal number (presumably input shaped like 1.234,56, TODO confirm against the raw files)
56
+ THEN REPLACE(REPLACE(NULLIF(BTRIM(deduplicated_rows.liquidacoes_totais), ''), '.', ''), ',', '.')::NUMERIC -- same normalisation repeated for the actual cast
57
+ ELSE NULL -- anything that fails the pattern is stored as NULL
58
+ END AS liquidacoes_totais,
59
+ NOW()
60
+ FROM deduplicated_rows
61
+ ON CONFLICT (raw_record_id) DO UPDATE -- a row already staged for this raw_record_id is overwritten with the new values
62
+ SET
63
+ run_id = EXCLUDED.run_id,
64
+ instance_key = EXCLUDED.instance_key,
65
+ ano = EXCLUDED.ano,
66
+ nome_uo = EXCLUDED.nome_uo,
67
+ uo = EXCLUDED.uo,
68
+ cod_acao = EXCLUDED.cod_acao,
69
+ nome_acao = EXCLUDED.nome_acao,
70
+ grupo_despesa = EXCLUDED.grupo_despesa,
71
+ liquidacoes_totais = EXCLUDED.liquidacoes_totais,
72
+ processed_at = NOW();
@@ -0,0 +1,204 @@
1
+ DROP MATERIALIZED VIEW IF EXISTS curated.mv_pnp_dashboard_matriculas CASCADE; -- rebuilt from scratch, CASCADE also drops objects that depend on this materialized view
2
+ CREATE MATERIALIZED VIEW curated.mv_pnp_dashboard_matriculas AS -- matriculas from staging.pnp_matriculas pre-aggregated by run, instance and the dimension columns listed below
3
+ SELECT
4
+ run_id,
5
+ instance_key,
6
+ ano,
7
+ instituicao,
8
+ regiao,
9
+ uf,
10
+ municipio,
11
+ sexo,
12
+ cor_raca,
13
+ renda_familiar,
14
+ faixa_etaria,
15
+ situacao_matricula,
16
+ modalidade_ensino,
17
+ tipo_curso,
18
+ tipo_oferta,
19
+ turno,
20
+ nome_curso,
21
+ COUNT(*) AS matriculas, -- number of staged rows in each group
22
+ SUM(vagas_ofertadas) AS vagas_ofertadas,
23
+ SUM(total_inscritos) AS inscritos -- total_inscritos is exposed under the shorter name inscritos
24
+ FROM staging.pnp_matriculas
25
+ GROUP BY -- every non-aggregated column of the select list
26
+ run_id,
27
+ instance_key,
28
+ ano,
29
+ instituicao,
30
+ regiao,
31
+ uf,
32
+ municipio,
33
+ sexo,
34
+ cor_raca,
35
+ renda_familiar,
36
+ faixa_etaria,
37
+ situacao_matricula,
38
+ modalidade_ensino,
39
+ tipo_curso,
40
+ tipo_oferta,
41
+ turno,
42
+ nome_curso;
43
+
44
+ CREATE INDEX idx_mv_pnp_dashboard_matriculas_geo -- index over run, year, institution and geography columns
45
+ ON curated.mv_pnp_dashboard_matriculas (run_id, ano, instituicao, regiao, uf, municipio);
46
+ CREATE INDEX idx_mv_pnp_dashboard_matriculas_perfil -- index over the student profile columns
47
+ ON curated.mv_pnp_dashboard_matriculas (sexo, cor_raca, renda_familiar, faixa_etaria, situacao_matricula);
48
+ CREATE INDEX idx_mv_pnp_dashboard_matriculas_oferta -- index over the course offering columns
49
+ ON curated.mv_pnp_dashboard_matriculas (modalidade_ensino, tipo_curso, tipo_oferta, turno);
50
+ CREATE INDEX idx_mv_pnp_dashboard_matriculas_curso -- index over the course name
51
+ ON curated.mv_pnp_dashboard_matriculas (nome_curso);
52
+
53
+
54
+ DROP MATERIALIZED VIEW IF EXISTS curated.mv_pnp_dashboard_eficiencia CASCADE; -- rebuilt from scratch, CASCADE also drops objects that depend on this materialized view
55
+ CREATE MATERIALIZED VIEW curated.mv_pnp_dashboard_eficiencia AS -- materialized aggregate of curated.vw_pnp_eficiencia_situacao, which must already exist when this runs
56
+ SELECT
57
+ run_id,
58
+ instance_key,
59
+ ano,
60
+ instituicao,
61
+ regiao,
62
+ uf,
63
+ municipio,
64
+ sexo,
65
+ cor_raca,
66
+ renda_familiar,
67
+ faixa_etaria,
68
+ categoria_situacao,
69
+ situacao_matricula,
70
+ matricula_atendida,
71
+ SUM(registros) AS registros -- sums the registros column already provided by the source view
72
+ FROM curated.vw_pnp_eficiencia_situacao
73
+ GROUP BY -- every non-aggregated column of the select list
74
+ run_id,
75
+ instance_key,
76
+ ano,
77
+ instituicao,
78
+ regiao,
79
+ uf,
80
+ municipio,
81
+ sexo,
82
+ cor_raca,
83
+ renda_familiar,
84
+ faixa_etaria,
85
+ categoria_situacao,
86
+ situacao_matricula,
87
+ matricula_atendida;
88
+
89
+ CREATE INDEX idx_mv_pnp_dashboard_eficiencia_geo -- index over run, year, institution and geography columns
90
+ ON curated.mv_pnp_dashboard_eficiencia (run_id, ano, instituicao, regiao, uf, municipio);
91
+ CREATE INDEX idx_mv_pnp_dashboard_eficiencia_perfil -- index over the student profile columns
92
+ ON curated.mv_pnp_dashboard_eficiencia (sexo, cor_raca, renda_familiar, faixa_etaria);
93
+ CREATE INDEX idx_mv_pnp_dashboard_eficiencia_situacao -- index over the situation columns
94
+ ON curated.mv_pnp_dashboard_eficiencia (categoria_situacao, situacao_matricula, matricula_atendida);
95
+
96
+
97
+ DROP MATERIALIZED VIEW IF EXISTS curated.mv_pnp_dashboard_servidores CASCADE; -- rebuilt from scratch, CASCADE also drops objects that depend on this materialized view
98
+ CREATE MATERIALIZED VIEW curated.mv_pnp_dashboard_servidores AS -- materialized aggregate of curated.vw_pnp_servidores_quadro, which must already exist when this runs
99
+ SELECT
100
+ run_id,
101
+ instance_key,
102
+ ano,
103
+ instituicao,
104
+ regiao,
105
+ classe,
106
+ jornada_trabalho,
107
+ titulacao,
108
+ vinculo_carreira,
109
+ vinculo_contrato,
110
+ vinculo_professor,
111
+ SUM(servidores) AS servidores, -- both measures are summed from columns already provided by the source view
112
+ SUM(total_registros) AS total_registros
113
+ FROM curated.vw_pnp_servidores_quadro
114
+ GROUP BY -- every non-aggregated column of the select list
115
+ run_id,
116
+ instance_key,
117
+ ano,
118
+ instituicao,
119
+ regiao,
120
+ classe,
121
+ jornada_trabalho,
122
+ titulacao,
123
+ vinculo_carreira,
124
+ vinculo_contrato,
125
+ vinculo_professor;
126
+
127
+ CREATE INDEX idx_mv_pnp_dashboard_servidores_geo -- index over run, year, institution and region
128
+ ON curated.mv_pnp_dashboard_servidores (run_id, ano, instituicao, regiao);
129
+ CREATE INDEX idx_mv_pnp_dashboard_servidores_dim -- index over the staff classification columns
130
+ ON curated.mv_pnp_dashboard_servidores (classe, jornada_trabalho, titulacao, vinculo_carreira, vinculo_contrato, vinculo_professor);
131
+
132
+
133
+ DROP MATERIALIZED VIEW IF EXISTS curated.mv_pnp_dashboard_financeiro CASCADE; -- rebuilt from scratch, CASCADE also drops objects that depend on this materialized view
134
+ CREATE MATERIALIZED VIEW curated.mv_pnp_dashboard_financeiro AS -- materialized aggregate of curated.vw_pnp_financeiro_execucao, which must already exist when this runs
135
+ SELECT
136
+ run_id,
137
+ instance_key,
138
+ ano,
139
+ nome_uo,
140
+ uo,
141
+ cod_acao,
142
+ nome_acao,
143
+ grupo_despesa,
144
+ SUM(registros) AS registros, -- both measures are summed from columns already provided by the source view
145
+ SUM(liquidacoes_totais) AS liquidacoes_totais
146
+ FROM curated.vw_pnp_financeiro_execucao
147
+ GROUP BY run_id, instance_key, ano, nome_uo, uo, cod_acao, nome_acao, grupo_despesa; -- every non-aggregated column of the select list
148
+
149
+ CREATE INDEX idx_mv_pnp_dashboard_financeiro_dim -- single index over run, year and the budget dimension columns
150
+ ON curated.mv_pnp_dashboard_financeiro (run_id, ano, nome_uo, grupo_despesa, cod_acao, nome_acao);
151
+
152
+
153
+ DROP MATERIALIZED VIEW IF EXISTS curated.mv_pnp_dashboard_qualidade CASCADE; -- rebuilt from scratch, CASCADE also drops objects that depend on this materialized view
154
+ CREATE MATERIALIZED VIEW curated.mv_pnp_dashboard_qualidade AS -- stored copy of the listed columns of curated.vw_pnp_qualidade_dados, no further aggregation and no index is created here
155
+ SELECT
156
+ run_id,
157
+ instance_key,
158
+ tipo_microdados,
159
+ registros,
160
+ registros_sem_instituicao,
161
+ registros_sem_uf,
162
+ registros_sem_sexo,
163
+ registros_sem_cor_raca,
164
+ registros_sem_renda_familiar,
165
+ registros_sem_faixa_etaria,
166
+ registros_financeiros_sem_valor,
167
+ registros_servidores_sem_quantidade,
168
+ pct_sem_instituicao,
169
+ pct_sem_uf
170
+ FROM curated.vw_pnp_qualidade_dados; -- the source view must already exist when this runs
171
+
172
+
173
+ DROP MATERIALIZED VIEW IF EXISTS curated.mv_pnp_dashboard_ingestao CASCADE; -- rebuilt from scratch, CASCADE also drops objects that depend on this materialized view
174
+ CREATE MATERIALIZED VIEW curated.mv_pnp_dashboard_ingestao AS -- stored copy of the listed columns of curated.vw_pnp_admin_ingestao, no aggregation
175
+ SELECT
176
+ run_id,
177
+ instance_key,
178
+ connection_key,
179
+ connection_name,
180
+ airflow_dag_id,
181
+ airflow_dag_run_id,
182
+ trigger_mode,
183
+ requested_by,
184
+ logical_date,
185
+ status,
186
+ catalog_entry_count,
187
+ selected_download_count,
188
+ downloaded_file_count,
189
+ raw_record_count,
190
+ staging_record_count,
191
+ package_count,
192
+ quarantine_count,
193
+ quality_status,
194
+ started_at,
195
+ finished_at,
196
+ duration_seconds,
197
+ error_message
198
+ FROM curated.vw_pnp_admin_ingestao; -- the source view must already exist when this runs
199
+
200
+ CREATE INDEX idx_mv_pnp_dashboard_ingestao_run -- index over run and status
201
+ ON curated.mv_pnp_dashboard_ingestao (run_id, status);
202
+
203
+
204
+ GRANT SELECT ON ALL TABLES IN SCHEMA curated TO metabase_user, vanna_user; -- read access for both roles on the objects that exist in schema curated at this point, issued after the materialized views above were dropped and recreated
@@ -0,0 +1,51 @@
1
+ CREATE OR REPLACE VIEW curated.vw_pnp_admin_ingestao AS -- lists raw.pnp_runs rows enriched with instance metadata, staging results and package and quarantine counts
2
+ WITH package_counts AS ( -- number of raw.pnp_run_packages rows per run
3
+ SELECT
4
+ run_id,
5
+ COUNT(*) AS package_count
6
+ FROM raw.pnp_run_packages
7
+ GROUP BY run_id
8
+ ),
9
+ quarantine_counts AS ( -- number of raw.pnp_ingestion_quarantine rows per run
10
+ SELECT
11
+ run_id,
12
+ COUNT(*) AS quarantine_count
13
+ FROM raw.pnp_ingestion_quarantine
14
+ GROUP BY run_id
15
+ )
16
+ SELECT
17
+ runs.run_id,
18
+ runs.instance_key,
19
+ instances.connection_key,
20
+ instances.connection_name,
21
+ runs.airflow_dag_id,
22
+ runs.airflow_dag_run_id,
23
+ runs.trigger_mode,
24
+ runs.requested_by,
25
+ runs.logical_date,
26
+ runs.status,
27
+ runs.catalog_entry_count,
28
+ runs.selected_download_count,
29
+ runs.downloaded_file_count,
30
+ runs.raw_record_count,
31
+ COALESCE(staging.deduplicated_record_count, 0) AS staging_record_count, -- runs without a staging row report 0
32
+ COALESCE(package_counts.package_count, 0) AS package_count, -- runs without packages report 0
33
+ COALESCE(quarantine_counts.quarantine_count, 0) AS quarantine_count, -- runs without quarantine rows report 0
34
+ staging.quality_status, -- stays NULL when the run has no staging row
35
+ runs.started_at,
36
+ runs.finished_at,
37
+ CASE
38
+ WHEN runs.finished_at IS NULL THEN NULL -- unfinished runs have no duration
39
+ ELSE EXTRACT(EPOCH FROM (runs.finished_at - runs.started_at))::BIGINT -- elapsed seconds between start and finish, cast to BIGINT
40
+ END AS duration_seconds,
41
+ runs.error_message,
42
+ runs.run_summary_json
43
+ FROM raw.pnp_runs runs
44
+ LEFT JOIN raw.pnp_instances instances -- all joins are left joins, so a run is kept even when a side has no matching row
45
+ ON instances.instance_key = runs.instance_key
46
+ LEFT JOIN staging.pnp_ingestion_runs staging
47
+ ON staging.run_id = runs.run_id
48
+ LEFT JOIN package_counts
49
+ ON package_counts.run_id = runs.run_id
50
+ LEFT JOIN quarantine_counts
51
+ ON quarantine_counts.run_id = runs.run_id;
@@ -0,0 +1,114 @@
1
-- Data-quality summary per (run_id, instance_key, tipo_microdados).
-- The four staging tables are stacked into one common column shape
-- (columns a table does not supply are typed NULLs), then missing-value
-- counts are computed per group, and finally two of those counts are
-- expressed as a percentage of the group's row count.
CREATE OR REPLACE VIEW curated.vw_pnp_qualidade_dados AS
WITH linhas AS (
    SELECT
        run_id, instance_key,
        'Matrículas'::TEXT AS tipo_microdados,
        instituicao, uf, sexo, cor_raca, renda_familiar, faixa_etaria,
        NULL::NUMERIC AS liquidacoes_totais,
        NULL::NUMERIC AS numero_registros
    FROM staging.pnp_matriculas

    UNION ALL

    SELECT
        run_id, instance_key,
        'Eficiência Acadêmica'::TEXT AS tipo_microdados,
        instituicao, uf, sexo, cor_raca, renda_familiar, faixa_etaria,
        NULL::NUMERIC AS liquidacoes_totais,
        NULL::NUMERIC AS numero_registros
    FROM staging.pnp_eficiencia_academica

    UNION ALL

    -- Servidores supplies only instituicao and numero_registros.
    SELECT
        run_id, instance_key,
        'Servidores'::TEXT AS tipo_microdados,
        instituicao,
        NULL::TEXT AS uf,
        NULL::TEXT AS sexo,
        NULL::TEXT AS cor_raca,
        NULL::TEXT AS renda_familiar,
        NULL::TEXT AS faixa_etaria,
        NULL::NUMERIC AS liquidacoes_totais,
        numero_registros
    FROM staging.pnp_servidores

    UNION ALL

    -- Financeiro supplies only liquidacoes_totais.
    SELECT
        run_id, instance_key,
        'Financeiro'::TEXT AS tipo_microdados,
        NULL::TEXT AS instituicao,
        NULL::TEXT AS uf,
        NULL::TEXT AS sexo,
        NULL::TEXT AS cor_raca,
        NULL::TEXT AS renda_familiar,
        NULL::TEXT AS faixa_etaria,
        liquidacoes_totais,
        NULL::NUMERIC AS numero_registros
    FROM staging.pnp_financeiro
),
contagens AS (
    SELECT
        l.run_id,
        l.instance_key,
        l.tipo_microdados,
        COUNT(*) AS registros,
        -- Financeiro rows never carry instituicao, so they are excluded.
        COUNT(*) FILTER (
            WHERE l.tipo_microdados <> 'Financeiro'
              AND l.instituicao IS NULL
        ) AS registros_sem_instituicao,
        -- The profile columns below are only populated for the two
        -- student-level datasets, so only those are checked.
        COUNT(*) FILTER (
            WHERE l.tipo_microdados IN ('Matrículas', 'Eficiência Acadêmica')
              AND l.uf IS NULL
        ) AS registros_sem_uf,
        COUNT(*) FILTER (
            WHERE l.tipo_microdados IN ('Matrículas', 'Eficiência Acadêmica')
              AND l.sexo IS NULL
        ) AS registros_sem_sexo,
        COUNT(*) FILTER (
            WHERE l.tipo_microdados IN ('Matrículas', 'Eficiência Acadêmica')
              AND l.cor_raca IS NULL
        ) AS registros_sem_cor_raca,
        COUNT(*) FILTER (
            WHERE l.tipo_microdados IN ('Matrículas', 'Eficiência Acadêmica')
              AND l.renda_familiar IS NULL
        ) AS registros_sem_renda_familiar,
        COUNT(*) FILTER (
            WHERE l.tipo_microdados IN ('Matrículas', 'Eficiência Acadêmica')
              AND l.faixa_etaria IS NULL
        ) AS registros_sem_faixa_etaria,
        COUNT(*) FILTER (
            WHERE l.tipo_microdados = 'Financeiro'
              AND l.liquidacoes_totais IS NULL
        ) AS registros_financeiros_sem_valor,
        COUNT(*) FILTER (
            WHERE l.tipo_microdados = 'Servidores'
              AND l.numero_registros IS NULL
        ) AS registros_servidores_sem_quantidade
    FROM linhas AS l
    GROUP BY
        l.run_id,
        l.instance_key,
        l.tipo_microdados
)
SELECT
    c.run_id,
    c.instance_key,
    c.tipo_microdados,
    c.registros,
    c.registros_sem_instituicao,
    c.registros_sem_uf,
    c.registros_sem_sexo,
    c.registros_sem_cor_raca,
    c.registros_sem_renda_familiar,
    c.registros_sem_faixa_etaria,
    c.registros_financeiros_sem_valor,
    c.registros_servidores_sem_quantidade,
    -- 100.0 forces numeric division; NULLIF guards against a zero divisor.
    ROUND(100.0 * c.registros_sem_instituicao / NULLIF(c.registros, 0), 2)
        AS pct_sem_instituicao,
    ROUND(100.0 * c.registros_sem_uf / NULLIF(c.registros, 0), 2)
        AS pct_sem_uf
FROM contagens AS c;
@@ -0,0 +1,67 @@
1
-- Aggregates staging.pnp_matriculas by run, instance, year, location and
-- student-profile attributes (sexo, cor_raca, renda_familiar,
-- faixa_etaria, situacao_matricula).
--   matriculas      = number of staging rows in the group
--   vagas_ofertadas = sum of vagas_ofertadas over those rows
--   inscritos       = sum of total_inscritos over those rows
CREATE OR REPLACE VIEW curated.vw_pnp_matriculas_perfil AS
SELECT
    m.run_id,
    m.instance_key,
    m.ano,
    m.instituicao, m.regiao, m.uf, m.municipio,
    m.sexo, m.cor_raca, m.renda_familiar, m.faixa_etaria,
    m.situacao_matricula,
    COUNT(*)               AS matriculas,
    SUM(m.vagas_ofertadas) AS vagas_ofertadas,
    SUM(m.total_inscritos) AS inscritos
FROM staging.pnp_matriculas AS m
GROUP BY
    m.run_id,
    m.instance_key,
    m.ano,
    m.instituicao, m.regiao, m.uf, m.municipio,
    m.sexo, m.cor_raca, m.renda_familiar, m.faixa_etaria,
    m.situacao_matricula;
32
+
33
-- Aggregates staging.pnp_matriculas by run, instance, year, location and
-- course-offering attributes (modalidade_ensino, tipo_curso, tipo_oferta,
-- turno, eixo/subeixo tecnologico, nome_curso).
--   matriculas      = number of staging rows in the group
--   vagas_ofertadas = sum of vagas_ofertadas over those rows
--   inscritos       = sum of total_inscritos over those rows
CREATE OR REPLACE VIEW curated.vw_pnp_matriculas_oferta AS
SELECT
    m.run_id,
    m.instance_key,
    m.ano,
    m.instituicao, m.regiao, m.uf, m.municipio,
    m.modalidade_ensino, m.tipo_curso, m.tipo_oferta, m.turno,
    m.eixo_tecnologico, m.subeixo_tecnologico,
    m.nome_curso,
    COUNT(*)               AS matriculas,
    SUM(m.vagas_ofertadas) AS vagas_ofertadas,
    SUM(m.total_inscritos) AS inscritos
FROM staging.pnp_matriculas AS m
GROUP BY
    m.run_id,
    m.instance_key,
    m.ano,
    m.instituicao, m.regiao, m.uf, m.municipio,
    m.modalidade_ensino, m.tipo_curso, m.tipo_oferta, m.turno,
    m.eixo_tecnologico, m.subeixo_tecnologico,
    m.nome_curso;