@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,701 @@
1
+ -- Keep this file in sync with sql/ddl/003_pnp_raw_staging_curated.sql.
2
+
3
+ CREATE TABLE IF NOT EXISTS config.app_settings ( -- key/value settings table; IF NOT EXISTS leaves an existing table untouched on re-run
4
+ setting_key TEXT PRIMARY KEY,
5
+ setting_value JSONB NOT NULL, -- the value itself is stored as JSONB and may not be NULL
6
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb, -- defaults to an empty JSON object
7
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
8
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- NOTE(review): no updated_at trigger for this table is visible in this part of the file; confirm writers maintain it
9
+ );
10
+
11
+ CREATE TABLE IF NOT EXISTS raw.pnp_connections ( -- one row per connection, identified by connection_key
12
+ connection_key TEXT PRIMARY KEY,
13
+ connection_name TEXT NOT NULL,
14
+ page_url TEXT NOT NULL,
15
+ is_active BOOLEAN NOT NULL DEFAULT TRUE, -- new rows are active unless stated otherwise
16
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb, -- defaults to an empty JSON object
17
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
18
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- refreshed on UPDATE by trg_pnp_connections_updated_at
19
+ );
20
+
21
+ CREATE OR REPLACE FUNCTION raw.touch_updated_at() -- trigger function used by the BEFORE UPDATE triggers in this file: sets NEW.updated_at to NOW() and returns NEW
22
+ RETURNS TRIGGER -- returning the modified NEW row from a BEFORE trigger is what makes the new timestamp get stored
23
+ LANGUAGE plpgsql
24
+ AS $$
25
+ BEGIN
26
+ NEW.updated_at = NOW();
27
+ RETURN NEW;
28
+ END;
29
+ $$;
30
+
31
+ DROP TRIGGER IF EXISTS trg_pnp_connections_updated_at ON raw.pnp_connections; -- drop-then-create keeps the script re-runnable; CREATE TRIGGER has no IF NOT EXISTS form
32
+ CREATE TRIGGER trg_pnp_connections_updated_at
33
+ BEFORE UPDATE ON raw.pnp_connections
34
+ FOR EACH ROW -- fires once per updated row, not once per statement
35
+ EXECUTE FUNCTION raw.touch_updated_at(); -- EXECUTE FUNCTION syntax requires PostgreSQL 11 or later
36
+
37
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_connections_active -- ordered for lookups by is_active with the newest updated_at first
38
+ ON raw.pnp_connections (is_active, updated_at DESC);
39
+
40
+ CREATE TABLE IF NOT EXISTS raw.pnp_instances ( -- one row per instance; instance_key is the primary key and pipeline_id a second, UUID identifier
41
+ pipeline_id UUID NOT NULL DEFAULT gen_random_uuid(), -- gen_random_uuid() is built in from PostgreSQL 13; older servers need the pgcrypto extension
42
+ instance_key TEXT PRIMARY KEY, -- referenced by raw.pnp_instance_selection and raw.pnp_pipeline_endpoints
43
+ instance_name TEXT NOT NULL,
44
+ connection_key TEXT, -- NOTE(review): nullable and not declared as a foreign key to raw.pnp_connections; confirm this is intentional
45
+ connection_name TEXT,
46
+ page_url TEXT NOT NULL,
47
+ schedule TEXT,
48
+ is_active BOOLEAN NOT NULL DEFAULT TRUE,
49
+ legacy_mode TEXT,
50
+ legacy_endpoint_id BIGINT,
51
+ legacy_endpoint_key TEXT,
52
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
53
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
54
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- refreshed on UPDATE by trg_pnp_instances_updated_at
55
+ deleted_at TIMESTAMPTZ -- nullable; presumably a soft-delete marker, confirm against the application code
56
+ );
57
+
58
+ DO $$
59
+ BEGIN
60
+ IF NOT EXISTS (
61
+ SELECT 1
62
+ FROM pg_constraint
63
+ WHERE conname = 'uq_raw_pnp_instances_pipeline_id'
64
+ AND conrelid = 'raw.pnp_instances'::regclass
65
+ ) THEN
66
+ ALTER TABLE raw.pnp_instances
67
+ ADD CONSTRAINT uq_raw_pnp_instances_pipeline_id UNIQUE (pipeline_id);
68
+ END IF;
69
+ END $$; -- the block above adds the UNIQUE (pipeline_id) constraint only when pg_constraint has no constraint of that name on this table, so re-running the script does not fail
70
+
71
+ DROP TRIGGER IF EXISTS trg_pnp_instances_updated_at ON raw.pnp_instances; -- drop-then-create keeps the script re-runnable
72
+ CREATE TRIGGER trg_pnp_instances_updated_at
73
+ BEFORE UPDATE ON raw.pnp_instances
74
+ FOR EACH ROW
75
+ EXECUTE FUNCTION raw.touch_updated_at();
76
+
77
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_instances_active -- ordered for lookups by is_active with the newest updated_at first
78
+ ON raw.pnp_instances (is_active, updated_at DESC);
79
+
80
+ CREATE TABLE IF NOT EXISTS raw.pnp_instance_selection ( -- per-instance selection rows keyed by (instance_key, ano_base, tipo_microdados)
81
+ selection_id BIGSERIAL PRIMARY KEY,
82
+ pipeline_id UUID, -- NOTE(review): nullable and has no foreign key to raw.pnp_instances(pipeline_id); confirm this is intentional
83
+ instance_key TEXT NOT NULL REFERENCES raw.pnp_instances(instance_key) ON DELETE CASCADE, -- rows are deleted together with their instance
84
+ ano_base TEXT NOT NULL,
85
+ tipo_microdados TEXT NOT NULL,
86
+ configured_microdados_url TEXT,
87
+ is_active BOOLEAN NOT NULL DEFAULT TRUE,
88
+ selection_rank INTEGER,
89
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
90
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
91
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- refreshed on UPDATE by trg_pnp_instance_selection_updated_at
92
+ CONSTRAINT uq_raw_pnp_instance_selection UNIQUE (instance_key, ano_base, tipo_microdados) -- at most one row per instance for each ano_base/tipo_microdados pair
93
+ );
94
+
95
+ DROP TRIGGER IF EXISTS trg_pnp_instance_selection_updated_at ON raw.pnp_instance_selection; -- drop-then-create keeps the script re-runnable
96
+ CREATE TRIGGER trg_pnp_instance_selection_updated_at
97
+ BEFORE UPDATE ON raw.pnp_instance_selection
98
+ FOR EACH ROW
99
+ EXECUTE FUNCTION raw.touch_updated_at();
100
+
101
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_instance_selection_instance -- lookup path by instance_key, then is_active
102
+ ON raw.pnp_instance_selection (instance_key, is_active, ano_base, tipo_microdados);
103
+
104
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_instance_selection_pipeline -- same shape as the index above, keyed by pipeline_id instead of instance_key
105
+ ON raw.pnp_instance_selection (pipeline_id, is_active, ano_base, tipo_microdados);
106
+
107
+ CREATE TABLE IF NOT EXISTS raw.pnp_endpoint_tables ( -- maps each endpoint to the raw, staging and curated relation names recorded for it
108
+ endpoint_key TEXT PRIMARY KEY, -- referenced by raw.pnp_pipeline_endpoints
109
+ endpoint_name TEXT NOT NULL,
110
+ tipo_microdados TEXT NOT NULL UNIQUE, -- at most one endpoint row per tipo_microdados value
111
+ raw_table_schema TEXT NOT NULL DEFAULT 'raw',
112
+ raw_table_name TEXT NOT NULL,
113
+ staging_table_schema TEXT NOT NULL DEFAULT 'staging',
114
+ staging_table_name TEXT, -- nullable, unlike raw_table_name
115
+ curated_relation_schema TEXT NOT NULL DEFAULT 'curated',
116
+ curated_relation_name TEXT, -- nullable, unlike raw_table_name
117
+ is_active BOOLEAN NOT NULL DEFAULT TRUE,
118
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
119
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
120
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- refreshed on UPDATE by trg_pnp_endpoint_tables_updated_at
121
+ CONSTRAINT uq_raw_pnp_endpoint_tables_raw_table UNIQUE (raw_table_schema, raw_table_name) -- a given raw table can be registered by only one endpoint row
122
+ );
123
+
124
+ DROP TRIGGER IF EXISTS trg_pnp_endpoint_tables_updated_at ON raw.pnp_endpoint_tables; -- drop-then-create keeps the script re-runnable
125
+ CREATE TRIGGER trg_pnp_endpoint_tables_updated_at
126
+ BEFORE UPDATE ON raw.pnp_endpoint_tables
127
+ FOR EACH ROW
128
+ EXECUTE FUNCTION raw.touch_updated_at();
129
+
130
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_endpoint_tables_active -- lookup path by is_active, ordered by endpoint_name
131
+ ON raw.pnp_endpoint_tables (is_active, endpoint_name);
132
+
133
+ CREATE TABLE IF NOT EXISTS raw.pnp_pipeline_endpoints ( -- link table between raw.pnp_instances and raw.pnp_endpoint_tables
134
+ pipeline_endpoint_id BIGSERIAL PRIMARY KEY,
135
+ pipeline_id UUID, -- NOTE(review): nullable and has no foreign key to raw.pnp_instances(pipeline_id); confirm this is intentional
136
+ instance_key TEXT NOT NULL REFERENCES raw.pnp_instances(instance_key) ON DELETE CASCADE, -- rows are deleted together with their instance
137
+ connection_key TEXT, -- NOTE(review): not declared as a foreign key to raw.pnp_connections; confirm
138
+ endpoint_key TEXT NOT NULL REFERENCES raw.pnp_endpoint_tables(endpoint_key), -- no ON DELETE action given, so an endpoint row cannot be deleted while it is referenced here
139
+ selection_source TEXT,
140
+ is_active BOOLEAN NOT NULL DEFAULT TRUE,
141
+ metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
142
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
143
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- refreshed on UPDATE by trg_pnp_pipeline_endpoints_updated_at
144
+ CONSTRAINT uq_raw_pnp_pipeline_endpoints UNIQUE (instance_key, endpoint_key) -- an endpoint can be linked to an instance at most once
145
+ );
146
+
147
+ DROP TRIGGER IF EXISTS trg_pnp_pipeline_endpoints_updated_at ON raw.pnp_pipeline_endpoints; -- drop-then-create keeps the script re-runnable
148
+ CREATE TRIGGER trg_pnp_pipeline_endpoints_updated_at
149
+ BEFORE UPDATE ON raw.pnp_pipeline_endpoints
150
+ FOR EACH ROW
151
+ EXECUTE FUNCTION raw.touch_updated_at();
152
+
153
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_pipeline_endpoints_instance -- lookup path by instance_key, then is_active
154
+ ON raw.pnp_pipeline_endpoints (instance_key, is_active, endpoint_key);
155
+
156
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_pipeline_endpoints_pipeline -- lookup path by pipeline_id, then is_active
157
+ ON raw.pnp_pipeline_endpoints (pipeline_id, is_active, endpoint_key);
158
+
159
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_pipeline_endpoints_connection -- lookup path by connection_key, then is_active
160
+ ON raw.pnp_pipeline_endpoints (connection_key, is_active, instance_key);
161
+
162
+ CREATE TABLE IF NOT EXISTS raw.pnp_runs ( -- one row per run; parent of the other raw.pnp_* tables in this file that reference run_id
163
+ run_id TEXT PRIMARY KEY, -- text key with no default, so the writer must supply it
164
+ instance_key TEXT, -- nullable and not declared as a foreign key to raw.pnp_instances
165
+ airflow_dag_id TEXT,
166
+ airflow_dag_run_id TEXT,
167
+ logical_date TIMESTAMPTZ,
168
+ trigger_mode TEXT,
169
+ requested_by TEXT,
170
+ status TEXT NOT NULL, -- free-form text: no CHECK constraint restricts the allowed values
171
+ legacy_status TEXT,
172
+ catalog_entry_count INTEGER NOT NULL DEFAULT 0,
173
+ selected_download_count INTEGER NOT NULL DEFAULT 0,
174
+ downloaded_file_count INTEGER NOT NULL DEFAULT 0,
175
+ raw_record_count BIGINT NOT NULL DEFAULT 0, -- BIGINT, unlike the INTEGER counters above
176
+ error_message TEXT,
177
+ run_summary_json JSONB NOT NULL DEFAULT '{}'::jsonb,
178
+ started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
179
+ finished_at TIMESTAMPTZ -- nullable; no default is set when the row is created
180
+ );
181
+
182
+ CREATE UNIQUE INDEX IF NOT EXISTS uq_raw_pnp_runs_airflow -- partial unique index: uniqueness applies only to rows where both Airflow ids are present
183
+ ON raw.pnp_runs (airflow_dag_id, airflow_dag_run_id)
184
+ WHERE airflow_dag_id IS NOT NULL AND airflow_dag_run_id IS NOT NULL;
185
+
186
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_runs_instance_started -- lookup path by instance_key with the newest started_at first
187
+ ON raw.pnp_runs (instance_key, started_at DESC);
188
+
189
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_runs_status -- lookup path by status with the newest started_at first
190
+ ON raw.pnp_runs (status, started_at DESC);
191
+
192
+ CREATE TABLE IF NOT EXISTS raw.pnp_run_steps ( -- one row per (run, Airflow task, map index) step of a run
193
+ step_id BIGSERIAL PRIMARY KEY,
194
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- steps are deleted together with their run
195
+ instance_key TEXT,
196
+ airflow_task_id TEXT NOT NULL,
197
+ map_index INTEGER, -- nullable
198
+ map_index_key INTEGER GENERATED ALWAYS AS (COALESCE(map_index, -1)) STORED, -- stored generated column turning a NULL map_index into -1, so the UNIQUE constraint below also catches duplicates without a map_index (NULLs would otherwise count as distinct)
199
+ status TEXT NOT NULL,
200
+ started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
201
+ finished_at TIMESTAMPTZ,
202
+ records_affected BIGINT,
203
+ error_message TEXT,
204
+ details_json JSONB NOT NULL DEFAULT '{}'::jsonb,
205
+ CONSTRAINT uq_raw_pnp_run_steps_task_map UNIQUE (run_id, airflow_task_id, map_index_key) -- uses map_index_key, not map_index, for the reason given on that column
206
+ );
207
+
208
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_run_steps_run -- lookup path by run_id with the newest started_at first
209
+ ON raw.pnp_run_steps (run_id, started_at DESC);
210
+
211
+ CREATE TABLE IF NOT EXISTS raw.pnp_run_packages ( -- package rows recorded per run and Airflow task, each with a JSONB payload
212
+ package_id BIGSERIAL PRIMARY KEY, -- NOTE(review): the only uniqueness rule on this table; no natural-key constraint is declared here
213
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- packages are deleted together with their run
214
+ instance_key TEXT,
215
+ airflow_dag_id TEXT,
216
+ airflow_dag_run_id TEXT,
217
+ airflow_task_id TEXT NOT NULL,
218
+ package_type TEXT NOT NULL,
219
+ package_name TEXT NOT NULL,
220
+ package_status TEXT NOT NULL,
221
+ records_affected BIGINT,
222
+ payload_json JSONB NOT NULL DEFAULT '{}'::jsonb,
223
+ created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- there is no updated_at column and no update trigger on this table
224
+ );
225
+
226
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_run_packages_run -- lookup path by run_id with the newest created_at first
227
+ ON raw.pnp_run_packages (run_id, created_at DESC);
228
+
229
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_run_packages_task -- lookup path by airflow_task_id with the newest created_at first
230
+ ON raw.pnp_run_packages (airflow_task_id, created_at DESC);
231
+
232
+ CREATE TABLE IF NOT EXISTS raw.pnp_catalog_entries ( -- catalog entries captured for a run, each flagged as selected or not
233
+ catalog_entry_id BIGSERIAL PRIMARY KEY,
234
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- entries are deleted together with their run
235
+ instance_key TEXT,
236
+ ano_base TEXT NOT NULL,
237
+ tipo_microdados TEXT NOT NULL,
238
+ microdados_url TEXT NOT NULL,
239
+ resource_key TEXT,
240
+ visual_id TEXT,
241
+ api_base_url TEXT,
242
+ catalog_hash TEXT,
243
+ is_selected BOOLEAN NOT NULL DEFAULT FALSE, -- entries start unselected
244
+ captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
245
+ CONSTRAINT uq_raw_pnp_catalog_entries UNIQUE (run_id, ano_base, tipo_microdados, microdados_url) -- the same URL is recorded at most once per run for a given ano_base/tipo_microdados pair
246
+ );
247
+
248
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_catalog_entries_run -- lookup path by run_id, then is_selected
249
+ ON raw.pnp_catalog_entries (run_id, is_selected, ano_base, tipo_microdados);
250
+
251
+ CREATE TABLE IF NOT EXISTS raw.pnp_run_selection ( -- selection rows recorded for a run; referenced by raw.pnp_downloads.run_selection_id
252
+ run_selection_id BIGSERIAL PRIMARY KEY,
253
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- selections are deleted together with their run
254
+ instance_key TEXT,
255
+ ano_base TEXT NOT NULL,
256
+ tipo_microdados TEXT NOT NULL,
257
+ microdados_url TEXT NOT NULL,
258
+ selection_source TEXT,
259
+ selection_rank INTEGER,
260
+ details_json JSONB NOT NULL DEFAULT '{}'::jsonb,
261
+ selected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
262
+ CONSTRAINT uq_raw_pnp_run_selection UNIQUE (run_id, ano_base, tipo_microdados, microdados_url) -- the same URL is selected at most once per run for a given ano_base/tipo_microdados pair
263
+ );
264
+
265
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_run_selection_run -- NOTE(review): leading-column prefix of the UNIQUE constraint's own index above; may be redundant
266
+ ON raw.pnp_run_selection (run_id, ano_base, tipo_microdados);
267
+
268
+ CREATE TABLE IF NOT EXISTS raw.pnp_downloads ( -- one row per downloaded URL within a run, with file details and status
269
+ download_id BIGSERIAL PRIMARY KEY, -- referenced by raw.pnp_download_columns, raw.pnp_ingestion_quarantine and the *_src tables
270
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- downloads are deleted together with their run
271
+ instance_key TEXT,
272
+ run_selection_id BIGINT REFERENCES raw.pnp_run_selection(run_selection_id) ON DELETE SET NULL, -- deleting the selection row keeps the download and clears this link
273
+ legacy_asset_id BIGINT,
274
+ ano_base TEXT NOT NULL,
275
+ tipo_microdados TEXT NOT NULL,
276
+ microdados_url TEXT NOT NULL,
277
+ source_file_name TEXT,
278
+ source_file_sha256 TEXT,
279
+ content_type TEXT,
280
+ size_bytes BIGINT,
281
+ row_count_raw BIGINT,
282
+ status TEXT NOT NULL DEFAULT 'pending', -- rows start as 'pending'; no CHECK constraint restricts later values
283
+ started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
284
+ finished_at TIMESTAMPTZ,
285
+ error_message TEXT,
286
+ details_json JSONB NOT NULL DEFAULT '{}'::jsonb,
287
+ CONSTRAINT uq_raw_pnp_downloads UNIQUE (run_id, microdados_url) -- a URL is downloaded at most once per run
288
+ );
289
+
290
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_downloads_run -- lookup path by run_id, then status
291
+ ON raw.pnp_downloads (run_id, status, ano_base, tipo_microdados);
292
+
293
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_downloads_sha -- lookup path by file hash; not unique, so the same hash may appear in several rows
294
+ ON raw.pnp_downloads (source_file_sha256);
295
+
296
+ CREATE TABLE IF NOT EXISTS raw.pnp_download_columns ( -- column names recorded per download, by position
297
+ download_column_id BIGSERIAL PRIMARY KEY,
298
+ download_id BIGINT NOT NULL REFERENCES raw.pnp_downloads(download_id) ON DELETE CASCADE, -- column rows are deleted together with their download
299
+ column_position INTEGER NOT NULL,
300
+ column_name TEXT NOT NULL,
301
+ normalized_column_name TEXT NOT NULL,
302
+ captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
303
+ CONSTRAINT uq_raw_pnp_download_columns_position UNIQUE (download_id, column_position) -- one row per column position within a download
304
+ );
305
+
306
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_download_columns_download -- NOTE(review): same column list as uq_raw_pnp_download_columns_position, whose backing index already serves these lookups; likely redundant
307
+ ON raw.pnp_download_columns (download_id, column_position);
308
+
309
+ CREATE TABLE IF NOT EXISTS raw.pnp_ingestion_quarantine ( -- rows recording an ingestion error, with the error type, message and optional raw line text
310
+ quarantine_id BIGSERIAL PRIMARY KEY,
311
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- quarantine rows are deleted together with their run
312
+ instance_key TEXT,
313
+ download_id BIGINT REFERENCES raw.pnp_downloads(download_id) ON DELETE SET NULL, -- deleting the download keeps the quarantine row and clears this link
314
+ source_row_number INTEGER,
315
+ error_type TEXT NOT NULL,
316
+ error_message TEXT NOT NULL,
317
+ raw_line_text TEXT, -- nullable
318
+ details_json JSONB NOT NULL DEFAULT '{}'::jsonb,
319
+ ingested_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
320
+ );
321
+
322
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_ingestion_quarantine_run -- lookup path by run_id, then download_id and source_row_number; not unique
323
+ ON raw.pnp_ingestion_quarantine (run_id, download_id, source_row_number);
324
+
325
+ CREATE TABLE IF NOT EXISTS raw.pnp_matriculas_src ( -- raw matriculas rows: run/download/source bookkeeping columns first, then the data columns, all typed TEXT
326
+ raw_record_id BIGSERIAL PRIMARY KEY,
327
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- raw rows are deleted together with their run
328
+ instance_key TEXT,
329
+ download_id BIGINT REFERENCES raw.pnp_downloads(download_id) ON DELETE SET NULL, -- deleting the download keeps the raw row and clears this link
330
+ record_hash TEXT NOT NULL, -- indexed on its own and together with instance_key after this table
331
+ source_record_id TEXT,
332
+ source_row_number INTEGER,
333
+ source_file_name TEXT,
334
+ source_file_sha256 TEXT,
335
+ source_url TEXT NOT NULL,
336
+ ano_base TEXT,
337
+ tipo_microdados TEXT NOT NULL,
338
+ ano TEXT, -- from here to vagas_regulares_li_ep every column is nullable TEXT; presumably values are typed in a later layer, confirm
339
+ carga_horaria TEXT,
340
+ carga_horaria_minima TEXT,
341
+ categoria_da_situacao TEXT,
342
+ co_inst TEXT,
343
+ cod_unidade TEXT,
344
+ cor_raca TEXT,
345
+ codigo_da_matricula TEXT,
346
+ codigo_da_unidade_de_ensino_sistec TEXT,
347
+ codigo_do_ciclo_matricula TEXT,
348
+ codigo_do_municipio_com_dv TEXT,
349
+ data_de_fim_previsto_do_ciclo TEXT,
350
+ data_de_inicio_do_ciclo TEXT,
351
+ data_de_ocorrencia_da_matricula TEXT,
352
+ eixo_tecnologico TEXT,
353
+ faixa_etaria TEXT,
354
+ fator_esforco_curso TEXT,
355
+ fonte_de_financiamento TEXT,
356
+ forma_de_ingresso TEXT,
357
+ habilitacao TEXT,
358
+ idade TEXT,
359
+ instituicao TEXT,
360
+ matricula_atendida TEXT,
361
+ modalidade_de_ensino TEXT,
362
+ municipio TEXT,
363
+ mes_de_ocorrencia_da_situacao TEXT,
364
+ nome_de_curso TEXT,
365
+ regiao TEXT,
366
+ renda_familiar TEXT,
367
+ sexo TEXT,
368
+ situacao_de_matricula TEXT,
369
+ subeixo_tecnologico TEXT,
370
+ tipo_de_curso TEXT,
371
+ tipo_de_oferta TEXT,
372
+ total_de_inscritos TEXT,
373
+ turno TEXT,
374
+ uf TEXT,
375
+ unidade_de_ensino TEXT,
376
+ vagas_extraordinarias_ac TEXT,
377
+ vagas_extraordinarias_l1 TEXT,
378
+ vagas_extraordinarias_l10 TEXT,
379
+ vagas_extraordinarias_l13 TEXT,
380
+ vagas_extraordinarias_l14 TEXT,
381
+ vagas_extraordinarias_l2 TEXT,
382
+ vagas_extraordinarias_l5 TEXT,
383
+ vagas_extraordinarias_l6 TEXT,
384
+ vagas_extraordinarias_l9 TEXT,
385
+ vagas_extraordinarias_lb_ppi TEXT,
386
+ vagas_extraordinarias_lb_q TEXT,
387
+ vagas_extraordinarias_lb_pcd TEXT,
388
+ vagas_extraordinarias_lb_ep TEXT,
389
+ vagas_extraordinarias_li_ppi TEXT,
390
+ vagas_extraordinarias_li_q TEXT,
391
+ vagas_extraordinarias_li_pcd TEXT,
392
+ vagas_extraordinarias_li_ep TEXT,
393
+ vagas_ofertadas TEXT,
394
+ vagas_regulares_ac TEXT,
395
+ vagas_regulares_l1 TEXT,
396
+ vagas_regulares_l10 TEXT,
397
+ vagas_regulares_l13 TEXT,
398
+ vagas_regulares_l14 TEXT,
399
+ vagas_regulares_l2 TEXT,
400
+ vagas_regulares_l5 TEXT,
401
+ vagas_regulares_l6 TEXT,
402
+ vagas_regulares_l9 TEXT,
403
+ vagas_regulares_lb_ppi TEXT,
404
+ vagas_regulares_lb_q TEXT,
405
+ vagas_regulares_lb_pcd TEXT,
406
+ vagas_regulares_lb_ep TEXT,
407
+ vagas_regulares_li_ppi TEXT,
408
+ vagas_regulares_li_q TEXT,
409
+ vagas_regulares_li_pcd TEXT,
410
+ vagas_regulares_li_ep TEXT,
411
+ ingested_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
412
+ CONSTRAINT uq_raw_pnp_matriculas_src_run_download_row UNIQUE (run_id, download_id, source_row_number) -- NOTE(review): download_id and source_row_number are nullable and NULLs count as distinct, so rows with a NULL in either column are not deduplicated by this constraint
413
+ );
414
+
415
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_matriculas_src_run -- NOTE(review): (run_id, download_id) is the leading prefix of the table's UNIQUE (run_id, download_id, source_row_number) index; may be redundant
416
+ ON raw.pnp_matriculas_src (run_id, download_id);
417
+
418
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_matriculas_src_hash -- lookup path by record_hash across all instances; not unique
419
+ ON raw.pnp_matriculas_src (record_hash);
420
+
421
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_matriculas_src_instance_hash -- lookup path by record_hash within one instance_key; not unique
422
+ ON raw.pnp_matriculas_src (instance_key, record_hash);
423
+
424
+ CREATE TABLE IF NOT EXISTS raw.pnp_eficiencia_academica_src ( -- Raw-schema table for "eficiencia academica" source rows; every source column is stored as TEXT (no typing at this layer).
425
+ raw_record_id BIGSERIAL PRIMARY KEY, -- Surrogate key; staging.pnp_eficiencia_academica.raw_record_id references it.
426
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- Rows are removed together with their run.
427
+ instance_key TEXT,
428
+ download_id BIGINT REFERENCES raw.pnp_downloads(download_id) ON DELETE SET NULL, -- Nullable: reset to NULL when the referenced download row is deleted.
429
+ record_hash TEXT NOT NULL, -- NOTE(review): presumably a content hash of the source row; confirm how the loader computes it.
430
+ source_record_id TEXT,
431
+ source_row_number INTEGER,
432
+ source_file_name TEXT,
433
+ source_file_sha256 TEXT,
434
+ source_url TEXT NOT NULL,
435
+ ano_base TEXT,
436
+ tipo_microdados TEXT NOT NULL,
437
+ ano TEXT,
438
+ carga_horaria TEXT,
439
+ carga_horaria_minima TEXT,
440
+ categoria_da_situacao TEXT,
441
+ co_inst TEXT,
442
+ cod_unidade TEXT,
443
+ cor_raca TEXT,
444
+ codigo_da_matricula TEXT,
445
+ codigo_da_unidade_de_ensino_sistec TEXT,
446
+ codigo_do_ciclo_matricula TEXT,
447
+ codigo_do_municipio_com_dv TEXT,
448
+ data_de_fim_previsto_do_ciclo TEXT,
449
+ data_de_inicio_do_ciclo TEXT,
450
+ data_de_ocorrencia_da_matricula TEXT,
451
+ eixo_tecnologico TEXT,
452
+ faixa_etaria TEXT,
453
+ fator_esforco_curso TEXT,
454
+ fonte_de_financiamento TEXT,
455
+ forma_de_ingresso TEXT,
456
+ habilitacao TEXT,
457
+ idade TEXT,
458
+ instituicao TEXT,
459
+ matricula_atendida TEXT,
460
+ modalidade_de_ensino TEXT,
461
+ municipio TEXT,
462
+ mes_de_ocorrencia_da_situacao TEXT,
463
+ nome_de_curso TEXT,
464
+ regiao TEXT,
465
+ renda_familiar TEXT,
466
+ sexo TEXT,
467
+ situacao_de_matricula TEXT,
468
+ subeixo_tecnologico TEXT,
469
+ tipo_de_curso TEXT,
470
+ tipo_de_oferta TEXT,
471
+ total_de_inscritos TEXT,
472
+ turno TEXT,
473
+ uf TEXT,
474
+ unidade_de_ensino TEXT,
475
+ vagas_extraordinarias_ac TEXT,
476
+ vagas_extraordinarias_l1 TEXT,
477
+ vagas_extraordinarias_l10 TEXT,
478
+ vagas_extraordinarias_l13 TEXT,
479
+ vagas_extraordinarias_l14 TEXT,
480
+ vagas_extraordinarias_l2 TEXT,
481
+ vagas_extraordinarias_l5 TEXT,
482
+ vagas_extraordinarias_l6 TEXT,
483
+ vagas_extraordinarias_l9 TEXT,
484
+ vagas_extraordinarias_lb_ppi TEXT,
485
+ vagas_extraordinarias_lb_q TEXT,
486
+ vagas_extraordinarias_lb_pcd TEXT,
487
+ vagas_extraordinarias_lb_ep TEXT,
488
+ vagas_extraordinarias_li_ppi TEXT,
489
+ vagas_extraordinarias_li_q TEXT,
490
+ vagas_extraordinarias_li_pcd TEXT,
491
+ vagas_extraordinarias_li_ep TEXT,
492
+ vagas_ofertadas TEXT,
493
+ vagas_regulares_ac TEXT,
494
+ vagas_regulares_l1 TEXT,
495
+ vagas_regulares_l10 TEXT,
496
+ vagas_regulares_l13 TEXT,
497
+ vagas_regulares_l14 TEXT,
498
+ vagas_regulares_l2 TEXT,
499
+ vagas_regulares_l5 TEXT,
500
+ vagas_regulares_l6 TEXT,
501
+ vagas_regulares_l9 TEXT,
502
+ vagas_regulares_lb_ppi TEXT,
503
+ vagas_regulares_lb_q TEXT,
504
+ vagas_regulares_lb_pcd TEXT,
505
+ vagas_regulares_lb_ep TEXT,
506
+ vagas_regulares_li_ppi TEXT,
507
+ vagas_regulares_li_q TEXT,
508
+ vagas_regulares_li_pcd TEXT,
509
+ vagas_regulares_li_ep TEXT,
510
+ ingested_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- Filled with NOW() when the insert supplies no value.
511
+ CONSTRAINT uq_raw_pnp_eficiencia_src_run_download_row UNIQUE (run_id, download_id, source_row_number) -- NOTE(review): download_id and source_row_number are nullable, and PostgreSQL treats NULLs as distinct in UNIQUE constraints by default, so rows with a NULL in either column are not deduplicated here.
512
+ );
513
+
514
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_eficiencia_src_run -- NOTE(review): the table's UNIQUE (run_id, download_id, source_row_number) constraint is backed by an index with the same leading columns; confirm this extra index is still needed.
515
+ ON raw.pnp_eficiencia_academica_src (run_id, download_id);
516
+
517
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_eficiencia_src_hash -- Single-column index on record_hash; presumably serves deduplication lookups (confirm against the loader).
518
+ ON raw.pnp_eficiencia_academica_src (record_hash);
519
+
520
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_eficiencia_src_instance_hash -- Composite index for lookups that filter by instance_key first and then by record_hash.
521
+ ON raw.pnp_eficiencia_academica_src (instance_key, record_hash);
522
+
523
+ CREATE TABLE IF NOT EXISTS raw.pnp_financeiro_src ( -- Raw-schema table for "financeiro" source rows; every source column is stored as TEXT (no typing at this layer).
524
+ raw_record_id BIGSERIAL PRIMARY KEY, -- Surrogate key; staging.pnp_financeiro.raw_record_id references it.
525
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- Rows are removed together with their run.
526
+ instance_key TEXT,
527
+ download_id BIGINT REFERENCES raw.pnp_downloads(download_id) ON DELETE SET NULL, -- Nullable: reset to NULL when the referenced download row is deleted.
528
+ record_hash TEXT NOT NULL, -- NOTE(review): presumably a content hash of the source row; confirm how the loader computes it.
529
+ source_record_id TEXT,
530
+ source_row_number INTEGER,
531
+ source_file_name TEXT,
532
+ source_file_sha256 TEXT,
533
+ source_url TEXT NOT NULL,
534
+ ano_base TEXT,
535
+ tipo_microdados TEXT NOT NULL,
536
+ uo TEXT,
537
+ nome_uo TEXT,
538
+ cod_acao TEXT,
539
+ nome_acao TEXT,
540
+ grupo_despesa TEXT,
541
+ liquidacoes_totais TEXT, -- Kept as TEXT here; staging.pnp_financeiro stores the same-named column as NUMERIC.
542
+ ingested_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- Filled with NOW() when the insert supplies no value.
543
+ CONSTRAINT uq_raw_pnp_financeiro_src_run_download_row UNIQUE (run_id, download_id, source_row_number) -- NOTE(review): download_id and source_row_number are nullable, and PostgreSQL treats NULLs as distinct in UNIQUE constraints by default, so rows with a NULL in either column are not deduplicated here.
544
+ );
545
+
546
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_financeiro_src_run -- NOTE(review): the table's UNIQUE (run_id, download_id, source_row_number) constraint is backed by an index with the same leading columns; confirm this extra index is still needed.
546
+ ON raw.pnp_financeiro_src (run_id, download_id);
547
+
548
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_financeiro_src_hash -- Single-column index on record_hash; presumably serves deduplication lookups (confirm against the loader).
549
+ ON raw.pnp_financeiro_src (record_hash);
550
+
551
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_financeiro_src_instance_hash -- Composite index for lookups that filter by instance_key first and then by record_hash.
552
+ ON raw.pnp_financeiro_src (instance_key, record_hash);
554
+
555
+ CREATE TABLE IF NOT EXISTS raw.pnp_servidores_src ( -- Raw-schema table for "servidores" source rows; every source column is stored as TEXT (no typing at this layer).
556
+ raw_record_id BIGSERIAL PRIMARY KEY, -- Surrogate key; staging.pnp_servidores.raw_record_id references it.
557
+ run_id TEXT NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- Rows are removed together with their run.
558
+ instance_key TEXT,
559
+ download_id BIGINT REFERENCES raw.pnp_downloads(download_id) ON DELETE SET NULL, -- Nullable: reset to NULL when the referenced download row is deleted.
560
+ record_hash TEXT NOT NULL, -- NOTE(review): presumably a content hash of the source row; confirm how the loader computes it.
561
+ source_record_id TEXT,
562
+ source_row_number INTEGER,
563
+ source_file_name TEXT,
564
+ source_file_sha256 TEXT,
565
+ source_url TEXT NOT NULL,
566
+ ano_base TEXT,
567
+ tipo_microdados TEXT NOT NULL,
568
+ classe TEXT,
569
+ cod_unidade TEXT,
570
+ codigo_da_unidade_de_ensino_sistec TEXT,
571
+ codigo_municipio_com_dv TEXT,
572
+ instituicao TEXT,
573
+ jornada_de_trabalho TEXT,
574
+ matricula TEXT,
575
+ municipio TEXT,
576
+ regiao TEXT,
577
+ rsc TEXT,
578
+ titulacao TEXT,
579
+ unidade_de_lotacao TEXT,
580
+ vinculo_carreira TEXT,
581
+ vinculo_contrato TEXT,
582
+ vinculo_professor TEXT,
583
+ numero_de_registros TEXT, -- Kept as TEXT here; staging.pnp_servidores has a NUMERIC numero_registros column (presumably its typed counterpart; confirm).
584
+ ingested_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- Filled with NOW() when the insert supplies no value.
585
+ CONSTRAINT uq_raw_pnp_servidores_src_run_download_row UNIQUE (run_id, download_id, source_row_number) -- NOTE(review): download_id and source_row_number are nullable, and PostgreSQL treats NULLs as distinct in UNIQUE constraints by default, so rows with a NULL in either column are not deduplicated here.
586
+ );
587
+
588
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_servidores_src_run -- NOTE(review): the table's UNIQUE (run_id, download_id, source_row_number) constraint is backed by an index with the same leading columns; confirm this extra index is still needed.
588
+ ON raw.pnp_servidores_src (run_id, download_id);
589
+
590
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_servidores_src_hash -- Single-column index on record_hash; presumably serves deduplication lookups (confirm against the loader).
591
+ ON raw.pnp_servidores_src (record_hash);
592
+
593
+ CREATE INDEX IF NOT EXISTS idx_raw_pnp_servidores_src_instance_hash -- Composite index for lookups that filter by instance_key first and then by record_hash.
594
+ ON raw.pnp_servidores_src (instance_key, record_hash);
596
+
597
+ CREATE TABLE IF NOT EXISTS staging.pnp_ingestion_runs ( -- One row per run (run_id is the primary key) holding status, counters and quality information.
598
+ run_id TEXT PRIMARY KEY REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE, -- At most one row per raw.pnp_runs entry; removed together with that run.
599
+ instance_key TEXT,
600
+ status TEXT NOT NULL, -- Free-form TEXT; no CHECK constraint restricts the allowed values here.
601
+ selected_download_count INTEGER NOT NULL DEFAULT 0,
602
+ downloaded_file_count INTEGER NOT NULL DEFAULT 0,
603
+ raw_record_count BIGINT NOT NULL DEFAULT 0,
604
+ deduplicated_record_count BIGINT NOT NULL DEFAULT 0,
605
+ quality_status TEXT,
606
+ quality_summary_json JSONB NOT NULL DEFAULT '{}'::jsonb, -- Defaults to an empty JSON object, so the column is never NULL.
607
+ started_at TIMESTAMPTZ,
608
+ finished_at TIMESTAMPTZ,
609
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- Only an insert-time default; nothing in this statement refreshes it on UPDATE, so writers must set it explicitly.
610
+ );
611
+
612
+ CREATE TABLE IF NOT EXISTS staging.pnp_matriculas ( -- Staging table for matriculas rows; at most one row per raw.pnp_matriculas_src row.
613
+ raw_record_id BIGINT PRIMARY KEY REFERENCES raw.pnp_matriculas_src(raw_record_id) ON DELETE CASCADE, -- Both primary key and foreign key: deleting the raw row deletes this row.
614
+ run_id TEXT NOT NULL, -- NOTE(review): no foreign key to raw.pnp_runs here, unlike the raw tables; cleanup relies on the raw_record_id cascade.
615
+ instance_key TEXT,
616
+ ano INTEGER, -- Typed as INTEGER in staging.
617
+ instituicao TEXT,
618
+ regiao TEXT,
619
+ uf TEXT,
620
+ municipio TEXT,
621
+ sexo TEXT,
622
+ cor_raca TEXT,
623
+ renda_familiar TEXT,
624
+ faixa_etaria TEXT,
625
+ situacao_matricula TEXT,
626
+ modalidade_ensino TEXT,
627
+ tipo_curso TEXT,
628
+ tipo_oferta TEXT,
629
+ turno TEXT,
630
+ eixo_tecnologico TEXT,
631
+ subeixo_tecnologico TEXT,
632
+ nome_curso TEXT,
633
+ total_inscritos NUMERIC, -- Numeric in staging; presumably parsed from raw total_de_inscritos (TEXT); confirm in the transform.
634
+ vagas_ofertadas NUMERIC, -- Numeric in staging; the raw column of the same name is TEXT.
635
+ processed_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- Filled with NOW() when the insert supplies no value.
636
+ );
637
+
638
+ CREATE INDEX IF NOT EXISTS idx_staging_pnp_matriculas_ano_inst -- Composite index led by ano; queries that do not filter on ano benefit little from it.
639
+ ON staging.pnp_matriculas (ano, instituicao, uf, municipio);
640
+
641
+ CREATE TABLE IF NOT EXISTS staging.pnp_eficiencia_academica ( -- Staging table for eficiencia academica rows; at most one row per raw.pnp_eficiencia_academica_src row.
642
+ raw_record_id BIGINT PRIMARY KEY REFERENCES raw.pnp_eficiencia_academica_src(raw_record_id) ON DELETE CASCADE, -- Both primary key and foreign key: deleting the raw row deletes this row.
643
+ run_id TEXT NOT NULL, -- NOTE(review): no foreign key to raw.pnp_runs here, unlike the raw tables; cleanup relies on the raw_record_id cascade.
644
+ instance_key TEXT,
645
+ ano INTEGER, -- Typed as INTEGER in staging; the raw table stores ano as TEXT.
646
+ instituicao TEXT,
647
+ regiao TEXT,
648
+ uf TEXT,
649
+ municipio TEXT,
650
+ sexo TEXT,
651
+ cor_raca TEXT,
652
+ renda_familiar TEXT,
653
+ faixa_etaria TEXT,
654
+ categoria_situacao TEXT,
655
+ situacao_matricula TEXT,
656
+ matricula_atendida TEXT, -- Stays TEXT in staging (not converted to a numeric or boolean type).
657
+ processed_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- Filled with NOW() when the insert supplies no value.
658
+ );
659
+
660
+ CREATE INDEX IF NOT EXISTS idx_staging_pnp_eficiencia_ano_inst -- Composite index led by ano; queries that do not filter on ano benefit little from it.
660
+ ON staging.pnp_eficiencia_academica (ano, instituicao, uf, municipio);
662
+
663
+ CREATE TABLE IF NOT EXISTS staging.pnp_servidores ( -- Staging table for servidores rows; at most one row per raw.pnp_servidores_src row.
664
+ raw_record_id BIGINT PRIMARY KEY REFERENCES raw.pnp_servidores_src(raw_record_id) ON DELETE CASCADE, -- Both primary key and foreign key: deleting the raw row deletes this row.
665
+ run_id TEXT NOT NULL, -- NOTE(review): no foreign key to raw.pnp_runs here, unlike the raw tables; cleanup relies on the raw_record_id cascade.
666
+ instance_key TEXT,
667
+ ano INTEGER, -- NOTE(review): raw.pnp_servidores_src has no ano column, only ano_base; presumably derived from it (confirm in the transform).
668
+ instituicao TEXT,
669
+ regiao TEXT,
670
+ uf TEXT, -- NOTE(review): raw.pnp_servidores_src has no uf column; confirm where this value comes from.
671
+ municipio TEXT,
672
+ classe TEXT,
673
+ jornada_trabalho TEXT,
674
+ titulacao TEXT,
675
+ rsc TEXT,
676
+ vinculo_carreira TEXT,
677
+ vinculo_contrato TEXT,
678
+ vinculo_professor TEXT,
679
+ numero_registros NUMERIC, -- Numeric in staging; presumably parsed from raw numero_de_registros (TEXT); confirm in the transform.
680
+ processed_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- Filled with NOW() when the insert supplies no value.
681
+ );
682
+
683
+ CREATE INDEX IF NOT EXISTS idx_staging_pnp_servidores_ano_inst -- Composite index led by ano; it uses regiao where the matriculas and eficiencia staging indexes use uf and municipio.
684
+ ON staging.pnp_servidores (ano, instituicao, regiao);
685
+
686
+ CREATE TABLE IF NOT EXISTS staging.pnp_financeiro ( -- Staging table for financeiro rows; at most one row per raw.pnp_financeiro_src row.
687
+ raw_record_id BIGINT PRIMARY KEY REFERENCES raw.pnp_financeiro_src(raw_record_id) ON DELETE CASCADE, -- Both primary key and foreign key: deleting the raw row deletes this row.
688
+ run_id TEXT NOT NULL, -- NOTE(review): no foreign key to raw.pnp_runs here, unlike the raw tables; cleanup relies on the raw_record_id cascade.
689
+ instance_key TEXT,
690
+ ano INTEGER, -- NOTE(review): raw.pnp_financeiro_src has no ano column, only ano_base; presumably derived from it (confirm in the transform).
691
+ nome_uo TEXT,
692
+ uo TEXT,
693
+ cod_acao TEXT,
694
+ nome_acao TEXT,
695
+ grupo_despesa TEXT,
696
+ liquidacoes_totais NUMERIC, -- Numeric in staging; the raw column of the same name is TEXT.
697
+ processed_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- Filled with NOW() when the insert supplies no value.
698
+ );
699
+
700
+ CREATE INDEX IF NOT EXISTS idx_staging_pnp_financeiro_ano_uo -- Composite index led by ano; it indexes nome_uo (the name column) rather than uo.
701
+ ON staging.pnp_financeiro (ano, nome_uo, cod_acao, grupo_despesa);
@@ -0,0 +1,4 @@
1
+ FROM quay.io/keycloak/keycloak:26.2
2
+ # Bake the dataif realm definition into the image. The COPY source is relative to the build context, so the context must be the directory that contains infra/. NOTE(review): presumably imported at startup via Keycloak's --import-realm option; confirm in docker-compose.yml.
3
+ COPY infra/keycloak/realm-dataif.json /opt/keycloak/data/import/realm-dataif.json
4
+