@dataif/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/dataif.js +623 -0
- package/package.json +26 -0
- package/scripts/build-template.mjs +72 -0
- package/templates/dataif/README.md +157 -0
- package/templates/dataif/infra/.env.example +119 -0
- package/templates/dataif/infra/.env.stg.example +119 -0
- package/templates/dataif/infra/airflow/Dockerfile +11 -0
- package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
- package/templates/dataif/infra/airflow/requirements.txt +3 -0
- package/templates/dataif/infra/docker-compose.yml +306 -0
- package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
- package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
- package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
- package/templates/dataif/infra/keycloak/Dockerfile +4 -0
- package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
- package/templates/dataif/infra/ollama/Dockerfile +9 -0
- package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
- package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
- package/templates/dataif/infra/postgres/Dockerfile +4 -0
- package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
- package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
- package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
- package/templates/dataif/scripts/configure-env.sh +149 -0
- package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
- package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
- package/templates/dataif/scripts/deploy.sh +79 -0
- package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
- package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
- package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
- package/templates/dataif/scripts/publish-images.sh +31 -0
- package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
- package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
- package/templates/dataif/services/api/.dockerignore +18 -0
- package/templates/dataif/services/api/Dockerfile +12 -0
- package/templates/dataif/services/api/app/__init__.py +1 -0
- package/templates/dataif/services/api/app/auth.py +48 -0
- package/templates/dataif/services/api/app/config.py +59 -0
- package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
- package/templates/dataif/services/api/app/main.py +2432 -0
- package/templates/dataif/services/api/app/metabase_admin.py +191 -0
- package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
- package/templates/dataif/services/api/app/metabase_embed.py +15 -0
- package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
- package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
- package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
- package/templates/dataif/services/api/app/vanna_client.py +32 -0
- package/templates/dataif/services/api/requirements.txt +9 -0
- package/templates/dataif/services/vanna/.dockerignore +18 -0
- package/templates/dataif/services/vanna/Dockerfile +12 -0
- package/templates/dataif/services/vanna/app/config.py +57 -0
- package/templates/dataif/services/vanna/app/main.py +108 -0
- package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
- package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
- package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
- package/templates/dataif/services/vanna/requirements.txt +8 -0
- package/templates/dataif/services/web/.dockerignore +13 -0
- package/templates/dataif/services/web/Dockerfile +16 -0
- package/templates/dataif/services/web/index.html +12 -0
- package/templates/dataif/services/web/nginx.conf +74 -0
- package/templates/dataif/services/web/package-lock.json +4397 -0
- package/templates/dataif/services/web/package.json +32 -0
- package/templates/dataif/services/web/postcss.config.mjs +5 -0
- package/templates/dataif/services/web/src/App.jsx +2817 -0
- package/templates/dataif/services/web/src/adminAuth.js +245 -0
- package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
- package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
- package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
- package/templates/dataif/services/web/src/assets/if.svg +0 -0
- package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
- package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
- package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
- package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
- package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
- package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
- package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
- package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
- package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
- package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
- package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
- package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
- package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
- package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
- package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
- package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
- package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
- package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
- package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
- package/templates/dataif/services/web/src/main.jsx +14 -0
- package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
- package/templates/dataif/services/web/src/styles/globals.css +60 -0
- package/templates/dataif/services/web/src/styles/theme.css +1326 -0
- package/templates/dataif/services/web/src/styles/typography.css +430 -0
- package/templates/dataif/services/web/src/styles.css +1287 -0
- package/templates/dataif/services/web/src/utils/cx.ts +24 -0
- package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
- package/templates/dataif/services/web/vite.config.js +14 -0
- package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
- package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
- package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
- package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
- package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
- package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
- package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
- package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
- package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
- package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
- package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
- package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
- package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
- package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
-- Backfill raw.pnp_connections from the legacy "connection" rows stored in
-- config.connector_endpoints (connector 'nilo_pecanha', mode
-- 'powerbi_microdados', entity_type 'connection').
WITH legacy_rows AS (
    -- Resolve the connection key once: the explicit request param wins,
    -- otherwise the endpoint key with its '__connection' marker removed.
    SELECT
        ce.id,
        ce.endpoint_key,
        ce.description,
        ce.request_params,
        ce.page_url,
        ce.api_endpoint_url,
        ce.csv_url,
        ce.is_active,
        ce.created_at,
        ce.updated_at,
        COALESCE(
            NULLIF(ce.request_params->>'connection_key', ''),
            REPLACE(ce.endpoint_key, '__connection', '')
        ) AS resolved_key
    FROM config.connector_endpoints AS ce
    WHERE ce.connector_id = 'nilo_pecanha'
      AND ce.request_params->>'mode' = 'powerbi_microdados'
      AND COALESCE(ce.request_params->>'entity_type', 'pipeline') = 'connection'
),
explicit_connections AS (
    -- One row per connection key: the most recently updated legacy row wins,
    -- ties broken by the highest id.
    SELECT DISTINCT ON (lr.resolved_key)
        lr.resolved_key AS connection_key,
        COALESCE(
            NULLIF(lr.request_params->>'connection_name', ''),
            NULLIF(lr.description, ''),
            lr.resolved_key
        ) AS connection_name,
        COALESCE(
            NULLIF(lr.page_url, ''),
            NULLIF(lr.api_endpoint_url, ''),
            NULLIF(lr.csv_url, '')
        ) AS page_url,
        lr.is_active,
        jsonb_build_object(
            'legacy_source', 'config.connector_endpoints',
            'legacy_endpoint_key', lr.endpoint_key,
            'legacy_description', lr.description,
            'legacy_request_params', lr.request_params
        ) AS metadata,
        lr.created_at,
        lr.updated_at,
        -- Soft-delete marker: prefer the recorded deletion timestamp and fall
        -- back to the row's last update time.
        CASE
            WHEN COALESCE(lr.request_params->>'deleted', 'false') = 'true'
                THEN COALESCE(NULLIF(lr.request_params->>'deleted_at', '')::timestamptz, lr.updated_at)
            ELSE NULL
        END AS deleted_at
    FROM legacy_rows AS lr
    WHERE lr.resolved_key IS NOT NULL
    ORDER BY lr.resolved_key, lr.updated_at DESC, lr.id DESC
)
INSERT INTO raw.pnp_connections (
    connection_key,
    connection_name,
    page_url,
    is_active,
    metadata,
    created_at,
    updated_at,
    deleted_at
)
SELECT
    ec.connection_key,
    ec.connection_name,
    ec.page_url,
    ec.is_active,
    ec.metadata,
    ec.created_at,
    ec.updated_at,
    ec.deleted_at
FROM explicit_connections AS ec
WHERE ec.connection_key IS NOT NULL
  AND ec.connection_name IS NOT NULL
  AND ec.page_url IS NOT NULL
ON CONFLICT (connection_key) DO UPDATE
SET
    connection_name = EXCLUDED.connection_name,
    page_url        = EXCLUDED.page_url,
    is_active       = EXCLUDED.is_active,
    metadata        = EXCLUDED.metadata,
    updated_at      = EXCLUDED.updated_at,
    deleted_at      = EXCLUDED.deleted_at;
|
|
62
|
+
|
|
63
|
+
-- Create synthetic connections for instance page URLs that have no usable
-- connection: either the instance has no connection key, or its key does not
-- exist in raw.pnp_connections.
WITH instances_needing_connection AS (
    SELECT DISTINCT
        i.page_url
    FROM raw.pnp_instances AS i
    WHERE i.page_url IS NOT NULL
      AND (
            COALESCE(i.connection_key, '') = ''
         OR NOT EXISTS (
                SELECT 1
                FROM raw.pnp_connections AS c
                WHERE c.connection_key = i.connection_key
            )
      )
),
numbered_urls AS (
    -- Number the URLs once so key and name are always derived from the same
    -- ordinal.
    SELECT
        page_url,
        ROW_NUMBER() OVER (ORDER BY page_url) AS rn
    FROM instances_needing_connection
),
fallback_connections AS (
    SELECT
        CASE
            WHEN rn = 1 THEN 'pnp_conn_principal'
            -- lpad() truncates input longer than the target width, so a fixed
            -- width of 2 would turn 100 into '10' and collide with row 10.
            -- Widen the pad for 3+ digit ordinals; keys for 2..99 are unchanged.
            ELSE format(
                'pnp_conn_principal_%s',
                lpad(rn::text, GREATEST(2, length(rn::text)), '0')
            )
        END AS connection_key,
        CASE
            WHEN rn = 1 THEN 'PNP Principal'
            ELSE format('PNP Principal %s', rn)
        END AS connection_name,
        page_url
    FROM numbered_urls
)
INSERT INTO raw.pnp_connections (
    connection_key,
    connection_name,
    page_url,
    is_active,
    metadata
)
SELECT
    connection_key,
    connection_name,
    page_url,
    TRUE,
    jsonb_build_object(
        'backfill_source', 'raw.pnp_instances.page_url',
        'synthetic', TRUE
    )
FROM fallback_connections
-- NOTE(review): keys depend on the ordinal of the current candidate set, so a
-- later run with a different set can repoint an existing synthetic key to
-- another page_url through this upsert -- confirm this is acceptable.
ON CONFLICT (connection_key) DO UPDATE
SET
    connection_name = EXCLUDED.connection_name,
    page_url = EXCLUDED.page_url,
    metadata = raw.pnp_connections.metadata || EXCLUDED.metadata;
|
|
112
|
+
|
|
113
|
+
-- Attach every instance to a connection: keep its own connection key when it
-- has one, otherwise adopt the connection that shares its page_url.
WITH resolved_connections AS (
    -- DISTINCT ON yields exactly one candidate per instance. Without it, an
    -- instance whose page_url matches several connections would give the
    -- UPDATE ... FROM below multiple source rows and an arbitrary result.
    SELECT DISTINCT ON (i.instance_key)
        i.instance_key,
        -- Treat '' like NULL, matching the fallback join condition; a plain
        -- COALESCE would keep the empty string and never apply the fallback.
        COALESCE(NULLIF(i.connection_key, ''), fallback.connection_key) AS connection_key,
        COALESCE(
            NULLIF(i.connection_name, ''),
            explicit.connection_name,
            fallback.connection_name
        ) AS connection_name
    FROM raw.pnp_instances i
    LEFT JOIN raw.pnp_connections explicit
        ON explicit.connection_key = i.connection_key
    LEFT JOIN raw.pnp_connections fallback
        ON fallback.page_url = i.page_url
       AND COALESCE(i.connection_key, '') = ''
    -- Prefer connections that are not soft-deleted, then the lowest key.
    ORDER BY
        i.instance_key,
        (fallback.deleted_at IS NULL) DESC,
        fallback.connection_key
)
UPDATE raw.pnp_instances i
SET
    connection_key = resolved.connection_key,
    connection_name = resolved.connection_name,
    updated_at = NOW()
FROM resolved_connections resolved
WHERE i.instance_key = resolved.instance_key
  AND resolved.connection_key IS NOT NULL
  -- Only touch rows whose key or name actually changes.
  AND (
        COALESCE(i.connection_key, '') <> resolved.connection_key
     OR COALESCE(i.connection_name, '') <> COALESCE(resolved.connection_name, '')
  );
|
|
137
|
+
|
|
138
|
+
-- Derive each connection's active / soft-deleted state from the instances
-- that reference it.
WITH connection_status AS (
    SELECT
        conn.connection_key,
        -- TRUE when at least one referencing instance is live and active.
        BOOL_OR(inst.deleted_at IS NULL AND inst.is_active = TRUE) AS has_active_instances,
        MAX(inst.deleted_at) AS latest_instance_deleted_at
    FROM raw.pnp_connections AS conn
    LEFT JOIN raw.pnp_instances AS inst
        ON inst.connection_key = conn.connection_key
    GROUP BY conn.connection_key
)
UPDATE raw.pnp_connections AS c
SET
    -- Keep the stored flag when the aggregate yields no verdict (NULL).
    is_active = COALESCE(status.has_active_instances, c.is_active),
    deleted_at = CASE
        WHEN status.has_active_instances IS TRUE THEN NULL
        ELSE COALESCE(c.deleted_at, status.latest_instance_deleted_at)
    END,
    updated_at = NOW()
FROM connection_status AS status
WHERE status.connection_key = c.connection_key;
|
|
158
|
+
|
|
159
|
+
-- Write the resolved connection key/name of each instance back into the
-- request_params of its legacy pipeline rows in config.connector_endpoints.
WITH instance_connection AS (
    SELECT
        instance_key,
        connection_key,
        connection_name
    FROM raw.pnp_instances
    WHERE connection_key IS NOT NULL
)
UPDATE config.connector_endpoints e
SET
    -- Merge instead of nesting jsonb_set(): jsonb_set() is strict, so a NULL
    -- connection_name made the whole expression NULL and wiped request_params.
    -- jsonb_strip_nulls() drops a NULL name, leaving any existing
    -- connection_name in request_params untouched.
    request_params = COALESCE(e.request_params, '{}'::jsonb)
        || jsonb_strip_nulls(
               jsonb_build_object(
                   'connection_key', ic.connection_key,
                   'connection_name', ic.connection_name
               )
           ),
    updated_at = NOW()
FROM instance_connection ic
WHERE e.connector_id = 'nilo_pecanha'
  AND e.request_params->>'mode' = 'powerbi_microdados'
  -- Pipeline rows only; connection rows are handled separately.
  AND COALESCE(e.request_params->>'entity_type', 'pipeline') <> 'connection'
  AND COALESCE(
          NULLIF(e.request_params->>'instance_key', ''),
          NULLIF(e.request_params->>'pipeline_key', '')
      ) = ic.instance_key
  -- Skip rows that are already in sync; a NULL instance name never counts as
  -- a difference because it is not written.
  AND (
        COALESCE(e.request_params->>'connection_key', '') <> ic.connection_key
     OR (
            ic.connection_name IS NOT NULL
        AND COALESCE(e.request_params->>'connection_name', '') <> ic.connection_name
        )
  );
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
-- Per-run package records, each tied to a row of raw.pnp_runs and removed
-- together with it (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS raw.pnp_run_packages (
    package_id          BIGSERIAL    PRIMARY KEY,
    run_id              TEXT         NOT NULL REFERENCES raw.pnp_runs(run_id) ON DELETE CASCADE,
    instance_key        TEXT,
    airflow_dag_id      TEXT,
    airflow_dag_run_id  TEXT,
    airflow_task_id     TEXT         NOT NULL,
    package_type        TEXT         NOT NULL,
    package_name        TEXT         NOT NULL,
    package_status      TEXT         NOT NULL,
    records_affected    BIGINT,
    payload_json        JSONB        NOT NULL DEFAULT '{}'::jsonb,
    created_at          TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);

-- Newest-first lookup of packages by run.
CREATE INDEX IF NOT EXISTS idx_raw_pnp_run_packages_run
    ON raw.pnp_run_packages (run_id, created_at DESC);

-- Newest-first lookup of packages by Airflow task.
CREATE INDEX IF NOT EXISTS idx_raw_pnp_run_packages_task
    ON raw.pnp_run_packages (airflow_task_id, created_at DESC);
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
-- Catalog mapping each endpoint (one per tipo_microdados) to its raw table
-- and, optionally, its staging table and curated relation.
CREATE TABLE IF NOT EXISTS raw.pnp_endpoint_tables (
    endpoint_key             TEXT         PRIMARY KEY,
    endpoint_name            TEXT         NOT NULL,
    tipo_microdados          TEXT         NOT NULL UNIQUE,
    raw_table_schema         TEXT         NOT NULL DEFAULT 'raw',
    raw_table_name           TEXT         NOT NULL,
    staging_table_schema     TEXT         NOT NULL DEFAULT 'staging',
    staging_table_name       TEXT,
    curated_relation_schema  TEXT         NOT NULL DEFAULT 'curated',
    curated_relation_name    TEXT,
    is_active                BOOLEAN      NOT NULL DEFAULT TRUE,
    metadata                 JSONB        NOT NULL DEFAULT '{}'::jsonb,
    created_at               TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at               TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    -- A raw table may back at most one endpoint.
    CONSTRAINT uq_raw_pnp_endpoint_tables_raw_table UNIQUE (raw_table_schema, raw_table_name)
);

-- Recreate the trigger so the script can be re-run safely.
DROP TRIGGER IF EXISTS trg_pnp_endpoint_tables_updated_at ON raw.pnp_endpoint_tables;
CREATE TRIGGER trg_pnp_endpoint_tables_updated_at
    BEFORE UPDATE ON raw.pnp_endpoint_tables
    FOR EACH ROW
    EXECUTE FUNCTION raw.touch_updated_at();

CREATE INDEX IF NOT EXISTS idx_raw_pnp_endpoint_tables_active
    ON raw.pnp_endpoint_tables (is_active, endpoint_name);
|
|
26
|
+
|
|
27
|
+
-- Link table: which endpoints each pipeline instance is configured for.
-- Rows are removed together with their instance (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS raw.pnp_pipeline_endpoints (
    pipeline_endpoint_id  BIGSERIAL    PRIMARY KEY,
    instance_key          TEXT         NOT NULL REFERENCES raw.pnp_instances(instance_key) ON DELETE CASCADE,
    connection_key        TEXT,
    endpoint_key          TEXT         NOT NULL REFERENCES raw.pnp_endpoint_tables(endpoint_key),
    selection_source      TEXT,
    is_active             BOOLEAN      NOT NULL DEFAULT TRUE,
    metadata              JSONB        NOT NULL DEFAULT '{}'::jsonb,
    created_at            TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at            TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    -- At most one row per (instance, endpoint) pair.
    CONSTRAINT uq_raw_pnp_pipeline_endpoints UNIQUE (instance_key, endpoint_key)
);

-- Recreate the trigger so the script can be re-run safely.
DROP TRIGGER IF EXISTS trg_pnp_pipeline_endpoints_updated_at ON raw.pnp_pipeline_endpoints;
CREATE TRIGGER trg_pnp_pipeline_endpoints_updated_at
    BEFORE UPDATE ON raw.pnp_pipeline_endpoints
    FOR EACH ROW
    EXECUTE FUNCTION raw.touch_updated_at();

CREATE INDEX IF NOT EXISTS idx_raw_pnp_pipeline_endpoints_instance
    ON raw.pnp_pipeline_endpoints (instance_key, is_active, endpoint_key);

CREATE INDEX IF NOT EXISTS idx_raw_pnp_pipeline_endpoints_connection
    ON raw.pnp_pipeline_endpoints (connection_key, is_active, instance_key);
|
|
51
|
+
|
|
52
|
+
-- Seed the four known endpoints. Re-running overwrites every seeded column,
-- resets metadata to the seeded value and forces is_active back to TRUE.
INSERT INTO raw.pnp_endpoint_tables (
    endpoint_key,
    endpoint_name,
    tipo_microdados,
    raw_table_schema,
    raw_table_name,
    staging_table_schema,
    staging_table_name,
    curated_relation_schema,
    curated_relation_name,
    metadata
)
VALUES
    ('matriculas', 'Matrículas', 'Matrículas',
     'raw', 'pnp_matriculas_src',
     'staging', 'pnp_matriculas',
     'curated', NULL,
     jsonb_build_object('domain_key', 'matriculas')),
    ('eficiencia_academica', 'Eficiência Acadêmica', 'Eficiência Acadêmica',
     'raw', 'pnp_eficiencia_academica_src',
     'staging', 'pnp_eficiencia_academica',
     'curated', NULL,
     jsonb_build_object('domain_key', 'eficiencia_academica')),
    ('servidores', 'Servidores', 'Servidores',
     'raw', 'pnp_servidores_src',
     'staging', 'pnp_servidores',
     'curated', NULL,
     jsonb_build_object('domain_key', 'servidores')),
    ('financeiro', 'Financeiro', 'Financeiro',
     'raw', 'pnp_financeiro_src',
     'staging', 'pnp_financeiro',
     'curated', NULL,
     jsonb_build_object('domain_key', 'financeiro'))
ON CONFLICT (endpoint_key) DO UPDATE
SET
    endpoint_name           = EXCLUDED.endpoint_name,
    tipo_microdados         = EXCLUDED.tipo_microdados,
    raw_table_schema        = EXCLUDED.raw_table_schema,
    raw_table_name          = EXCLUDED.raw_table_name,
    staging_table_schema    = EXCLUDED.staging_table_schema,
    staging_table_name      = EXCLUDED.staging_table_name,
    curated_relation_schema = EXCLUDED.curated_relation_schema,
    curated_relation_name   = EXCLUDED.curated_relation_name,
    is_active               = TRUE,
    metadata                = EXCLUDED.metadata,
    updated_at              = NOW();
|
|
126
|
+
|
|
127
|
+
-- Backfill raw.pnp_pipeline_endpoints: one row per (instance, endpoint),
-- derived from the instance's selections joined to the endpoint catalog by
-- tipo_microdados.
INSERT INTO raw.pnp_pipeline_endpoints (
    instance_key,
    connection_key,
    endpoint_key,
    selection_source,
    is_active,
    metadata
)
SELECT
    i.instance_key,
    i.connection_key,
    et.endpoint_key,
    'phase7a_backfill',
    -- Active only when the instance is active and at least one of its
    -- selections for this endpoint is active (NULL flags count as active).
    COALESCE(i.is_active, TRUE) AND BOOL_OR(COALESCE(s.is_active, TRUE)),
    jsonb_build_object(
        'tipo_microdados', s.tipo_microdados,
        'raw_table', format('%s.%s', et.raw_table_schema, et.raw_table_name),
        'staging_table', CASE
            WHEN et.staging_table_name IS NULL THEN NULL
            ELSE format('%s.%s', et.staging_table_schema, et.staging_table_name)
        END
    )
FROM raw.pnp_instances i
JOIN raw.pnp_instance_selection s
    ON s.instance_key = i.instance_key
JOIN raw.pnp_endpoint_tables et
    ON et.tipo_microdados = s.tipo_microdados
GROUP BY
    i.instance_key,
    i.connection_key,
    -- Grouped explicitly: an ungrouped i.is_active is accepted only when
    -- instance_key is the table's declared PRIMARY KEY (not visible here).
    -- With one row per instance_key this does not change the groups.
    i.is_active,
    et.endpoint_key,
    s.tipo_microdados,
    et.raw_table_schema,
    et.raw_table_name,
    et.staging_table_schema,
    et.staging_table_name
ON CONFLICT (instance_key, endpoint_key) DO UPDATE
SET
    connection_key = EXCLUDED.connection_key,
    selection_source = EXCLUDED.selection_source,
    is_active = EXCLUDED.is_active,
    metadata = EXCLUDED.metadata,
    updated_at = NOW();
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
-- Run the curated view scripts (psql \ir resolves paths relative to this
-- file); 004_mv_pnp_dashboard_fast.sql is included last.
\ir ../views_curated/010_vw_pnp_admin_ingestao.sql
\ir ../views_curated/020_vw_pnp_qualidade_dados.sql
\ir ../views_curated/030_vw_pnp_matriculas.sql
\ir ../views_curated/040_vw_pnp_eficiencia.sql
\ir ../views_curated/050_vw_pnp_servidores.sql
\ir ../views_curated/060_vw_pnp_financeiro.sql
\ir ../views_curated/070_vw_pnp_vanna.sql
\ir ../views_curated/004_mv_pnp_dashboard_fast.sql

-- Record the curated relation of each known endpoint, both in the dedicated
-- columns and under metadata.curated_relation.
UPDATE raw.pnp_endpoint_tables AS et
SET
    curated_relation_schema = 'curated',
    curated_relation_name = m.relation_name,
    metadata = jsonb_set(
        COALESCE(et.metadata, '{}'::jsonb),
        '{curated_relation}',
        to_jsonb(m.qualified_name),
        true
    ),
    updated_at = NOW()
FROM (
    VALUES
        ('matriculas'::text, 'vw_pnp_matriculas_perfil'::text, 'curated.vw_pnp_matriculas_perfil'::text),
        ('eficiencia_academica', 'vw_pnp_eficiencia_situacao', 'curated.vw_pnp_eficiencia_situacao'),
        ('servidores', 'vw_pnp_servidores_quadro', 'curated.vw_pnp_servidores_quadro'),
        ('financeiro', 'vw_pnp_financeiro_execucao', 'curated.vw_pnp_financeiro_execucao')
) AS m (endpoint_key, relation_name, qualified_name)
WHERE et.endpoint_key = m.endpoint_key;
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
-- Migration: give every row of raw.pnp_instances a UUID pipeline_id and copy
-- that identifier onto the selection and pipeline-endpoint rows that share the
-- same instance_key. Every step is guarded so the script can be re-run.

-- pgcrypto is enabled before gen_random_uuid() is used below.
CREATE EXTENSION IF NOT EXISTS pgcrypto;

-- 1. raw.pnp_instances: add the column, default it for new rows, backfill the
--    existing rows, and only then forbid NULLs.
ALTER TABLE raw.pnp_instances
    ADD COLUMN IF NOT EXISTS pipeline_id UUID;

ALTER TABLE raw.pnp_instances
    ALTER COLUMN pipeline_id SET DEFAULT gen_random_uuid();

UPDATE raw.pnp_instances
   SET pipeline_id = gen_random_uuid()
 WHERE pipeline_id IS NULL;

ALTER TABLE raw.pnp_instances
    ALTER COLUMN pipeline_id SET NOT NULL;

-- The unique constraint is added only when the catalog does not already list
-- a constraint with this name on raw.pnp_instances.
DO $$
BEGIN
    PERFORM 1
       FROM pg_constraint
      WHERE conrelid = 'raw.pnp_instances'::regclass
        AND conname = 'uq_raw_pnp_instances_pipeline_id';

    IF NOT FOUND THEN
        ALTER TABLE raw.pnp_instances
            ADD CONSTRAINT uq_raw_pnp_instances_pipeline_id UNIQUE (pipeline_id);
    END IF;
END $$;

-- 2. raw.pnp_instance_selection: add the column and align it with the owning
--    instance. Only rows whose value is missing or different are rewritten.
ALTER TABLE raw.pnp_instance_selection
    ADD COLUMN IF NOT EXISTS pipeline_id UUID;

UPDATE raw.pnp_instance_selection AS sel
   SET pipeline_id = inst.pipeline_id
  FROM raw.pnp_instances AS inst
 WHERE inst.instance_key = sel.instance_key
   AND sel.pipeline_id IS DISTINCT FROM inst.pipeline_id;

-- 3. raw.pnp_pipeline_endpoints: same propagation as for the selection table.
ALTER TABLE raw.pnp_pipeline_endpoints
    ADD COLUMN IF NOT EXISTS pipeline_id UUID;

UPDATE raw.pnp_pipeline_endpoints AS pe
   SET pipeline_id = inst.pipeline_id
  FROM raw.pnp_instances AS inst
 WHERE inst.instance_key = pe.instance_key
   AND pe.pipeline_id IS DISTINCT FROM inst.pipeline_id;

-- 4. Indexes led by pipeline_id on both dependent tables.
CREATE INDEX IF NOT EXISTS idx_raw_pnp_instance_selection_pipeline
    ON raw.pnp_instance_selection (pipeline_id, is_active, ano_base, tipo_microdados);

CREATE INDEX IF NOT EXISTS idx_raw_pnp_pipeline_endpoints_pipeline
    ON raw.pnp_pipeline_endpoints (pipeline_id, is_active, endpoint_key);
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
-- Upsert one download of raw "matriculas" rows into staging.pnp_matriculas.
--
-- The caller binds three named placeholders: run_id, instance_key and
-- download_id. Only source rows covered by an active instance selection
-- (same instance, base year and microdata type, and, when the selection pins
-- a URL, the same download URL) are considered.
WITH latest_rows AS (
    -- Keep a single row per (instance_key, record_hash): the one with the
    -- highest raw_record_id.
    SELECT DISTINCT ON (src.instance_key, src.record_hash)
        src.*
    FROM raw.pnp_matriculas_src AS src
    LEFT JOIN raw.pnp_downloads AS dl
        ON dl.download_id = src.download_id
    INNER JOIN raw.pnp_instance_selection AS sel
        ON sel.instance_key = src.instance_key
       AND sel.is_active = TRUE
       AND sel.ano_base = src.ano_base
       AND sel.tipo_microdados = src.tipo_microdados
       AND (
            sel.configured_microdados_url IS NULL
            OR sel.configured_microdados_url = dl.microdados_url
       )
    WHERE src.run_id = %(run_id)s
      AND src.instance_key IS NOT DISTINCT FROM %(instance_key)s
      AND src.download_id = %(download_id)s
    ORDER BY src.instance_key, src.record_hash, src.raw_record_id DESC
),
normalized_rows AS (
    -- Numeric text is trimmed, blanks become NULL, '.' is dropped and ','
    -- becomes the decimal point. The cleaned text is computed once here and
    -- validated before the cast below.
    SELECT
        latest_rows.*,
        REPLACE(REPLACE(NULLIF(BTRIM(latest_rows.total_de_inscritos), ''), '.', ''), ',', '.') AS total_de_inscritos_txt,
        REPLACE(REPLACE(NULLIF(BTRIM(latest_rows.vagas_ofertadas), ''), '.', ''), ',', '.') AS vagas_ofertadas_txt
    FROM latest_rows
)
INSERT INTO staging.pnp_matriculas (
    raw_record_id, run_id, instance_key,
    ano, instituicao, regiao, uf, municipio,
    sexo, cor_raca, renda_familiar, faixa_etaria,
    situacao_matricula, modalidade_ensino, tipo_curso, tipo_oferta, turno,
    eixo_tecnologico, subeixo_tecnologico, nome_curso,
    total_inscritos, vagas_ofertadas,
    processed_at
)
SELECT
    n.raw_record_id,
    n.run_id,
    n.instance_key,
    -- Year: cast only when the text is one to four digits, otherwise NULL.
    CASE
        WHEN NULLIF(n.ano, '') ~ '^[0-9]{1,4}$' THEN n.ano::INTEGER
        ELSE NULL
    END,
    -- Text attributes: empty strings are stored as NULL.
    NULLIF(n.instituicao, ''),
    NULLIF(n.regiao, ''),
    NULLIF(n.uf, ''),
    NULLIF(n.municipio, ''),
    NULLIF(n.sexo, ''),
    NULLIF(n.cor_raca, ''),
    NULLIF(n.renda_familiar, ''),
    NULLIF(n.faixa_etaria, ''),
    NULLIF(n.situacao_de_matricula, ''),
    NULLIF(n.modalidade_de_ensino, ''),
    NULLIF(n.tipo_de_curso, ''),
    NULLIF(n.tipo_de_oferta, ''),
    NULLIF(n.turno, ''),
    NULLIF(n.eixo_tecnologico, ''),
    NULLIF(n.subeixo_tecnologico, ''),
    NULLIF(n.nome_de_curso, ''),
    -- Numeric attributes: cast only when the cleaned text is a plain decimal.
    CASE
        WHEN n.total_de_inscritos_txt ~ '^-?[0-9]+(\.[0-9]+)?$'
            THEN n.total_de_inscritos_txt::NUMERIC
        ELSE NULL
    END,
    CASE
        WHEN n.vagas_ofertadas_txt ~ '^-?[0-9]+(\.[0-9]+)?$'
            THEN n.vagas_ofertadas_txt::NUMERIC
        ELSE NULL
    END,
    NOW()
FROM normalized_rows AS n
-- A raw record that was staged before is overwritten with the new values.
ON CONFLICT (raw_record_id) DO UPDATE
SET (
    run_id, instance_key,
    ano, instituicao, regiao, uf, municipio,
    sexo, cor_raca, renda_familiar, faixa_etaria,
    situacao_matricula, modalidade_ensino, tipo_curso, tipo_oferta, turno,
    eixo_tecnologico, subeixo_tecnologico, nome_curso,
    total_inscritos, vagas_ofertadas,
    processed_at
) = (
    EXCLUDED.run_id, EXCLUDED.instance_key,
    EXCLUDED.ano, EXCLUDED.instituicao, EXCLUDED.regiao, EXCLUDED.uf, EXCLUDED.municipio,
    EXCLUDED.sexo, EXCLUDED.cor_raca, EXCLUDED.renda_familiar, EXCLUDED.faixa_etaria,
    EXCLUDED.situacao_matricula, EXCLUDED.modalidade_ensino, EXCLUDED.tipo_curso, EXCLUDED.tipo_oferta, EXCLUDED.turno,
    EXCLUDED.eixo_tecnologico, EXCLUDED.subeixo_tecnologico, EXCLUDED.nome_curso,
    EXCLUDED.total_inscritos, EXCLUDED.vagas_ofertadas,
    NOW()
);
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
-- Upsert one download of raw "eficiencia_academica" rows into
-- staging.pnp_eficiencia_academica.
--
-- The caller binds three named placeholders: run_id, instance_key and
-- download_id. Only source rows covered by an active instance selection
-- (same instance, base year and microdata type, and, when the selection pins
-- a URL, the same download URL) are considered.
WITH latest_rows AS (
    -- Keep a single row per (instance_key, record_hash): the one with the
    -- highest raw_record_id.
    SELECT DISTINCT ON (src.instance_key, src.record_hash)
        src.*
    FROM raw.pnp_eficiencia_academica_src AS src
    LEFT JOIN raw.pnp_downloads AS dl
        ON dl.download_id = src.download_id
    INNER JOIN raw.pnp_instance_selection AS sel
        ON sel.instance_key = src.instance_key
       AND sel.is_active = TRUE
       AND sel.ano_base = src.ano_base
       AND sel.tipo_microdados = src.tipo_microdados
       AND (
            sel.configured_microdados_url IS NULL
            OR sel.configured_microdados_url = dl.microdados_url
       )
    WHERE src.run_id = %(run_id)s
      AND src.instance_key IS NOT DISTINCT FROM %(instance_key)s
      AND src.download_id = %(download_id)s
    ORDER BY src.instance_key, src.record_hash, src.raw_record_id DESC
)
INSERT INTO staging.pnp_eficiencia_academica (
    raw_record_id, run_id, instance_key,
    ano, instituicao, regiao, uf, municipio,
    sexo, cor_raca, renda_familiar, faixa_etaria,
    categoria_situacao, situacao_matricula, matricula_atendida,
    processed_at
)
SELECT
    r.raw_record_id,
    r.run_id,
    r.instance_key,
    -- Year: cast only when the text is one to four digits, otherwise NULL.
    CASE
        WHEN NULLIF(r.ano, '') ~ '^[0-9]{1,4}$' THEN r.ano::INTEGER
        ELSE NULL
    END,
    -- Text attributes: empty strings are stored as NULL.
    NULLIF(r.instituicao, ''),
    NULLIF(r.regiao, ''),
    NULLIF(r.uf, ''),
    NULLIF(r.municipio, ''),
    NULLIF(r.sexo, ''),
    NULLIF(r.cor_raca, ''),
    NULLIF(r.renda_familiar, ''),
    NULLIF(r.faixa_etaria, ''),
    NULLIF(r.categoria_da_situacao, ''),
    NULLIF(r.situacao_de_matricula, ''),
    NULLIF(r.matricula_atendida, ''),
    NOW()
FROM latest_rows AS r
-- A raw record that was staged before is overwritten with the new values.
ON CONFLICT (raw_record_id) DO UPDATE
SET (
    run_id, instance_key,
    ano, instituicao, regiao, uf, municipio,
    sexo, cor_raca, renda_familiar, faixa_etaria,
    categoria_situacao, situacao_matricula, matricula_atendida,
    processed_at
) = (
    EXCLUDED.run_id, EXCLUDED.instance_key,
    EXCLUDED.ano, EXCLUDED.instituicao, EXCLUDED.regiao, EXCLUDED.uf, EXCLUDED.municipio,
    EXCLUDED.sexo, EXCLUDED.cor_raca, EXCLUDED.renda_familiar, EXCLUDED.faixa_etaria,
    EXCLUDED.categoria_situacao, EXCLUDED.situacao_matricula, EXCLUDED.matricula_atendida,
    NOW()
);
|