@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,951 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import OrderedDict
4
+ from datetime import datetime, timezone
5
+ from typing import Any, Callable
6
+ from uuid import uuid4
7
+
8
+ import psycopg2
9
+ from psycopg2.extras import Json, RealDictCursor, execute_values
10
+ from . import pnp_dag_provisioner
11
+
12
# Value reported as "connector_id" on every connection/pipeline row built in this module.
PNP_INTERNAL_CONNECTOR_ID = "nilo_pecanha"
# Labels copied into request_params as "selected_source_group" / "selected_source_label".
PNP_POWERBI_GROUP_LABEL = "Microdados Publicos"
PNP_POWERBI_SOURCE_LABEL = "Catalogo publico de microdados via Power BI"

# Values of request_params["entity_type"] for connection rows and pipeline rows.
_CONNECTION_ENTITY = "connection"
_PIPELINE_ENTITY = "pipeline"
# selection_source written to raw.pnp_pipeline_endpoints when syncing pipeline endpoints.
_PIPELINE_ENDPOINT_SOURCE = "pipeline_selection"
19
+
20
+
21
class PnpConnectionNotFoundError(LookupError):
    """Lookup failure for a PNP connection (raise sites are outside this excerpt)."""
23
+
24
# Static catalog of PNP endpoints. Each entry names the raw table, staging
# table and curated relation for one microdata type; the entries are upserted
# into raw.pnp_endpoint_tables by _ensure_endpoint_catalog(), keyed on
# "endpoint_key".
_ENDPOINT_TABLE_CATALOG: tuple[dict[str, str | None], ...] = (
    {
        "endpoint_key": "matriculas",
        "endpoint_name": "Matrículas",
        "tipo_microdados": "Matrículas",
        "raw_table_schema": "raw",
        "raw_table_name": "pnp_matriculas_src",
        "staging_table_schema": "staging",
        "staging_table_name": "pnp_matriculas",
        "curated_relation_schema": "curated",
        "curated_relation_name": "vw_pnp_matriculas_perfil",
    },
    {
        "endpoint_key": "eficiencia_academica",
        "endpoint_name": "Eficiência Acadêmica",
        "tipo_microdados": "Eficiência Acadêmica",
        "raw_table_schema": "raw",
        "raw_table_name": "pnp_eficiencia_academica_src",
        "staging_table_schema": "staging",
        "staging_table_name": "pnp_eficiencia_academica",
        "curated_relation_schema": "curated",
        "curated_relation_name": "vw_pnp_eficiencia_situacao",
    },
    {
        "endpoint_key": "servidores",
        "endpoint_name": "Servidores",
        "tipo_microdados": "Servidores",
        "raw_table_schema": "raw",
        "raw_table_name": "pnp_servidores_src",
        "staging_table_schema": "staging",
        "staging_table_name": "pnp_servidores",
        "curated_relation_schema": "curated",
        "curated_relation_name": "vw_pnp_servidores_quadro",
    },
    {
        "endpoint_key": "financeiro",
        "endpoint_name": "Financeiro",
        "tipo_microdados": "Financeiro",
        "raw_table_schema": "raw",
        "raw_table_name": "pnp_financeiro_src",
        "staging_table_schema": "staging",
        "staging_table_name": "pnp_financeiro",
        "curated_relation_schema": "curated",
        "curated_relation_name": "vw_pnp_financeiro_execucao",
    },
)
70
+
71
+
72
+ def _connect(connect_factory: Callable[[], Any]):
73
+ conn = connect_factory()
74
+ if getattr(conn, "cursor_factory", None) is None and not isinstance(conn, psycopg2.extensions.connection):
75
+ return conn
76
+ return conn
77
+
78
+
79
+ def _normalize_selected_downloads(items: list[dict[str, Any]] | tuple[dict[str, Any], ...] | None) -> list[dict[str, str]]:
80
+ normalized: list[dict[str, str]] = []
81
+ seen: set[tuple[str, str, str]] = set()
82
+
83
+ for item in items or []:
84
+ if not isinstance(item, dict):
85
+ continue
86
+ ano_base = str(item.get("ano_base") or "").strip()
87
+ tipo_microdados = str(item.get("tipo_microdados") or "").strip()
88
+ microdados_url = str(item.get("microdados_url") or "").strip()
89
+ if not ano_base or not tipo_microdados or not microdados_url:
90
+ continue
91
+ key = (ano_base, tipo_microdados, microdados_url)
92
+ if key in seen:
93
+ continue
94
+ seen.add(key)
95
+ normalized.append(
96
+ {
97
+ "ano_base": ano_base,
98
+ "tipo_microdados": tipo_microdados,
99
+ "microdados_url": microdados_url,
100
+ }
101
+ )
102
+ return normalized
103
+
104
+
105
def _build_connection_request_params(connection_key: str, connection_name: str) -> dict[str, Any]:
    """Build the ``request_params`` payload describing a connection entity.

    Args:
        connection_key: Identifier stored under ``"connection_key"``.
        connection_name: Display name stored under ``"connection_name"``.

    Returns:
        A new dict with the fixed Power BI mode/source fields plus the given
        key and name.
    """
    return dict(
        mode="powerbi_microdados",
        entity_type=_CONNECTION_ENTITY,
        connection_key=connection_key,
        connection_name=connection_name,
        selected_source_label=PNP_POWERBI_SOURCE_LABEL,
        selected_source_group=PNP_POWERBI_GROUP_LABEL,
        source_path="powerbi_microdados",
    )
115
+
116
+
117
def _build_pipeline_request_params(
    *,
    pipeline_id: str,
    instance_key: str,
    instance_name: str,
    connection_key: str,
    connection_name: str,
    selected_years: list[str],
    selected_microdados_types: list[str],
    selected_downloads: list[dict[str, str]],
    schedule: str | None,
) -> dict[str, Any]:
    """Build the ``request_params`` payload describing a pipeline entity.

    The instance key/name are emitted twice, once under the ``pipeline_*``
    keys and once under the ``instance_*`` keys. The year and type lists are
    copied, and the downloads pass through ``_normalize_selected_downloads``.

    Args:
        pipeline_id: Stored under ``"pipeline_id"``.
        instance_key: Stored as both ``"pipeline_key"`` and ``"instance_key"``.
        instance_name: Stored as both ``"pipeline_name"`` and ``"instance_name"``.
        connection_key: Stored under ``"connection_key"``.
        connection_name: Stored under ``"connection_name"``.
        selected_years: Years to copy into ``"selected_years"``.
        selected_microdados_types: Types to copy into ``"selected_microdados_types"``.
        selected_downloads: Download descriptors, normalized before storage.
        schedule: Optional schedule; included (stripped) only when non-blank.

    Returns:
        The assembled request-params dict.
    """
    payload: dict[str, Any] = {
        "mode": "powerbi_microdados",
        "pipeline_id": pipeline_id,
        "entity_type": _PIPELINE_ENTITY,
        "pipeline_key": instance_key,
        "pipeline_name": instance_name,
        "connection_key": connection_key,
        "connection_name": connection_name,
        "instance_key": instance_key,
        "instance_name": instance_name,
        "selected_years": [*selected_years],
        "selected_microdados_types": [*selected_microdados_types],
        "selected_downloads": _normalize_selected_downloads(selected_downloads),
        "selected_source_label": PNP_POWERBI_SOURCE_LABEL,
        "selected_source_group": PNP_POWERBI_GROUP_LABEL,
        "source_path": "powerbi_microdados",
    }
    # A missing or whitespace-only schedule leaves the key out entirely.
    trimmed_schedule = schedule.strip() if schedule else None
    if trimmed_schedule:
        payload["schedule"] = trimmed_schedule
    return payload
149
+
150
+
151
+ def _endpoint_table_ref(schema_name: str | None, relation_name: str | None) -> str | None:
152
+ if not schema_name or not relation_name:
153
+ return None
154
+ return f"{schema_name}.{relation_name}"
155
+
156
+
157
def _ensure_endpoint_catalog(cur) -> None:
    """Upsert every ``_ENDPOINT_TABLE_CATALOG`` entry into ``raw.pnp_endpoint_tables``.

    Rows are matched on ``endpoint_key``. On conflict, all descriptive
    columns and the metadata are overwritten, ``is_active`` is forced to
    TRUE and ``updated_at`` is refreshed.

    Args:
        cur: Open database cursor used to run the statement.
    """
    catalog_columns = (
        "endpoint_key",
        "endpoint_name",
        "tipo_microdados",
        "raw_table_schema",
        "raw_table_name",
        "staging_table_schema",
        "staging_table_name",
        "curated_relation_schema",
        "curated_relation_name",
    )
    upsert_sql = """
        INSERT INTO raw.pnp_endpoint_tables (
            endpoint_key,
            endpoint_name,
            tipo_microdados,
            raw_table_schema,
            raw_table_name,
            staging_table_schema,
            staging_table_name,
            curated_relation_schema,
            curated_relation_name,
            metadata
        ) VALUES %s
        ON CONFLICT (endpoint_key) DO UPDATE
        SET
            endpoint_name = EXCLUDED.endpoint_name,
            tipo_microdados = EXCLUDED.tipo_microdados,
            raw_table_schema = EXCLUDED.raw_table_schema,
            raw_table_name = EXCLUDED.raw_table_name,
            staging_table_schema = EXCLUDED.staging_table_schema,
            staging_table_name = EXCLUDED.staging_table_name,
            curated_relation_schema = EXCLUDED.curated_relation_schema,
            curated_relation_name = EXCLUDED.curated_relation_name,
            is_active = TRUE,
            metadata = EXCLUDED.metadata,
            updated_at = NOW()
        """
    value_rows = []
    for entry in _ENDPOINT_TABLE_CATALOG:
        # Column values in INSERT order, followed by the JSON metadata column.
        column_values = tuple(entry[column] for column in catalog_columns)
        value_rows.append((*column_values, Json({"domain_key": entry["endpoint_key"]})))
    execute_values(cur, upsert_sql, value_rows)
203
+
204
+
205
def _sync_pipeline_endpoints(
    cur,
    *,
    pipeline_id: str,
    instance_key: str,
    connection_key: str | None,
    is_active: bool,
) -> None:
    """Reconcile ``raw.pnp_pipeline_endpoints`` with an instance's active selections.

    Runs three statements on ``cur``:

    1. Refreshes the endpoint catalog via ``_ensure_endpoint_catalog``.
    2. Upserts one pipeline-endpoint row per catalog endpoint whose
       ``tipo_microdados`` appears among the instance's active rows in
       ``raw.pnp_instance_selection`` (conflict target: instance_key +
       endpoint_key).
    3. Marks as inactive every pipeline-endpoint row of the instance whose
       endpoint is no longer backed by an active selection.

    Args:
        cur: Open database cursor.
        pipeline_id: Pipeline identifier written to the upserted rows.
        instance_key: Instance whose selections drive the sync.
        connection_key: Connection key written to upserted and deactivated rows.
        is_active: Active flag written to the upserted rows.
    """
    _ensure_endpoint_catalog(cur)
    cur.execute(
        """
        WITH selected_types AS (
            SELECT DISTINCT tipo_microdados
            FROM raw.pnp_instance_selection
            WHERE instance_key = %s
              AND is_active = TRUE
        )
        INSERT INTO raw.pnp_pipeline_endpoints (
            pipeline_id,
            instance_key,
            connection_key,
            endpoint_key,
            selection_source,
            is_active,
            metadata
        )
        SELECT
            %s,
            %s,
            %s,
            et.endpoint_key,
            %s,
            %s,
            jsonb_build_object(
                'tipo_microdados', et.tipo_microdados,
                'raw_table', concat_ws('.', et.raw_table_schema, et.raw_table_name),
                'staging_table', CASE
                    WHEN et.staging_table_name IS NULL THEN NULL
                    ELSE concat_ws('.', et.staging_table_schema, et.staging_table_name)
                END
            )
        FROM selected_types st
        JOIN raw.pnp_endpoint_tables et
          ON et.tipo_microdados = st.tipo_microdados
        ON CONFLICT (instance_key, endpoint_key) DO UPDATE
        SET
            pipeline_id = EXCLUDED.pipeline_id,
            connection_key = EXCLUDED.connection_key,
            selection_source = EXCLUDED.selection_source,
            is_active = EXCLUDED.is_active,
            metadata = EXCLUDED.metadata,
            updated_at = NOW()
        """,
        # Positional order must match the %s placeholders above: the first
        # value feeds the CTE filter, the remaining five feed the SELECT list.
        (
            instance_key,
            pipeline_id,
            instance_key,
            connection_key,
            _PIPELINE_ENDPOINT_SOURCE,
            is_active,
        ),
    )
    cur.execute(
        """
        UPDATE raw.pnp_pipeline_endpoints
        SET
            connection_key = %s,
            is_active = FALSE,
            updated_at = NOW()
        WHERE instance_key = %s
          AND endpoint_key NOT IN (
              SELECT et.endpoint_key
              FROM raw.pnp_instance_selection s
              JOIN raw.pnp_endpoint_tables et
                ON et.tipo_microdados = s.tipo_microdados
              WHERE s.instance_key = %s
                AND s.is_active = TRUE
          )
        """,
        # instance_key is bound twice: outer WHERE and the NOT IN subquery.
        (connection_key, instance_key, instance_key),
    )
286
+
287
+
288
def _build_connection_row(row: dict[str, Any]) -> dict[str, Any]:
    """Convert a ``raw.pnp_connections`` record into the endpoint-shaped row dict.

    Args:
        row: Mapping that must contain ``connection_key`` and
            ``connection_name``; ``page_url``, ``is_active``, ``created_at``,
            ``updated_at`` and ``metadata`` are read when present.

    Returns:
        A dict with a null ``id``, the fixed connector id, a synthetic
        ``endpoint_key``/``description``, the connection request params and a
        copy of the row's metadata under ``"_raw_metadata"``.
    """
    params = _build_connection_request_params(
        connection_key=str(row["connection_key"]),
        connection_name=str(row["connection_name"]),
    )
    synthetic_key = f"{row['connection_key']}__connection"
    summary = f"{row['connection_name']} - conexão PNP"
    metadata_copy = dict(row.get("metadata") or {})

    result: dict[str, Any] = {
        "id": None,
        "connector_id": PNP_INTERNAL_CONNECTOR_ID,
        "endpoint_key": synthetic_key,
        "description": summary,
        "page_url": row.get("page_url"),
        "api_endpoint_url": None,
        "csv_url": None,
        "dictionary_url": None,
        "request_params": params,
    }
    for passthrough in ("is_active", "created_at", "updated_at"):
        result[passthrough] = row.get(passthrough)
    result["_raw_metadata"] = metadata_copy
    return result
308
+
309
+
310
def _group_instance_records(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collapse instance/selection join rows into one record per instance.

    Each input row carries the instance columns plus, optionally, one
    selection (``ano_base``, ``tipo_microdados``,
    ``configured_microdados_url``). Rows sharing an ``instance_key`` are
    merged: the instance-level fields come from the first row seen, while the
    selection values are accumulated, without duplicates and in arrival
    order, into ``request_params["selected_years"]``,
    ``["selected_microdados_types"]`` and ``["selected_downloads"]``.

    Args:
        rows: Row dicts; ``instance_key``, ``pipeline_id`` and
            ``instance_name`` are required on the first row of each instance,
            the first two on every row.

    Returns:
        One endpoint-shaped dict per instance, in first-seen order.
    """
    grouped: OrderedDict[str, dict[str, Any]] = OrderedDict()

    for row in rows:
        instance_key = str(row["instance_key"])
        pipeline_id = str(row["pipeline_id"])

        instance = grouped.get(instance_key)
        if instance is None:
            # Build the base record only the first time an instance is seen;
            # passing it as a setdefault() default would construct and throw
            # away the whole nested dict for every additional row.
            instance = {
                "id": row.get("legacy_endpoint_id"),
                "connector_id": PNP_INTERNAL_CONNECTOR_ID,
                "endpoint_key": row.get("legacy_endpoint_key") or f"{instance_key}__powerbi_microdados",
                "description": f"{row['instance_name']} - {PNP_POWERBI_SOURCE_LABEL}",
                "page_url": row.get("page_url"),
                "api_endpoint_url": None,
                "csv_url": None,
                "dictionary_url": None,
                "request_params": {
                    "mode": "powerbi_microdados",
                    "pipeline_id": pipeline_id,
                    "entity_type": _PIPELINE_ENTITY,
                    "pipeline_key": instance_key,
                    "pipeline_name": row["instance_name"],
                    "connection_key": row.get("connection_key") or instance_key,
                    "connection_name": row.get("connection_name") or row["instance_name"],
                    "instance_key": instance_key,
                    "instance_name": row["instance_name"],
                    "selected_years": [],
                    "selected_microdados_types": [],
                    "selected_downloads": [],
                    "selected_source_label": PNP_POWERBI_SOURCE_LABEL,
                    "selected_source_group": PNP_POWERBI_GROUP_LABEL,
                    "source_path": "powerbi_microdados",
                },
                "is_active": row.get("is_active"),
                "created_at": row.get("created_at"),
                "updated_at": row.get("updated_at"),
                "_raw_metadata": dict(row.get("metadata") or {}),
            }
            grouped[instance_key] = instance

        request_params = instance["request_params"]
        if row.get("schedule"):
            request_params["schedule"] = row["schedule"]
        # The latest row's pipeline_id wins, as in the per-row overwrite before.
        request_params["pipeline_id"] = pipeline_id
        if row.get("deleted_at"):
            instance["_raw_metadata"] = {
                **instance["_raw_metadata"],
                "deleted": True,
                "deleted_at": row["deleted_at"].isoformat(),
            }

        ano_base = row.get("ano_base")
        tipo_microdados = row.get("tipo_microdados")
        microdados_url = row.get("configured_microdados_url")

        if isinstance(ano_base, str) and ano_base.strip():
            years = request_params["selected_years"]
            if ano_base not in years:
                years.append(ano_base)
        if isinstance(tipo_microdados, str) and tipo_microdados.strip():
            types = request_params["selected_microdados_types"]
            if tipo_microdados not in types:
                types.append(tipo_microdados)
        # A download needs a non-blank URL plus string year and type values.
        if (
            isinstance(microdados_url, str)
            and microdados_url.strip()
            and isinstance(ano_base, str)
            and isinstance(tipo_microdados, str)
        ):
            downloads = request_params["selected_downloads"]
            candidate = {
                "ano_base": ano_base,
                "tipo_microdados": tipo_microdados,
                "microdados_url": microdados_url,
            }
            if candidate not in downloads:
                downloads.append(candidate)

    return list(grouped.values())
383
+
384
+
385
def _attach_pipeline_endpoint_catalog(
    connect_factory: Callable[[], Any],
    instances: list[dict[str, Any]],
    *,
    include_deleted: bool = False,
) -> list[dict[str, Any]]:
    """Enrich instance records with their pipeline endpoint/table catalog.

    Queries ``raw.pnp_pipeline_endpoints`` joined with
    ``raw.pnp_endpoint_tables`` for all given instances, then sets
    ``request_params["endpoint_tables"]`` (one descriptor per endpoint) and
    ``request_params["selected_endpoints"]`` (the endpoint keys) on each
    instance, mutating the dicts in place.

    Args:
        connect_factory: Zero-argument callable producing a DB connection.
        instances: Instance records, each with ``request_params["instance_key"]``.
        include_deleted: When false, only rows active in both tables are used.

    Returns:
        The same ``instances`` list (returned untouched when empty).
    """
    if not instances:
        return instances

    if include_deleted:
        active_filter = ""
    else:
        active_filter = "AND pe.is_active = TRUE AND et.is_active = TRUE"
    wanted_keys = [str(record["request_params"]["instance_key"]) for record in instances]
    query = f"""
            SELECT
                pe.instance_key,
                pe.connection_key,
                pe.endpoint_key,
                pe.selection_source,
                pe.is_active,
                pe.metadata AS pipeline_endpoint_metadata,
                et.endpoint_name,
                et.tipo_microdados,
                et.raw_table_schema,
                et.raw_table_name,
                et.staging_table_schema,
                et.staging_table_name,
                et.curated_relation_schema,
                et.curated_relation_name,
                et.metadata AS endpoint_metadata
            FROM raw.pnp_pipeline_endpoints pe
            JOIN raw.pnp_endpoint_tables et
              ON et.endpoint_key = pe.endpoint_key
            WHERE pe.instance_key = ANY(%s)
              {active_filter}
            ORDER BY pe.instance_key, et.endpoint_name
            """

    with _connect(connect_factory) as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (wanted_keys,))
        fetched = [dict(record) for record in cur.fetchall()]

    by_instance: dict[str, list[dict[str, Any]]] = {}
    for record in fetched:
        by_instance.setdefault(str(record["instance_key"]), []).append(record)

    def describe(record: dict[str, Any]) -> dict[str, Any]:
        # Pipeline-level metadata overrides catalog-level metadata on key clashes.
        merged_metadata = {
            **dict(record.get("endpoint_metadata") or {}),
            **dict(record.get("pipeline_endpoint_metadata") or {}),
        }
        return {
            "endpoint_key": str(record["endpoint_key"]),
            "endpoint_name": str(record["endpoint_name"]),
            "tipo_microdados": str(record["tipo_microdados"]),
            "selection_source": record.get("selection_source"),
            "raw_table": _endpoint_table_ref(record.get("raw_table_schema"), record.get("raw_table_name")),
            "staging_table": _endpoint_table_ref(record.get("staging_table_schema"), record.get("staging_table_name")),
            "curated_relation": _endpoint_table_ref(
                record.get("curated_relation_schema"),
                record.get("curated_relation_name"),
            ),
            "metadata": merged_metadata,
        }

    for instance in instances:
        request_params = instance.setdefault("request_params", {})
        lookup_key = str(request_params.get("instance_key") or "")
        endpoint_tables = [describe(record) for record in by_instance.get(lookup_key, [])]
        request_params["selected_endpoints"] = [table["endpoint_key"] for table in endpoint_tables]
        request_params["endpoint_tables"] = endpoint_tables
    return instances
455
+
456
+
457
def _load_connection_rows(connect_factory: Callable[[], Any], *, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Fetch every row of ``raw.pnp_connections`` as plain dicts.

    Rows are ordered by connection name, then connection key.

    Args:
        connect_factory: Zero-argument callable producing a DB connection.
        include_deleted: Accepted but not used by the query; every
            connection row is returned regardless of its value.

    Returns:
        One dict per connection row.
    """
    query = """
            SELECT
                connection_key,
                connection_name,
                page_url,
                is_active,
                metadata,
                created_at,
                updated_at
            FROM raw.pnp_connections
            ORDER BY connection_name, connection_key
            """
    with _connect(connect_factory) as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query)
        fetched = cur.fetchall()
        return [dict(record) for record in fetched]
474
+
475
+
476
def _load_instance_source_rows(
    connect_factory: Callable[[], Any],
    *,
    instance_key: str | None = None,
    include_deleted: bool = False,
) -> list[dict[str, Any]]:
    """Load instances left-joined with their selections as flat row dicts.

    Each returned row holds the ``raw.pnp_instances`` columns plus the
    columns of one matching ``raw.pnp_instance_selection`` row (NULL when the
    instance has no matching selection).

    Args:
        connect_factory: Zero-argument callable producing a DB connection.
        instance_key: When given, restricts the result to that instance.
        include_deleted: When false, instances with a ``deleted_at`` value
            and inactive selections are excluded.

    Returns:
        Row dicts ordered by instance name/key, then selection rank, year
        (descending) and microdata type.
    """
    # The SQL fragments below are fixed strings chosen by the flags; the only
    # caller-supplied value (instance_key) is bound as a query parameter.
    deleted_filter = "" if include_deleted else "AND i.deleted_at IS NULL"
    # Applied inside the JOIN's ON clause, so an instance without active
    # selections still yields a row.
    selection_filter = "" if include_deleted else "AND COALESCE(s.is_active, TRUE) = TRUE"
    params: list[Any] = []
    instance_filter = ""
    if instance_key is not None:
        instance_filter = "AND i.instance_key = %s"
        params.append(instance_key)

    with _connect(connect_factory) as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            f"""
            SELECT
                i.pipeline_id,
                i.instance_key,
                i.instance_name,
                i.connection_key,
                i.connection_name,
                i.page_url,
                i.schedule,
                i.is_active,
                i.legacy_mode,
                i.legacy_endpoint_id,
                i.legacy_endpoint_key,
                i.metadata,
                i.created_at,
                i.updated_at,
                i.deleted_at,
                s.selection_id,
                s.ano_base,
                s.tipo_microdados,
                s.configured_microdados_url,
                s.is_active AS selection_is_active,
                s.selection_rank
            FROM raw.pnp_instances i
            LEFT JOIN raw.pnp_instance_selection s
              ON s.instance_key = i.instance_key
              {selection_filter}
            WHERE 1 = 1
              {deleted_filter}
              {instance_filter}
            ORDER BY
                i.instance_name,
                i.instance_key,
                COALESCE(s.selection_rank, 2147483647),
                s.ano_base DESC NULLS LAST,
                s.tipo_microdados NULLS LAST
            """,
            tuple(params),
        )
        return [dict(row) for row in cur.fetchall()]
532
+
533
+
534
def load_all_rows(connect_factory: Callable[[], Any], *, include_deleted: bool = False) -> list[dict[str, Any]]:
    """Return connection rows followed by instance rows in a single list.

    Connection rows are built with ``_build_connection_row``. Instance rows
    are loaded, grouped by ``_group_instance_records`` and then passed through
    ``_attach_pipeline_endpoint_catalog``.

    Args:
        connect_factory: Zero-argument callable handed on to the loaders.
        include_deleted: Forwarded unchanged to every loader call.

    Returns:
        All connection rows first, then all instance rows.
    """
    connection_records = _load_connection_rows(connect_factory, include_deleted=include_deleted)
    combined = [_build_connection_row(record) for record in connection_records]

    source_rows = _load_instance_source_rows(connect_factory, include_deleted=include_deleted)
    grouped_instances = _group_instance_records(source_rows)
    instance_rows = _attach_pipeline_endpoint_catalog(
        connect_factory,
        grouped_instances,
        include_deleted=include_deleted,
    )

    combined.extend(instance_rows)
    return combined
544
+
545
+
546
def load_instance_rows(
    connect_factory: Callable[[], Any],
    instance_key: str,
    *,
    include_deleted: bool = False,
) -> list[dict[str, Any]]:
    """Load the grouped, catalog-enriched rows for one instance.

    Args:
        connect_factory: Zero-argument callable handed on to the loaders.
        instance_key: Key of the instance to load.
        include_deleted: Forwarded unchanged to the loader and to
            ``_attach_pipeline_endpoint_catalog``.

    Returns:
        The non-empty list produced by ``_attach_pipeline_endpoint_catalog``.

    Raises:
        LookupError: If nothing was found for ``instance_key``; the key is the
            exception argument.
    """
    source_rows = _load_instance_source_rows(
        connect_factory,
        instance_key=instance_key,
        include_deleted=include_deleted,
    )
    grouped_instances = _group_instance_records(source_rows)
    found = _attach_pipeline_endpoint_catalog(
        connect_factory,
        grouped_instances,
        include_deleted=include_deleted,
    )
    if found:
        return found
    raise LookupError(instance_key)
566
+
567
+
568
def create_connection(
    connect_factory: Callable[[], Any],
    *,
    connection_key: str,
    connection_name: str,
    page_url: str,
    is_active: bool,
) -> None:
    """Insert one row into ``raw.pnp_connections``.

    The ``metadata`` column is filled with a fixed JSON document describing
    the Power BI microdados source.

    Args:
        connect_factory: Zero-argument callable handed to ``_connect``.
        connection_key: Key of the new connection.
        connection_name: Display name of the new connection.
        page_url: URL stored in the ``page_url`` column.
        is_active: Value stored in the ``is_active`` column.
    """
    source_metadata = {
        "selected_source_label": PNP_POWERBI_SOURCE_LABEL,
        "selected_source_group": PNP_POWERBI_GROUP_LABEL,
        "source_path": "powerbi_microdados",
    }
    insert_sql = """
        INSERT INTO raw.pnp_connections (
            connection_key,
            connection_name,
            page_url,
            is_active,
            metadata
        )
        VALUES (%s, %s, %s, %s, %s)
    """
    with _connect(connect_factory) as conn:
        with conn.cursor() as cur:
            cur.execute(
                insert_sql,
                (connection_key, connection_name, page_url, is_active, Json(source_metadata)),
            )
602
+
603
+
604
def load_connection(
    connect_factory: Callable[[], Any],
    connection_key: str,
    *,
    include_deleted: bool = False,
) -> dict[str, Any]:
    """Load one connection and return it shaped by ``_build_connection_row``.

    Args:
        connect_factory: Zero-argument callable handed to ``_connect``.
        connection_key: Key of the connection to load.
        include_deleted: Forwarded to ``_load_connection_record``.

    Returns:
        The connection row as built by ``_build_connection_row``.

    Raises:
        PnpConnectionNotFoundError: If no connection has that key.
    """
    with _connect(connect_factory) as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            record = _load_connection_record(cur, connection_key, include_deleted=include_deleted)
            if record:
                return _build_connection_row(record)
            raise PnpConnectionNotFoundError(connection_key)
615
+
616
+
617
+ def _load_connection_record(
618
+ cur,
619
+ connection_key: str,
620
+ *,
621
+ include_deleted: bool = False,
622
+ ) -> dict[str, Any] | None:
623
+ cur.execute(
624
+ """
625
+ SELECT
626
+ connection_key,
627
+ connection_name,
628
+ page_url,
629
+ is_active,
630
+ metadata,
631
+ created_at,
632
+ updated_at
633
+ FROM raw.pnp_connections
634
+ WHERE connection_key = %s
635
+ """,
636
+ (connection_key,),
637
+ )
638
+ row = cur.fetchone()
639
+ return dict(row) if row else None
640
+
641
+
642
def create_instance(
    connect_factory: Callable[[], Any],
    *,
    instance_key: str,
    instance_name: str,
    connection_key: str,
    selected_years: list[str],
    selected_microdados_types: list[str],
    selected_downloads: list[dict[str, str]],
    schedule: str | None,
    is_active: bool,
) -> None:
    """Create a pipeline instance, its selections and its DAG.

    Steps, in order, inside one ``_connect`` block:

    1. Look up the parent connection.
    2. Physically delete any ``raw.pnp_instances`` row with the same key whose
       ``deleted_at`` is set.
    3. Insert the new instance row, copying ``connection_name`` and
       ``page_url`` from the connection.
    4. Insert selection rows: one per normalized download, or, when there are
       none, one per (year, microdados type) combination with a NULL URL.
    5. Call ``_sync_pipeline_endpoints`` and reset the legacy endpoint columns
       to NULL.

    Afterwards ``pnp_dag_provisioner.provision_pipeline_dag`` is called.

    Args:
        connect_factory: Zero-argument callable handed to ``_connect``.
        instance_key: Key of the new instance.
        instance_name: Display name of the new instance.
        connection_key: Key of the connection the instance belongs to.
        selected_years: Years used for the fallback year/type matrix.
        selected_microdados_types: Types used for the fallback matrix.
        selected_downloads: Explicit downloads; passed through
            ``_normalize_selected_downloads`` before use.
        schedule: Schedule stored on the instance and passed to the DAG
            provisioner.
        is_active: Active flag stored on the instance and passed on.

    Raises:
        PnpConnectionNotFoundError: If ``connection_key`` does not exist.
    """
    normalized_downloads = _normalize_selected_downloads(selected_downloads)
    pipeline_id = str(uuid4())

    with _connect(connect_factory) as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            connection = _load_connection_record(cur, connection_key)
            if not connection:
                raise PnpConnectionNotFoundError(connection_key)

            # Remove a soft-deleted row still holding this key (presumably so
            # the insert below can reuse it - confirm against the schema).
            cur.execute(
                """
                DELETE FROM raw.pnp_instances
                WHERE instance_key = %s
                    AND deleted_at IS NOT NULL
                """,
                (instance_key,),
            )

            instance_values = (
                pipeline_id,
                instance_key,
                instance_name,
                connection_key,
                connection["connection_name"],
                connection["page_url"],
                schedule,
                is_active,
                "powerbi_microdados",
                Json(
                    {
                        "selected_source_label": PNP_POWERBI_SOURCE_LABEL,
                        "selected_source_group": PNP_POWERBI_GROUP_LABEL,
                        "source_path": "powerbi_microdados",
                    }
                ),
            )
            cur.execute(
                """
                INSERT INTO raw.pnp_instances (
                    pipeline_id,
                    instance_key,
                    instance_name,
                    connection_key,
                    connection_name,
                    page_url,
                    schedule,
                    is_active,
                    legacy_mode,
                    metadata
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """,
                instance_values,
            )

            # Preferred source: the explicitly selected downloads, ranked in
            # the order they were normalized (rank starts at 1).
            selection_rows = []
            for rank, download in enumerate(normalized_downloads, start=1):
                selection_rows.append(
                    (
                        pipeline_id,
                        instance_key,
                        download["ano_base"],
                        download["tipo_microdados"],
                        download["microdados_url"],
                        True,
                        rank,
                        Json({"selection_source": "selected_downloads"}),
                    )
                )

            # Fallback: the full year x type matrix, years varying slowest,
            # with no configured URL.
            if not selection_rows:
                rank = 0
                for year in selected_years:
                    for microdados_type in selected_microdados_types:
                        rank += 1
                        selection_rows.append(
                            (
                                pipeline_id,
                                instance_key,
                                year,
                                microdados_type,
                                None,
                                True,
                                rank,
                                Json({"selection_source": "year_type_matrix"}),
                            )
                        )

            execute_values(
                cur,
                """
                INSERT INTO raw.pnp_instance_selection (
                    pipeline_id,
                    instance_key,
                    ano_base,
                    tipo_microdados,
                    configured_microdados_url,
                    is_active,
                    selection_rank,
                    metadata
                ) VALUES %s
                """,
                selection_rows,
            )

            _sync_pipeline_endpoints(
                cur,
                pipeline_id=pipeline_id,
                instance_key=instance_key,
                connection_key=str(connection["connection_key"]),
                is_active=is_active,
            )

            cur.execute(
                """
                UPDATE raw.pnp_instances
                SET
                    legacy_endpoint_id = NULL,
                    legacy_endpoint_key = NULL
                WHERE instance_key = %s
                """,
                (instance_key,),
            )

    pnp_dag_provisioner.provision_pipeline_dag(
        pipeline_id=pipeline_id,
        instance_key=instance_key,
        schedule=schedule,
        is_active=is_active,
    )
779
+
780
+
781
def update_instance_settings(
    connect_factory: Callable[[], Any],
    *,
    instance_key: str,
    schedule: str | None = None,
    is_active: bool | None = None,
) -> None:
    """Update the schedule and/or active flag of a non-deleted instance.

    The instance row is read, the new values are written together with
    ``updated_at = NOW()``, ``_sync_pipeline_endpoints`` is called with the
    resulting active flag, and finally the DAG is re-provisioned through
    ``pnp_dag_provisioner.provision_pipeline_dag``.

    Args:
        connect_factory: Zero-argument callable handed to ``_connect``.
        instance_key: Key of the instance to update.
        schedule: New schedule. ``None`` keeps the stored value, so this
            function cannot be used to clear a schedule.
        is_active: New active flag. ``None`` keeps the stored value.

    Raises:
        LookupError: If no instance with that key and a NULL ``deleted_at``
            exists; the key is the exception argument.
    """
    with _connect(connect_factory) as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(
            """
            SELECT
                pipeline_id,
                instance_key,
                instance_name,
                connection_key,
                connection_name,
                page_url,
                schedule,
                is_active,
                deleted_at
            FROM raw.pnp_instances
            WHERE instance_key = %s
                AND deleted_at IS NULL
            """,
            (instance_key,),
        )
        row = cur.fetchone()
        if not row:
            raise LookupError(instance_key)

        # ``None`` means "leave as stored" for both settings.
        next_schedule = schedule if schedule is not None else row["schedule"]
        next_is_active = is_active if is_active is not None else row["is_active"]

        cur.execute(
            """
            UPDATE raw.pnp_instances
            SET
                schedule = %s,
                is_active = %s,
                updated_at = NOW()
            WHERE instance_key = %s
            """,
            (next_schedule, next_is_active, instance_key),
        )

        _sync_pipeline_endpoints(
            cur,
            pipeline_id=str(row["pipeline_id"]),
            instance_key=instance_key,
            connection_key=str(row["connection_key"]),
            is_active=bool(next_is_active),
        )

    pnp_dag_provisioner.provision_pipeline_dag(
        pipeline_id=str(row["pipeline_id"]),
        instance_key=instance_key,
        schedule=next_schedule,
        is_active=bool(next_is_active),
    )
868
+
869
+
870
def delete_instance(connect_factory: Callable[[], Any], *, instance_key: str) -> dict[str, Any]:
    """Physically delete an instance row and remove its DAG.

    The row is read first (regardless of ``deleted_at``) so its identifiers
    can be reported, then deleted from ``raw.pnp_instances``. Afterwards
    ``pnp_dag_provisioner.remove_pipeline_dag`` is called.

    Args:
        connect_factory: Zero-argument callable handed to ``_connect``.
        instance_key: Key of the instance to delete.

    Returns:
        A summary dict with ``pipeline_id``, ``instance_key``,
        ``instance_name``, ``deleted_endpoint_count`` (the DELETE's row
        count), ``mode`` (always ``"physical_delete"``) and
        ``already_deleted`` (true when the DELETE affected no rows).

    Raises:
        LookupError: If no instance row has that key.
    """
    with _connect(connect_factory) as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT
                    pipeline_id,
                    instance_key,
                    instance_name,
                    legacy_endpoint_id,
                    deleted_at
                FROM raw.pnp_instances
                WHERE instance_key = %s
                """,
                (instance_key,),
            )
            row = cur.fetchone()
            if not row:
                raise LookupError(instance_key)

            cur.execute(
                """
                DELETE FROM raw.pnp_instances
                WHERE instance_key = %s
                """,
                (instance_key,),
            )
            deleted_count = cur.rowcount

    pipeline_id = str(row["pipeline_id"])
    pnp_dag_provisioner.remove_pipeline_dag(
        instance_key=instance_key,
        pipeline_id=pipeline_id,
    )

    summary: dict[str, Any] = {}
    summary["pipeline_id"] = pipeline_id
    summary["instance_key"] = str(row["instance_key"])
    summary["instance_name"] = str(row["instance_name"])
    summary["deleted_endpoint_count"] = deleted_count
    summary["mode"] = "physical_delete"
    summary["already_deleted"] = deleted_count == 0
    return summary
910
+
911
+
912
def delete_connection(connect_factory: Callable[[], Any], *, connection_key: str) -> dict[str, Any]:
    """Delete a connection after deleting its non-deleted instances.

    Three phases, each using its own ``_connect`` block:

    1. Load the connection and list the keys of its instances whose
       ``deleted_at`` is NULL.
    2. Call ``delete_instance`` once per key, in ``instance_key`` order.
    3. Delete the ``raw.pnp_connections`` row.

    Args:
        connect_factory: Zero-argument callable handed to ``_connect`` and to
            ``delete_instance``.
        connection_key: Key of the connection to delete.

    Returns:
        A summary dict with ``connection_key``, ``connection_name``,
        ``deleted_endpoint_count`` (instance rows plus connection rows
        deleted), ``mode`` (always ``"physical_delete"``) and
        ``already_deleted`` (true when the connection DELETE affected no
        rows).

    Raises:
        PnpConnectionNotFoundError: If the connection does not exist.
    """
    # NOTE(review): instances with a non-NULL deleted_at are not cascaded
    # here and keep referencing this connection_key - confirm that is intended.
    with _connect(connect_factory) as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            connection = _load_connection_record(cur, connection_key, include_deleted=True)
            if not connection:
                raise PnpConnectionNotFoundError(connection_key)

            cur.execute(
                """
                SELECT instance_key
                FROM raw.pnp_instances
                WHERE connection_key = %s
                    AND deleted_at IS NULL
                ORDER BY instance_key
                """,
                (connection_key,),
            )
            instance_keys = [str(found["instance_key"]) for found in cur.fetchall()]

    # Each instance is removed through delete_instance, which opens its own
    # connection via the same factory.
    deleted_instances = sum(
        int(delete_instance(connect_factory, instance_key=key).get("deleted_endpoint_count") or 0)
        for key in instance_keys
    )

    with _connect(connect_factory) as conn:
        with conn.cursor() as cur:
            cur.execute(
                """
                DELETE FROM raw.pnp_connections
                WHERE connection_key = %s
                """,
                (connection_key,),
            )
            deleted_connection_rows = cur.rowcount

    summary: dict[str, Any] = {}
    summary["connection_key"] = str(connection["connection_key"])
    summary["connection_name"] = str(connection["connection_name"])
    summary["deleted_endpoint_count"] = deleted_instances + deleted_connection_rows
    summary["mode"] = "physical_delete"
    summary["already_deleted"] = deleted_connection_rows == 0
    return summary