@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,783 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import uuid
5
+ from datetime import UTC, datetime
6
+ from typing import Any
7
+
8
+ from dataif_pipelines.connectors.base.types import RunContext
9
+ from dataif_pipelines.connectors.nilo_pecanha.config import load_config
10
+ from dataif_pipelines.connectors.nilo_pecanha.connector import NiloPecanhaConnector
11
+ from dataif_pipelines.repositories import pnp_raw_repository
12
+ from dataif_pipelines.services import (
13
+ pnp_curated_service,
14
+ pnp_quality_service,
15
+ pnp_raw_ingestion_service,
16
+ pnp_staging_service,
17
+ powerbi_catalog_service,
18
+ )
19
+
20
+
21
def _warehouse_dsn() -> str:
    """Return the warehouse DSN read from the ``WAREHOUSE_DSN`` environment variable.

    Raises:
        RuntimeError: If the variable is unset or empty.
    """
    value = os.environ.get("WAREHOUSE_DSN")
    if value:
        return value
    raise RuntimeError("WAREHOUSE_DSN is required")
26
+
27
+
28
+ def _utcnow() -> datetime:
29
+ return datetime.now(tz=UTC)
30
+
31
+
32
+ def _step_details(
33
+ *,
34
+ dag_id: str,
35
+ dag_run_id: str,
36
+ task_id: str,
37
+ logical_date: str | None,
38
+ map_index: int | None,
39
+ extra: dict[str, Any] | None = None,
40
+ ) -> dict[str, Any]:
41
+ details = {
42
+ "airflow_dag_id": dag_id,
43
+ "airflow_dag_run_id": dag_run_id,
44
+ "airflow_task_id": task_id,
45
+ "logical_date": logical_date,
46
+ }
47
+ if map_index is not None and map_index >= 0:
48
+ details["map_index"] = map_index
49
+ if extra:
50
+ details.update(extra)
51
+ return details
52
+
53
+
54
def _start_step(
    *,
    run_id: str,
    instance_key: str | None,
    dag_id: str,
    dag_run_id: str,
    task_id: str,
    logical_date: str | None,
    map_index: int | None,
    extra: dict[str, Any] | None = None,
) -> None:
    """Record that a run step has started, with status ``"running"``.

    The step is registered through
    ``pnp_raw_repository.register_run_step_start`` using the warehouse DSN,
    the details payload from ``_step_details`` and the current UTC time as
    the start timestamp.

    Args:
        run_id: Identifier of the pipeline run the step belongs to.
        instance_key: Instance the run targets, if any.
        dag_id: DAG identifier, recorded in the details payload.
        dag_run_id: DAG run identifier, recorded in the details payload.
        task_id: Task identifier; also passed as ``airflow_task_id``.
        logical_date: Logical date string, recorded in the details payload.
        map_index: Map index of the task, forwarded as-is.
        extra: Optional additional details entries.
    """
    # The DSN is resolved first so a missing configuration fails before any
    # other work is done.
    dsn = _warehouse_dsn()
    step_payload = _step_details(
        dag_id=dag_id,
        dag_run_id=dag_run_id,
        task_id=task_id,
        logical_date=logical_date,
        map_index=map_index,
        extra=extra,
    )
    began_at = _utcnow()
    pnp_raw_repository.register_run_step_start(
        dsn,
        run_id=run_id,
        instance_key=instance_key,
        airflow_task_id=task_id,
        map_index=map_index,
        status="running",
        details=step_payload,
        started_at=began_at,
    )
82
+
83
+
84
def _finish_step(
    *,
    run_id: str,
    instance_key: str | None,
    dag_id: str,
    dag_run_id: str,
    task_id: str,
    logical_date: str | None,
    map_index: int | None,
    status: str,
    records_affected: int | None,
    error_message: str | None,
    extra: dict[str, Any] | None = None,
) -> None:
    """Close a run step and append a ``step_result`` package for it.

    Two repository writes happen in order: ``finish_run_step`` stores the
    final status, finish time, record count, error message and details;
    ``append_run_package`` then stores a package named after the task whose
    payload is the details extended with the status, record count and error
    message.

    Args:
        run_id: Identifier of the pipeline run the step belongs to.
        instance_key: Instance the run targets, if any.
        dag_id: DAG identifier.
        dag_run_id: DAG run identifier.
        task_id: Task identifier; also used as the package name.
        logical_date: Logical date string, recorded in the details payload.
        map_index: Map index of the task, forwarded as-is.
        status: Final status of the step; also used as the package status.
        records_affected: Number of records the step touched, if known.
        error_message: Failure description, or ``None``.
        extra: Optional additional details entries.
    """
    step_payload = _step_details(
        dag_id=dag_id,
        dag_run_id=dag_run_id,
        task_id=task_id,
        logical_date=logical_date,
        map_index=map_index,
        extra=extra,
    )
    dsn = _warehouse_dsn()
    pnp_raw_repository.finish_run_step(
        dsn,
        run_id=run_id,
        airflow_task_id=task_id,
        map_index=map_index,
        status=status,
        finished_at=_utcnow(),
        records_affected=records_affected,
        error_message=error_message,
        details=step_payload,
    )
    # The outcome keys are applied after the details, so they win over any
    # same-named entries supplied through ``extra``.
    package_payload = step_payload | {
        "status": status,
        "records_affected": records_affected,
        "error_message": error_message,
    }
    pnp_raw_repository.append_run_package(
        dsn,
        run_id=run_id,
        instance_key=instance_key,
        airflow_dag_id=dag_id,
        airflow_dag_run_id=dag_run_id,
        airflow_task_id=task_id,
        package_type="step_result",
        package_name=task_id,
        package_status=status,
        records_affected=records_affected,
        payload=package_payload,
    )
135
+
136
+
137
+ def resolve_pipeline_targets(
138
+ *,
139
+ dag_id: str,
140
+ dag_run_id: str,
141
+ logical_date: str | None,
142
+ requested_instance_key: str | None,
143
+ requested_operation: str | None,
144
+ requested_by: str,
145
+ ) -> list[dict[str, Any]]:
146
+ operation = str(requested_operation or "sync").strip().lower() or "sync"
147
+ if operation not in {"sync", "validate"}:
148
+ raise RuntimeError(f"Unsupported operation for pnp_pipeline: {operation}")
149
+
150
+ if requested_instance_key:
151
+ instance_key = requested_instance_key.strip()
152
+ if not instance_key:
153
+ raise RuntimeError("instance_key cannot be blank")
154
+ return [
155
+ {
156
+ "instance_key": instance_key,
157
+ "operation": operation,
158
+ "trigger_mode": f"airflow_manual_{operation}",
159
+ "requested_by": requested_by,
160
+ "dag_id": dag_id,
161
+ "dag_run_id": dag_run_id,
162
+ "logical_date": logical_date,
163
+ }
164
+ ]
165
+
166
+ if operation == "validate":
167
+ raise RuntimeError("Manual validation requires an explicit instance_key")
168
+
169
+ from croniter import croniter
170
+
171
+ now = datetime.fromisoformat(logical_date) if logical_date else _utcnow()
172
+ rows = pnp_raw_repository.list_active_instance_schedules(_warehouse_dsn())
173
+ targets: list[dict[str, Any]] = []
174
+ for row in rows:
175
+ schedule = str(row.get("schedule") or "").strip()
176
+ if not schedule or bool(row.get("has_running_run")):
177
+ continue
178
+ last_started_at = row.get("last_started_at")
179
+ if last_started_at is None:
180
+ due = True
181
+ else:
182
+ due = croniter(schedule, last_started_at).get_next(datetime) <= now
183
+ if not due:
184
+ continue
185
+ targets.append(
186
+ {
187
+ "instance_key": str(row["instance_key"]),
188
+ "operation": "sync",
189
+ "trigger_mode": "airflow_scheduled_sync",
190
+ "requested_by": requested_by,
191
+ "dag_id": dag_id,
192
+ "dag_run_id": dag_run_id,
193
+ "logical_date": logical_date,
194
+ }
195
+ )
196
+ return targets
197
+
198
+
199
def register_pipeline_run(
    target: dict[str, Any],
    *,
    task_id: str,
    map_index: int | None,
) -> dict[str, Any]:
    """Create a new pipeline run for ``target`` and record its first step.

    A fresh UUID is generated as the run id, the run is registered with
    status ``"running"``, and the calling task is logged as a step that
    starts and immediately finishes with status ``"success"``.

    Args:
        target: Target description. ``instance_key``, ``dag_id`` and
            ``dag_run_id`` are required; ``pipeline_id``, ``logical_date``,
            ``operation``, ``trigger_mode`` and ``requested_by`` are optional
            and fall back to defaults derived here.
        task_id: Identifier of the task performing the registration.
        map_index: Map index of that task, forwarded to the step records.

    Returns:
        A run reference dict holding the run id, the resolved target fields
        and the ISO-formatted start time.

    Raises:
        KeyError: If a required key is missing from ``target``.
    """
    instance_key = str(target["instance_key"])
    pipeline_id = str(target.get("pipeline_id") or "")
    dag_id = str(target["dag_id"])
    dag_run_id = str(target["dag_run_id"])
    logical_date = target.get("logical_date")
    operation = str(target.get("operation") or "sync")
    trigger_mode = str(target.get("trigger_mode") or f"airflow_manual_{operation}")
    requested_by = str(target.get("requested_by") or f"airflow.{dag_id}")
    run_id = str(uuid.uuid4())
    started_at = _utcnow()

    dsn = _warehouse_dsn()
    # Without a logical date the run is stamped with its own start time.
    run_logical_date = started_at
    if logical_date:
        run_logical_date = datetime.fromisoformat(logical_date)
    pnp_raw_repository.register_run_start(
        dsn,
        run_id=run_id,
        instance_key=instance_key,
        airflow_dag_id=dag_id,
        airflow_dag_run_id=dag_run_id,
        status="running",
        trigger_mode=trigger_mode,
        requested_by=requested_by,
        logical_date=run_logical_date,
        started_at=started_at,
    )

    # Arguments shared by the start and finish bookkeeping calls.
    step: dict[str, Any] = {
        "run_id": run_id,
        "instance_key": instance_key,
        "dag_id": dag_id,
        "dag_run_id": dag_run_id,
        "task_id": task_id,
        "logical_date": logical_date,
        "map_index": map_index,
    }
    _start_step(**step, extra={"operation": operation, "trigger_mode": trigger_mode})

    run_ref = {
        "run_id": run_id,
        "instance_key": instance_key,
        "pipeline_id": pipeline_id,
        "operation": operation,
        "dag_id": dag_id,
        "dag_run_id": dag_run_id,
        "logical_date": logical_date,
        "requested_by": requested_by,
        "trigger_mode": trigger_mode,
        "started_at": started_at.isoformat(),
    }
    _finish_step(
        **step,
        status="success",
        records_affected=1,
        error_message=None,
        extra=run_ref,
    )
    return run_ref
264
+
265
+
266
def load_instance_config(
    run_ref: dict[str, Any],
    *,
    task_id: str,
    map_index: int | None,
) -> dict[str, Any]:
    """Load the runtime configuration of the run's instance as a tracked step.

    The step is recorded as started, the configuration is fetched through
    ``pnp_raw_repository.load_instance_runtime_config`` and the step is then
    closed as ``"success"`` (with the number of selection rows as the record
    count) or ``"failed"`` (with the exception text).

    Args:
        run_ref: Run reference carrying ``run_id``, ``instance_key``,
            ``dag_id`` and ``dag_run_id``; ``logical_date`` and ``operation``
            are optional.
        task_id: Identifier of the calling task.
        map_index: Map index of the calling task.

    Returns:
        The configuration mapping returned by the repository.

    Raises:
        KeyError: If a required key is missing from ``run_ref``.
        Exception: Any error raised inside the step is re-raised after the
            step has been marked as failed.
    """
    instance_key = str(run_ref["instance_key"]) if "run_id" in run_ref else str(run_ref["run_id"])
    # Arguments shared by every bookkeeping call of this step.
    step: dict[str, Any] = {
        "run_id": str(run_ref["run_id"]),
        "instance_key": instance_key,
        "dag_id": str(run_ref["dag_id"]),
        "dag_run_id": str(run_ref["dag_run_id"]),
        "task_id": task_id,
        "logical_date": run_ref.get("logical_date"),
        "map_index": map_index,
    }
    operation = run_ref.get("operation")
    _start_step(**step, extra={"operation": operation})
    try:
        runtime_config = pnp_raw_repository.load_instance_runtime_config(
            _warehouse_dsn(),
            instance_key=instance_key,
        )
        selection_count = len(runtime_config.get("selection_rows") or [])
        _finish_step(
            **step,
            status="success",
            records_affected=selection_count,
            error_message=None,
            extra={
                "operation": operation,
                "page_url": runtime_config.get("page_url"),
                "selection_count": selection_count,
                "connection_key": runtime_config.get("connection_key"),
            },
        )
        return runtime_config
    except Exception as exc:
        _finish_step(
            **step,
            status="failed",
            records_affected=0,
            error_message=str(exc),
            extra={"operation": operation},
        )
        raise
323
+
324
+
325
def resolve_catalog(
    run_ref: dict[str, Any],
    instance_config: dict[str, Any],
    *,
    task_id: str,
    map_index: int | None,
) -> dict[str, Any]:
    """Resolve the Power BI catalog selection for a run as a tracked step.

    A Power BI client is created for the instance's ``page_url``, the catalog
    selection is resolved from the instance's ``request_params``, and the
    resulting catalog-entry and run-selection rows are upserted as raw
    metadata (no assets, no downloads, legacy writes disabled). The step is
    closed as ``"success"`` or ``"failed"`` accordingly.

    Args:
        run_ref: Run reference carrying ``run_id``, ``instance_key``,
            ``dag_id`` and ``dag_run_id``; ``logical_date`` and ``operation``
            are optional.
        instance_config: Instance configuration; ``page_url`` and
            ``request_params`` are read from it.
        task_id: Identifier of the calling task.
        map_index: Map index of the calling task.

    Returns:
        A summary dict with the operation, row counts, selection source and
        the visual/section attributes of the selection context.

    Raises:
        KeyError: If a required key is missing from ``run_ref``.
        Exception: Any error raised inside the step is re-raised after the
            step has been marked as failed.
    """
    run_id = str(run_ref["run_id"])
    # Arguments shared by every bookkeeping call of this step.
    step: dict[str, Any] = {
        "run_id": run_id,
        "instance_key": str(run_ref["instance_key"]),
        "dag_id": str(run_ref["dag_id"]),
        "dag_run_id": str(run_ref["dag_run_id"]),
        "task_id": task_id,
        "logical_date": run_ref.get("logical_date"),
        "map_index": map_index,
    }
    operation = run_ref.get("operation")
    _start_step(**step, extra={"operation": operation})
    try:
        connector_config = load_config()
        powerbi_client = powerbi_catalog_service.create_powerbi_client(
            page_url=str(instance_config["page_url"]),
            timeout_seconds=connector_config.timeout_seconds,
        )
        selection = powerbi_catalog_service.resolve_catalog_selection(
            client=powerbi_client,
            request_params=dict(instance_config["request_params"]),
        )
        catalog_rows = powerbi_catalog_service.build_catalog_entry_rows(
            run_id=run_id,
            selection=selection,
        )
        selection_rows = powerbi_catalog_service.build_run_selection_rows(
            run_id=run_id,
            selection=selection,
        )
        pnp_raw_ingestion_service.upsert_raw_metadata(
            _warehouse_dsn(),
            pending_assets=[],
            pending_catalog_entries=catalog_rows,
            pending_run_selection=selection_rows,
            pending_downloads=[],
            write_legacy=False,
            include_download_columns=False,
        )
        summary = {
            "operation": operation,
            "catalog_entry_count": len(catalog_rows),
            "selected_download_count": len(selection_rows),
            "selection_source": selection.selection_source,
            "visual_id": selection.context.visual_id,
            "visual_type": selection.context.visual_type,
            "section_display_name": selection.context.section_display_name,
        }
        _finish_step(
            **step,
            status="success",
            records_affected=len(selection_rows),
            error_message=None,
            extra=summary,
        )
        return summary
    except Exception as exc:
        _finish_step(
            **step,
            status="failed",
            records_affected=0,
            error_message=str(exc),
            extra={"operation": operation},
        )
        raise
406
+
407
+
408
def sync_raw(
    run_ref: dict[str, Any],
    instance_config: dict[str, Any],
    *,
    task_id: str,
    map_index: int | None,
) -> dict[str, Any]:
    """Extract and load raw data for a run as a tracked step.

    For any operation other than ``"sync"`` the step is recorded as a
    successful no-op with reason ``"validate_only"``. For ``"sync"`` a
    ``NiloPecanhaConnector`` extracts and loads the raw data; on failure the
    run's downloads are marked as failed before the step is closed as
    ``"failed"``.

    Args:
        run_ref: Run reference carrying ``run_id``, ``instance_key``,
            ``dag_id`` and ``dag_run_id``; ``logical_date`` and ``operation``
            are optional (operation defaults to ``"sync"``).
        instance_config: Instance configuration; ``page_url`` is used as the
            run context's source URL.
        task_id: Identifier of the calling task.
        map_index: Map index of the calling task.

    Returns:
        Either the skip marker dict or a dict with the operation, the loaded
        count and the connector's runtime statistics.

    Raises:
        KeyError: If a required key is missing from ``run_ref``.
        Exception: Any error raised while syncing is re-raised after the
            failure has been recorded.
    """
    run_id = str(run_ref["run_id"])
    instance_key = str(run_ref["instance_key"])
    # Arguments shared by every bookkeeping call of this step.
    step: dict[str, Any] = {
        "run_id": run_id,
        "instance_key": instance_key,
        "dag_id": str(run_ref["dag_id"]),
        "dag_run_id": str(run_ref["dag_run_id"]),
        "task_id": task_id,
        "logical_date": run_ref.get("logical_date"),
        "map_index": map_index,
    }
    operation = str(run_ref.get("operation") or "sync")
    _start_step(**step, extra={"operation": operation})

    if operation != "sync":
        skipped = {"operation": operation, "skipped": True, "reason": "validate_only"}
        _finish_step(
            **step,
            status="success",
            records_affected=0,
            error_message=None,
            extra=skipped,
        )
        return skipped

    try:
        connector_config = load_config()
        connector = NiloPecanhaConnector(dsn=_warehouse_dsn(), config=connector_config)
        run_context = RunContext(
            run_id=run_id,
            started_at=_utcnow(),
            source_url=str(instance_config["page_url"]),
        )
        loaded_count = connector.extract_and_load_raw(run_context, instance_key=instance_key)
        outcome = {
            "operation": operation,
            "loaded_count": loaded_count,
            "runtime": connector.runtime_stats(),
        }
        _finish_step(
            **step,
            status="success",
            records_affected=loaded_count,
            error_message=None,
            extra=outcome,
        )
        return outcome
    except Exception as exc:
        # Downloads are flagged before the step itself is closed as failed.
        pnp_raw_repository.mark_run_downloads_failed(
            _warehouse_dsn(),
            run_id=run_id,
            error_message=str(exc),
        )
        _finish_step(
            **step,
            status="failed",
            records_affected=0,
            error_message=str(exc),
            extra={"operation": operation},
        )
        raise
495
+
496
+
497
def materialize_staging(
    run_ref: dict[str, Any],
    *,
    task_id: str,
    map_index: int | None,
) -> dict[str, Any]:
    """Materialize the staging layer for a run as a tracked step.

    For any operation other than ``"sync"`` the step is recorded as a
    successful no-op with reason ``"validate_only"``. For ``"sync"`` the work
    is delegated to ``pnp_staging_service.materialize_instance_staging`` and
    the step is closed as ``"success"`` (record count taken from the result's
    ``deduplicated_record_count``) or ``"failed"``.

    Args:
        run_ref: Run reference carrying ``run_id``, ``instance_key``,
            ``dag_id`` and ``dag_run_id``; ``logical_date`` and ``operation``
            are optional (operation defaults to ``"sync"``).
        task_id: Identifier of the calling task.
        map_index: Map index of the calling task.

    Returns:
        Either the skip marker dict or the service result with an added
        ``operation`` entry.

    Raises:
        KeyError: If a required key is missing from ``run_ref``.
        Exception: Any error raised while materializing is re-raised after
            the step has been marked as failed.
    """
    run_id = str(run_ref["run_id"])
    instance_key = str(run_ref["instance_key"])
    # Arguments shared by every bookkeeping call of this step.
    step: dict[str, Any] = {
        "run_id": run_id,
        "instance_key": instance_key,
        "dag_id": str(run_ref["dag_id"]),
        "dag_run_id": str(run_ref["dag_run_id"]),
        "task_id": task_id,
        "logical_date": run_ref.get("logical_date"),
        "map_index": map_index,
    }
    operation = str(run_ref.get("operation") or "sync")
    _start_step(**step, extra={"operation": operation})

    if operation != "sync":
        skipped = {"operation": operation, "skipped": True, "reason": "validate_only"}
        _finish_step(
            **step,
            status="success",
            records_affected=0,
            error_message=None,
            extra=skipped,
        )
        return skipped

    try:
        outcome = pnp_staging_service.materialize_instance_staging(
            _warehouse_dsn(),
            run_id=run_id,
            instance_key=instance_key,
        )
        outcome["operation"] = operation
        deduplicated = int(outcome.get("deduplicated_record_count") or 0)
        _finish_step(
            **step,
            status="success",
            records_affected=deduplicated,
            error_message=None,
            extra=outcome,
        )
        return outcome
    except Exception as exc:
        _finish_step(
            **step,
            status="failed",
            records_affected=0,
            error_message=str(exc),
            extra={"operation": operation},
        )
        raise
571
+
572
+
573
def materialize_curated(
    run_ref: dict[str, Any],
    *,
    task_id: str,
    map_index: int | None,
) -> dict[str, Any]:
    """Materialize the curated layer for one run and track the step outcome.

    The step is opened with ``_start_step`` and always closed with
    ``_finish_step``: as ``"success"`` when the work (or the skip) completes,
    or as ``"failed"`` with the exception text before the exception is
    re-raised.

    Args:
        run_ref: Run descriptor. ``run_id``, ``instance_key``, ``dag_id`` and
            ``dag_run_id`` are required keys; ``logical_date`` and
            ``operation`` are optional (``operation`` falls back to
            ``"sync"`` when missing or falsy).
        task_id: Identifier of the task recording this step.
        map_index: Map index forwarded to the step-tracking calls, or ``None``.

    Returns:
        For ``operation == "sync"``, the dict returned by
        ``pnp_curated_service.materialize_instance_curated`` with an added
        ``"operation"`` key. For any other operation, a marker dict with
        ``"skipped": True``.

    Raises:
        KeyError: If a required key is missing from ``run_ref``.
        Exception: Whatever the curated service or step tracking raises.
    """
    # Identity fields shared by every step-tracking call below.
    step_ref = {
        "run_id": str(run_ref["run_id"]),
        "instance_key": str(run_ref["instance_key"]),
        "dag_id": str(run_ref["dag_id"]),
        "dag_run_id": str(run_ref["dag_run_id"]),
        "task_id": task_id,
        "logical_date": run_ref.get("logical_date"),
        "map_index": map_index,
    }
    operation = str(run_ref.get("operation") or "sync")

    _start_step(**step_ref, extra={"operation": operation})

    # Only "sync" runs materialize data; anything else is recorded as a
    # successful no-op.
    if operation != "sync":
        skipped = {"operation": operation, "skipped": True, "reason": "validate_only"}
        _finish_step(
            **step_ref,
            status="success",
            records_affected=0,
            error_message=None,
            extra=skipped,
        )
        return skipped

    try:
        outcome = pnp_curated_service.materialize_instance_curated(
            _warehouse_dsn(),
            run_id=step_ref["run_id"],
            instance_key=step_ref["instance_key"],
        )
        outcome["operation"] = operation
        affected = int(outcome.get("vanna_resumo_count") or 0)
        _finish_step(
            **step_ref,
            status="success",
            records_affected=affected,
            error_message=None,
            extra=outcome,
        )
        return outcome
    except Exception as exc:
        # Record the failure on the step, then let the error propagate.
        _finish_step(
            **step_ref,
            status="failed",
            records_affected=0,
            error_message=str(exc),
            extra={"operation": operation},
        )
        raise
647
+
648
+
649
def run_quality_checks(
    run_ref: dict[str, Any],
    *,
    task_id: str,
    map_index: int | None,
) -> dict[str, Any]:
    """Collect the quality checks for one run and track the step outcome.

    Unlike the materialization steps, this step has no skip branch: the
    checks are collected for every operation.

    Args:
        run_ref: Run descriptor. ``run_id``, ``instance_key``, ``dag_id`` and
            ``dag_run_id`` are required keys; ``logical_date`` and
            ``operation`` are optional (``operation`` falls back to
            ``"sync"`` when missing or falsy).
        task_id: Identifier of the task recording this step.
        map_index: Map index forwarded to the step-tracking calls, or ``None``.

    Returns:
        The dict returned by ``pnp_quality_service.collect_run_checks`` with
        an added ``"operation"`` key.

    Raises:
        KeyError: If a required key is missing from ``run_ref``.
        Exception: Whatever the quality service or step tracking raises; the
            step is recorded as ``"failed"`` before re-raising.
    """
    # Identity fields shared by every step-tracking call below.
    step_ref = {
        "run_id": str(run_ref["run_id"]),
        "instance_key": str(run_ref["instance_key"]),
        "dag_id": str(run_ref["dag_id"]),
        "dag_run_id": str(run_ref["dag_run_id"]),
        "task_id": task_id,
        "logical_date": run_ref.get("logical_date"),
        "map_index": map_index,
    }
    operation = str(run_ref.get("operation") or "sync")

    _start_step(**step_ref, extra={"operation": operation})

    try:
        report = pnp_quality_service.collect_run_checks(_warehouse_dsn(), step_ref["run_id"])
        report["operation"] = operation
        affected = int(report.get("raw_count") or 0)
        _finish_step(
            **step_ref,
            status="success",
            records_affected=affected,
            error_message=None,
            extra=report,
        )
        return report
    except Exception as exc:
        # Record the failure on the step, then let the error propagate.
        _finish_step(
            **step_ref,
            status="failed",
            records_affected=0,
            error_message=str(exc),
            extra={"operation": operation},
        )
        raise
703
+
704
+
705
def finalize_run(
    run_ref: dict[str, Any],
    *,
    dag_status: str,
    task_states: dict[str, str],
    task_id: str,
    map_index: int | None,
    checks: dict[str, Any] | None = None,
    error_message: str | None = None,
) -> dict[str, Any]:
    """Close out a run: persist its final status and record the finalize step.

    The final status is ``"success"`` only when ``dag_status`` is
    ``"success"`` and no task in ``task_states`` is ``"failed"`` or
    ``"upstream_failed"``; otherwise it is ``"failed"`` and the run's
    downloads are marked failed as well.

    The finalize *step* is recorded as ``"success"`` whenever the bookkeeping
    completes, regardless of the run's final status. If the bookkeeping
    itself raises, the step is recorded as ``"failed"`` and the exception is
    re-raised, matching the other pipeline steps in this module.

    Args:
        run_ref: Run descriptor. ``run_id``, ``instance_key``, ``dag_id`` and
            ``dag_run_id`` are required keys; ``logical_date`` and
            ``operation`` are optional (``operation`` falls back to
            ``"sync"`` when missing or falsy).
        dag_status: Overall DAG status reported by the caller.
        task_states: Mapping of task name to its state.
        task_id: Identifier of the task recording this step.
        map_index: Map index forwarded to the step-tracking calls, or ``None``.
        checks: Previously collected checks. When ``None`` or empty they are
            collected again via ``pnp_quality_service.collect_run_checks``.
        error_message: Explicit error text. When falsy and tasks failed, a
            message listing the failed tasks is generated.

    Returns:
        A dict with ``run_id``, ``operation``, ``status``, ``error_message``,
        ``checks`` and ``task_states``.

    Raises:
        KeyError: If a required key is missing from ``run_ref``.
        Exception: Whatever the repository, quality service or step tracking
            raises.
    """
    run_id = str(run_ref["run_id"])
    # Identity fields shared by every step-tracking call below.
    step_ref = {
        "run_id": run_id,
        "instance_key": str(run_ref["instance_key"]),
        "dag_id": str(run_ref["dag_id"]),
        "dag_run_id": str(run_ref["dag_run_id"]),
        "task_id": task_id,
        "logical_date": run_ref.get("logical_date"),
        "map_index": map_index,
    }
    operation = str(run_ref.get("operation") or "sync")

    _start_step(**step_ref, extra={"dag_status": dag_status, "operation": operation})

    try:
        resolved_checks = checks or pnp_quality_service.collect_run_checks(_warehouse_dsn(), run_id)
        failed_tasks = sorted(
            task_name
            for task_name, state in task_states.items()
            if state in {"failed", "upstream_failed"}
        )
        final_status = "success" if dag_status == "success" and not failed_tasks else "failed"
        resolved_error = error_message
        if not resolved_error and failed_tasks:
            resolved_error = f"Airflow tasks failed: {', '.join(failed_tasks)}"

        pnp_raw_repository.finish_run(
            _warehouse_dsn(),
            run_id=run_id,
            status=final_status,
            catalog_entry_count=int(resolved_checks.get("catalog_entry_count") or 0),
            selected_download_count=int(resolved_checks.get("run_selection_count") or 0),
            downloaded_file_count=int(resolved_checks.get("download_count") or 0),
            raw_record_count=int(resolved_checks.get("raw_count") or 0),
            error_message=resolved_error,
            run_summary={
                "operation": operation,
                "checks": resolved_checks,
                "task_states": task_states,
                # Stores the derived final status, not the incoming dag_status.
                "dag_status": final_status,
            },
            finished_at=_utcnow(),
        )
        if final_status != "success":
            pnp_raw_repository.mark_run_downloads_failed(
                _warehouse_dsn(),
                run_id=run_id,
                error_message=resolved_error or "Run finished with failed Airflow tasks",
            )

        result = {
            "run_id": run_id,
            "operation": operation,
            "status": final_status,
            "error_message": resolved_error,
            "checks": resolved_checks,
            "task_states": task_states,
        }
        # The step reports "success" because finalization itself completed;
        # the run's own outcome is carried in `result["status"]`.
        _finish_step(
            **step_ref,
            status="success",
            records_affected=int(resolved_checks.get("raw_count") or 0),
            error_message=None,
            extra=result,
        )
        return result
    except Exception as exc:
        # Without this the step opened by _start_step above would never be
        # closed when finalization itself fails.
        _finish_step(
            **step_ref,
            status="failed",
            records_affected=0,
            error_message=str(exc),
            extra={"dag_status": dag_status, "operation": operation},
        )
        raise