@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,73 @@
1
+ {
2
+ "realm": "dataif",
3
+ "enabled": true,
4
+ "displayName": "dataif",
5
+ "roles": {
6
+ "realm": [
7
+ {
8
+ "name": "admin"
9
+ },
10
+ {
11
+ "name": "viewer"
12
+ }
13
+ ]
14
+ },
15
+ "users": [
16
+ {
17
+ "username": "dataif-admin",
18
+ "enabled": true,
19
+ "emailVerified": true,
20
+ "firstName": "Dataif",
21
+ "lastName": "Admin",
22
+ "email": "admin@dataif.local",
23
+ "credentials": [
24
+ {
25
+ "type": "password",
26
+ "value": "admin",
27
+ "temporary": false
28
+ }
29
+ ],
30
+ "realmRoles": [
31
+ "admin"
32
+ ]
33
+ }
34
+ ],
35
+ "clients": [
36
+ {
37
+ "clientId": "dataif-web",
38
+ "enabled": true,
39
+ "publicClient": true,
40
+ "standardFlowEnabled": true,
41
+ "directAccessGrantsEnabled": true,
42
+ "redirectUris": [
43
+ "http://localhost:5173/*",
44
+ "http://localhost:5174/*"
45
+ ],
46
+ "webOrigins": [
47
+ "http://localhost:5173",
48
+ "http://localhost:5174"
49
+ ],
50
+ "protocolMappers": [
51
+ {
52
+ "name": "aud-dataif-api",
53
+ "protocol": "openid-connect",
54
+ "protocolMapper": "oidc-audience-mapper",
55
+ "consentRequired": false,
56
+ "config": {
57
+ "included.client.audience": "dataif-api",
58
+ "id.token.claim": "false",
59
+ "access.token.claim": "true"
60
+ }
61
+ }
62
+ ]
63
+ },
64
+ {
65
+ "clientId": "dataif-api",
66
+ "enabled": true,
67
+ "publicClient": false,
68
+ "serviceAccountsEnabled": true,
69
+ "secret": "dataif-api-secret",
70
+ "protocol": "openid-connect"
71
+ }
72
+ ]
73
+ }
@@ -0,0 +1,9 @@
1
# Bootstrap image for the Ollama model: a curl-based image whose only job is
# to run bootstrap-model.sh.
FROM curlimages/curl:8.11.1

# The image switches to root and never drops back, so the entrypoint runs as root.
USER root
WORKDIR /bootstrap

# Both files are copied next to each other; the script's default Modelfile
# template path is /bootstrap/sabia-7b.Modelfile.
COPY bootstrap-model.sh sabia-7b.Modelfile ./
RUN chmod +x /bootstrap/bootstrap-model.sh

ENTRYPOINT ["/bootstrap/bootstrap-model.sh"]
@@ -0,0 +1,100 @@
1
#!/bin/sh
# Bootstrap an Ollama model from a GGUF file: wait for the server, download the
# weights when they are missing, then register the model through the HTTP API.
set -eu

# Opt-out switch: "false", "0" or "no" ends the script successfully without work.
enabled="${OLLAMA_MODEL_BOOTSTRAP_ENABLED:-true}"
if [ "$enabled" = "false" ] || [ "$enabled" = "0" ] || [ "$enabled" = "no" ]; then
  echo "Ollama model bootstrap disabled."
  exit 0
fi

# Settings below fall back to their defaults when the variable is unset or empty.
ollama_base_url="${OLLAMA_BASE_URL:-http://ollama:11434}"
# OLLAMA_MODEL_NAME takes precedence over VANNA_OLLAMA_MODEL, then "sabia-7b".
model_name="${OLLAMA_MODEL_NAME:-${VANNA_OLLAMA_MODEL:-sabia-7b}}"
# No default download URL; download_gguf exits with status 2 when the file is
# missing and this is empty.
gguf_url="${OLLAMA_MODEL_GGUF_URL:-}"
gguf_file="${OLLAMA_MODEL_GGUF_FILE:-sabia-7b.Q4_K_M.gguf}"
# Weights are stored per model under /models.
model_dir="/models/${model_name}"
model_path="${model_dir}/${gguf_file}"
modelfile_template="${OLLAMA_MODELFILE_TEMPLATE:-/bootstrap/sabia-7b.Modelfile}"

# NOTE(review): with the "sabia-7b" fallback above, model_name is never empty,
# so this guard looks unreachable - confirm before relying on it.
if [ -z "$model_name" ]; then
  echo "OLLAMA_MODEL_NAME or VANNA_OLLAMA_MODEL must be set." >&2
  exit 2
fi
22
+
23
wait_for_ollama() {
  # Poll the tags endpoint until Ollama answers; exit 1 once the retry budget
  # (OLLAMA_BOOTSTRAP_WAIT_RETRIES attempts, OLLAMA_BOOTSTRAP_WAIT_SECONDS apart)
  # is spent.
  max_attempts="${OLLAMA_BOOTSTRAP_WAIT_RETRIES:-60}"
  pause_seconds="${OLLAMA_BOOTSTRAP_WAIT_SECONDS:-2}"
  attempt=1
  until [ "$attempt" -gt "$max_attempts" ]; do
    # A failing curl inside an && list does not trip `set -e`.
    curl -fsS "${ollama_base_url%/}/api/tags" >/tmp/ollama-tags.json && return 0
    echo "Waiting for Ollama at ${ollama_base_url} (${attempt}/${max_attempts})..."
    sleep "$pause_seconds"
    attempt=$((attempt + 1))
  done
  echo "Ollama did not become reachable at ${ollama_base_url}." >&2
  exit 1
}
38
+
39
model_exists() {
  # Return 0 when the Ollama tag list contains model_name (optionally suffixed
  # with ":latest"), non-zero otherwise.
  #
  # A failed request counts as "not found" explicitly instead of grepping a
  # truncated response file.
  curl -fsS "${ollama_base_url%/}/api/tags" >/tmp/ollama-tags.json || return 1

  # Escape regex metacharacters so names such as "llama3.1" are matched
  # literally rather than letting "." match any character.
  escaped_name="$(printf '%s' "$model_name" | sed 's/[][\.*^$+?(){}|]/\\&/g')"
  grep -Eq "\"(name|model)\"[[:space:]]*:[[:space:]]*\"${escaped_name}(:latest)?\"" /tmp/ollama-tags.json
}
43
+
44
download_gguf() {
  # Fetch the GGUF weights once; a file already at model_path is reused as-is.
  if [ -f "$model_path" ]; then
    echo "GGUF already present at ${model_path}."
    return 0
  fi
  if [ -z "$gguf_url" ]; then
    echo "OLLAMA_MODEL_GGUF_URL is required because ${model_path} is missing." >&2
    exit 2
  fi

  mkdir -p "$model_dir"
  partial_path="${model_path}.part"
  echo "Downloading ${model_name} GGUF to ${model_path}..."

  # Build the curl arguments once; the bearer header is added only when
  # HF_TOKEN is set and non-empty.
  set -- -fL --retry 5 --retry-delay 10
  if [ -n "${HF_TOKEN:-}" ]; then
    set -- "$@" -H "Authorization: Bearer ${HF_TOKEN}"
  fi
  curl "$@" -o "$partial_path" "$gguf_url"

  # Download to a ".part" name first so model_path only appears once complete.
  mv "$partial_path" "$model_path"
}
65
+
66
json_escape_file() {
  # Print the Modelfile template as the inside of a JSON string:
  #   1. substitute the GGUF path placeholder,
  #   2. escape backslashes, then double quotes,
  #   3. turn every line ending into a literal "\n" sequence.
  # The sed expressions run in order on each line, so the path is inserted
  # before escaping is applied.
  sed -e "s#__MODEL_GGUF_PATH__#${model_path}#g" \
      -e 's/\\/\\\\/g; s/"/\\"/g' \
      "$modelfile_template" \
    | awk '{printf "%s\\n", $0}'
}
71
+
72
create_model() {
  # Register the model with Ollama by POSTing the rendered Modelfile to
  # /api/create. The request body is written to a file and sent verbatim.
  # NOTE(review): newer Ollama releases changed the /api/create request schema -
  # confirm the "modelfile" field is still accepted by the server version in use.
  request_body="/tmp/ollama-create-model.json"
  modelfile_json="$(json_escape_file)"
  printf '{"name":"%s","modelfile":"%s","stream":false}\n' "$model_name" "$modelfile_json" >"$request_body"

  echo "Creating Ollama model ${model_name} from ${model_path}..."
  curl -fsS \
    -H "Content-Type: application/json" \
    --data-binary "@${request_body}" \
    "${ollama_base_url%/}/api/create"
  # The API response is printed by curl; finish the line after it.
  echo
}
84
+
85
# Main sequence: wait for the server, skip everything when the model is already
# registered, otherwise download, create and verify.
wait_for_ollama

if model_exists; then
  echo "Ollama model ${model_name} already exists."
  exit 0
fi

download_gguf
create_model

# Verify the model is actually listed after creation.
if ! model_exists; then
  echo "Ollama model ${model_name} was not visible after creation." >&2
  exit 1
fi

echo "Ollama model ${model_name} is ready."
exit 0
@@ -0,0 +1,14 @@
1
# Ollama Modelfile template for the SQL-generation model.
# The FROM target is a placeholder that bootstrap-model.sh replaces with the
# path of the downloaded GGUF file before the model is created.
FROM __MODEL_GGUF_PATH__

PARAMETER temperature 0.1
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER num_ctx 2048
PARAMETER repeat_penalty 1.1

SYSTEM """
Voce gera SQL PostgreSQL para consultas analiticas sobre a camada curated.
Responda apenas com uma consulta SELECT.
Use somente tabelas e views explicitamente apresentadas no contexto.
Nao use DDL, DML, schemas operacionais ou multiplas statements.
"""
@@ -0,0 +1,4 @@
1
# PostgreSQL 16 (Alpine) image with the project's init-db directory baked in.
FROM postgres:16-alpine

# The source path is resolved against the build context, not this Dockerfile.
# NOTE(review): presumably built with the template root as context - confirm
# against the build settings in docker-compose.yml.
COPY infra/init-db /docker-entrypoint-initdb.d
4
+
@@ -0,0 +1,9 @@
1
+ from dataif_pipelines.airflow.pnp_pipeline_factory import build_pipeline_dag
2
+
3
+
4
# Module-level DAG object for one pipeline instance. `schedule=None` means the
# DAG has no timetable of its own.
dag = build_pipeline_dag(
    schedule=None,
    instance_key="pnp_pipe_pnp_2020_financeiro",
    pipeline_id="fcc6f1f3-ca8f-480e-bc7a-d48dcc8516ee",
    dag_id="2020_financeiro_fcc6f1f3_sync",
)
@@ -0,0 +1 @@
1
+ """Pipelines package for dataif."""
@@ -0,0 +1,167 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timedelta
4
+
5
+ from airflow.decorators import dag, task
6
+ from airflow.operators.python import get_current_context
7
+ from airflow.utils.trigger_rule import TriggerRule
8
+
9
+ from dataif_pipelines.orchestration import pnp_workflow
10
+
11
+
12
def _context_metadata() -> dict[str, object]:
    """Return identifiers of the currently executing task.

    Reads the active Airflow context and reports the DAG id, DAG run id,
    logical date (ISO 8601 string, or ``None`` when absent), task id and map
    index (``-1`` when the task instance has no ``map_index`` attribute).
    """
    ctx = get_current_context()
    run = ctx["dag_run"]
    ti = ctx["ti"]
    when = ctx.get("logical_date")
    iso_logical_date = when.isoformat() if when else None
    return dict(
        dag_id=run.dag_id,
        dag_run_id=run.run_id,
        logical_date=iso_logical_date,
        task_id=ti.task_id,
        map_index=getattr(ti, "map_index", -1),
    )
24
+
25
+
26
def _task_states() -> tuple[str, dict[str, str]]:
    """Summarise the states of the other tasks in the current DAG run.

    Returns:
        A pair ``(overall, states)``. ``states`` maps each task id (the calling
        task excluded) to its state rendered with ``str``. ``overall`` is
        ``"failed"`` when any state is ``"failed"`` or ``"upstream_failed"``,
        otherwise ``"success"``.
    """
    ctx = get_current_context()
    own_task_id = ctx["ti"].task_id
    states = {
        ti.task_id: str(ti.state)
        for ti in ctx["dag_run"].get_task_instances()
        if ti.task_id != own_task_id
    }
    failure_markers = {"failed", "upstream_failed"}
    if failure_markers.intersection(states.values()):
        return "failed", states
    return "success", states
37
+
38
+
39
def build_pipeline_dag(*, dag_id: str, pipeline_id: str | None = None, instance_key: str, schedule: str | None):
    """Build the Airflow DAG that runs one PNP pipeline instance.

    Task flow::

        register_run -> load_instance_config -> resolve_powerbi_catalog
            -> select_execution_path
                 |- operation == "validate": finalize_run
                 `- otherwise: extract_raw -> materialize_staging
                        -> build_curated_views -> run_quality_checks -> finalize_run

    ``finalize_run`` uses ``TriggerRule.ALL_DONE`` so it runs once its upstream
    tasks have finished, whatever their outcome.

    Args:
        dag_id: Airflow DAG id.
        pipeline_id: Optional pipeline identifier; also added to the DAG tags
            when provided.
        instance_key: Pipeline instance key passed to the workflow layer and
            added to the DAG tags.
        schedule: Airflow schedule expression, or ``None`` for no schedule.

    Returns:
        The object produced by calling the ``@dag``-decorated builder.
    """

    def _task_call_kwargs() -> dict[str, object]:
        # task_id / map_index keyword arguments shared by every workflow call.
        meta = _context_metadata()
        return {"task_id": str(meta["task_id"]), "map_index": int(meta["map_index"])}

    dag_tags = ["dataif", "governo", "nilo_pecanha", "pnp", "pipeline", instance_key]
    if pipeline_id:
        dag_tags.append(pipeline_id)

    # No docstrings on the decorated callables below: Airflow may surface them
    # as DAG/task documentation, which the original code did not set.
    @dag(
        dag_id=dag_id,
        start_date=datetime(2025, 1, 1),
        schedule=schedule,
        catchup=False,
        max_active_runs=1,
        default_args={
            "owner": "dataif",
            "depends_on_past": False,
            "retries": 1,
            "retry_delay": timedelta(minutes=5),
        },
        tags=dag_tags,
    )
    def _build():
        @task
        def register_run() -> dict[str, object]:
            dag_run = get_current_context()["dag_run"]
            conf = getattr(dag_run, "conf", None) or {}
            metadata = _context_metadata()
            operation = str(conf.get("operation") or "sync").strip().lower() or "sync"
            # A run is manual when it carries conf (original rule) or when
            # Airflow itself marks it as manually triggered; previously a manual
            # trigger without conf was recorded as "scheduled".
            run_type = str(getattr(dag_run, "run_type", "") or "").lower()
            trigger_origin = "manual" if (conf or run_type == "manual") else "scheduled"
            return pnp_workflow.register_pipeline_run(
                {
                    "instance_key": instance_key,
                    "pipeline_id": pipeline_id,
                    "operation": operation,
                    "trigger_mode": f"airflow_{trigger_origin}_{operation}",
                    "requested_by": str(conf.get("requested_by") or f"airflow.{metadata['dag_id']}"),
                    "dag_id": str(metadata["dag_id"]),
                    "dag_run_id": str(metadata["dag_run_id"]),
                    "logical_date": metadata["logical_date"],
                },
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        @task
        def load_instance_config(run_ref: dict[str, object]) -> dict[str, object]:
            return pnp_workflow.load_instance_config(run_ref, **_task_call_kwargs())

        @task
        def resolve_powerbi_catalog(run_ref: dict[str, object], instance_config: dict[str, object]) -> dict[str, object]:
            return pnp_workflow.resolve_catalog(run_ref, instance_config, **_task_call_kwargs())

        @task.branch
        def select_execution_path(run_ref: dict[str, object]) -> str:
            # "validate" runs skip extraction and go straight to finalization.
            operation = str(run_ref.get("operation") or "sync").strip().lower()
            if operation == "validate":
                return "finalize_run"
            return "extract_raw"

        @task
        def extract_raw(run_ref: dict[str, object], instance_config: dict[str, object]) -> dict[str, object]:
            return pnp_workflow.sync_raw(run_ref, instance_config, **_task_call_kwargs())

        @task
        def materialize_staging(run_ref: dict[str, object]) -> dict[str, object]:
            return pnp_workflow.materialize_staging(run_ref, **_task_call_kwargs())

        @task
        def build_curated_views(run_ref: dict[str, object]) -> dict[str, object]:
            return pnp_workflow.materialize_curated(run_ref, **_task_call_kwargs())

        @task
        def run_quality_checks(run_ref: dict[str, object]) -> dict[str, object]:
            return pnp_workflow.run_quality_checks(run_ref, **_task_call_kwargs())

        @task(trigger_rule=TriggerRule.ALL_DONE)
        def finalize_run(run_ref: dict[str, object]) -> dict[str, object]:
            dag_status, task_states = _task_states()
            return pnp_workflow.finalize_run(
                run_ref,
                dag_status=dag_status,
                task_states=task_states,
                **_task_call_kwargs(),
            )

        run_ref = register_run()
        instance_config = load_instance_config(run_ref)
        catalog = resolve_powerbi_catalog(run_ref, instance_config)
        execution_path = select_execution_path(run_ref)
        raw = extract_raw(run_ref, instance_config)
        staging = materialize_staging(run_ref)
        curated = build_curated_views(run_ref)
        quality = run_quality_checks(run_ref)
        final = finalize_run(run_ref)

        # Explicit ordering on top of the data dependencies created by passing
        # run_ref / instance_config between tasks.
        run_ref >> instance_config >> catalog >> execution_path
        execution_path >> raw >> staging >> curated >> quality >> final
        execution_path >> final

    return _build()
@@ -0,0 +1 @@
1
+ """Connectors for dataif pipelines."""
@@ -0,0 +1 @@
1
+ """Base connector abstractions."""
@@ -0,0 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import Iterable
5
+
6
+ from .types import NormalizedRecord, RawRecord, RunContext
7
+
8
+
9
class BaseConnector(ABC):
    """Abstract interface every connector must implement.

    All methods are abstract; the method names suggest a fetch -> normalize ->
    load -> check sequence, but the calling order is decided by the caller and
    is not visible here.
    """

    @abstractmethod
    def connector_id(self) -> str:
        """Return the identifier of this connector."""
        raise NotImplementedError

    @abstractmethod
    def fetch(self, run_context: RunContext) -> list[RawRecord]:
        """Return the raw records for the run described by ``run_context``."""
        raise NotImplementedError

    @abstractmethod
    def normalize(self, raw_records: list[RawRecord], run_context: RunContext) -> list[NormalizedRecord]:
        """Convert ``raw_records`` into normalized records."""
        raise NotImplementedError

    @abstractmethod
    def load_raw(self, normalized_records: list[NormalizedRecord], run_context: RunContext) -> int:
        """Load ``normalized_records`` and return an integer.

        NOTE(review): the return value is presumably the number of records
        loaded - confirm against the concrete connectors.
        """
        raise NotImplementedError

    @abstractmethod
    def post_load_checks(self, run_id: str) -> dict[str, object]:
        """Return the results of the checks performed for ``run_id``."""
        raise NotImplementedError
@@ -0,0 +1,14 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Any
4
+
5
+
6
@dataclass(frozen=True)
class RunContext:
    """Immutable description of a single connector run."""

    # Identifier of the run.
    run_id: str
    # Start time of the run.
    # NOTE(review): timezone awareness is not enforced here - confirm callers pass aware datetimes.
    started_at: datetime
    # URL of the source the run reads from.
    source_url: str


# Records are plain string-keyed dictionaries; no schema is enforced at this level.
RawRecord = dict[str, Any]
NormalizedRecord = dict[str, Any]
@@ -0,0 +1 @@
1
+ """Nilo Pecanha connector."""
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+
6
+ from dataif_pipelines.connectors.nilo_pecanha.powerbi_microdados import DEFAULT_POWERBI_MICRODADOS_URL
7
+
8
+
9
@dataclass(frozen=True)
class NiloConfig:
    """Immutable settings for the Nilo Pecanha connector."""

    # URL the connector reads from.
    endpoint: str
    # Timeout in seconds (presumably applied to outgoing requests - confirm in the connector).
    timeout_seconds: int


def load_config() -> NiloConfig:
    """Build a :class:`NiloConfig` from environment variables.

    ``NILO_PECANHA_ENDPOINT`` falls back to ``DEFAULT_POWERBI_MICRODADOS_URL``
    and ``NILO_TIMEOUT_SECONDS`` to ``60``. A variable that is set but blank is
    treated like an unset one, so an empty entry in a ``.env`` file no longer
    yields an empty endpoint or a ``ValueError`` from ``int("")``.

    Raises:
        ValueError: If ``NILO_TIMEOUT_SECONDS`` is set to a non-integer value.
    """
    endpoint = (os.getenv("NILO_PECANHA_ENDPOINT") or "").strip() or DEFAULT_POWERBI_MICRODADOS_URL
    raw_timeout = (os.getenv("NILO_TIMEOUT_SECONDS") or "").strip()
    try:
        timeout_seconds = int(raw_timeout) if raw_timeout else 60
    except ValueError as err:
        raise ValueError(
            f"NILO_TIMEOUT_SECONDS must be an integer, got {raw_timeout!r}"
        ) from err
    return NiloConfig(endpoint=endpoint, timeout_seconds=timeout_seconds)