@dataif/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/dataif.js +623 -0
- package/package.json +26 -0
- package/scripts/build-template.mjs +72 -0
- package/templates/dataif/README.md +157 -0
- package/templates/dataif/infra/.env.example +119 -0
- package/templates/dataif/infra/.env.stg.example +119 -0
- package/templates/dataif/infra/airflow/Dockerfile +11 -0
- package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
- package/templates/dataif/infra/airflow/requirements.txt +3 -0
- package/templates/dataif/infra/docker-compose.yml +306 -0
- package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
- package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
- package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
- package/templates/dataif/infra/keycloak/Dockerfile +4 -0
- package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
- package/templates/dataif/infra/ollama/Dockerfile +9 -0
- package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
- package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
- package/templates/dataif/infra/postgres/Dockerfile +4 -0
- package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
- package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
- package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
- package/templates/dataif/scripts/configure-env.sh +149 -0
- package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
- package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
- package/templates/dataif/scripts/deploy.sh +79 -0
- package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
- package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
- package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
- package/templates/dataif/scripts/publish-images.sh +31 -0
- package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
- package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
- package/templates/dataif/services/api/.dockerignore +18 -0
- package/templates/dataif/services/api/Dockerfile +12 -0
- package/templates/dataif/services/api/app/__init__.py +1 -0
- package/templates/dataif/services/api/app/auth.py +48 -0
- package/templates/dataif/services/api/app/config.py +59 -0
- package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
- package/templates/dataif/services/api/app/main.py +2432 -0
- package/templates/dataif/services/api/app/metabase_admin.py +191 -0
- package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
- package/templates/dataif/services/api/app/metabase_embed.py +15 -0
- package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
- package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
- package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
- package/templates/dataif/services/api/app/vanna_client.py +32 -0
- package/templates/dataif/services/api/requirements.txt +9 -0
- package/templates/dataif/services/vanna/.dockerignore +18 -0
- package/templates/dataif/services/vanna/Dockerfile +12 -0
- package/templates/dataif/services/vanna/app/config.py +57 -0
- package/templates/dataif/services/vanna/app/main.py +108 -0
- package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
- package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
- package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
- package/templates/dataif/services/vanna/requirements.txt +8 -0
- package/templates/dataif/services/web/.dockerignore +13 -0
- package/templates/dataif/services/web/Dockerfile +16 -0
- package/templates/dataif/services/web/index.html +12 -0
- package/templates/dataif/services/web/nginx.conf +74 -0
- package/templates/dataif/services/web/package-lock.json +4397 -0
- package/templates/dataif/services/web/package.json +32 -0
- package/templates/dataif/services/web/postcss.config.mjs +5 -0
- package/templates/dataif/services/web/src/App.jsx +2817 -0
- package/templates/dataif/services/web/src/adminAuth.js +245 -0
- package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
- package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
- package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
- package/templates/dataif/services/web/src/assets/if.svg +0 -0
- package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
- package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
- package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
- package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
- package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
- package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
- package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
- package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
- package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
- package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
- package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
- package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
- package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
- package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
- package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
- package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
- package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
- package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
- package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
- package/templates/dataif/services/web/src/main.jsx +14 -0
- package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
- package/templates/dataif/services/web/src/styles/globals.css +60 -0
- package/templates/dataif/services/web/src/styles/theme.css +1326 -0
- package/templates/dataif/services/web/src/styles/typography.css +430 -0
- package/templates/dataif/services/web/src/styles.css +1287 -0
- package/templates/dataif/services/web/src/utils/cx.ts +24 -0
- package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
- package/templates/dataif/services/web/vite.config.js +14 -0
- package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
- package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
- package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
- package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
- package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
- package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
- package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
- package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
- package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
- package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
- package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
- package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
- package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
- package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
{
|
|
2
|
+
"realm": "dataif",
|
|
3
|
+
"enabled": true,
|
|
4
|
+
"displayName": "dataif",
|
|
5
|
+
"roles": {
|
|
6
|
+
"realm": [
|
|
7
|
+
{
|
|
8
|
+
"name": "admin"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"name": "viewer"
|
|
12
|
+
}
|
|
13
|
+
]
|
|
14
|
+
},
|
|
15
|
+
"users": [
|
|
16
|
+
{
|
|
17
|
+
"username": "dataif-admin",
|
|
18
|
+
"enabled": true,
|
|
19
|
+
"emailVerified": true,
|
|
20
|
+
"firstName": "Dataif",
|
|
21
|
+
"lastName": "Admin",
|
|
22
|
+
"email": "admin@dataif.local",
|
|
23
|
+
"credentials": [
|
|
24
|
+
{
|
|
25
|
+
"type": "password",
|
|
26
|
+
"value": "admin",
|
|
27
|
+
"temporary": false
|
|
28
|
+
}
|
|
29
|
+
],
|
|
30
|
+
"realmRoles": [
|
|
31
|
+
"admin"
|
|
32
|
+
]
|
|
33
|
+
}
|
|
34
|
+
],
|
|
35
|
+
"clients": [
|
|
36
|
+
{
|
|
37
|
+
"clientId": "dataif-web",
|
|
38
|
+
"enabled": true,
|
|
39
|
+
"publicClient": true,
|
|
40
|
+
"standardFlowEnabled": true,
|
|
41
|
+
"directAccessGrantsEnabled": true,
|
|
42
|
+
"redirectUris": [
|
|
43
|
+
"http://localhost:5173/*",
|
|
44
|
+
"http://localhost:5174/*"
|
|
45
|
+
],
|
|
46
|
+
"webOrigins": [
|
|
47
|
+
"http://localhost:5173",
|
|
48
|
+
"http://localhost:5174"
|
|
49
|
+
],
|
|
50
|
+
"protocolMappers": [
|
|
51
|
+
{
|
|
52
|
+
"name": "aud-dataif-api",
|
|
53
|
+
"protocol": "openid-connect",
|
|
54
|
+
"protocolMapper": "oidc-audience-mapper",
|
|
55
|
+
"consentRequired": false,
|
|
56
|
+
"config": {
|
|
57
|
+
"included.client.audience": "dataif-api",
|
|
58
|
+
"id.token.claim": "false",
|
|
59
|
+
"access.token.claim": "true"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
]
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"clientId": "dataif-api",
|
|
66
|
+
"enabled": true,
|
|
67
|
+
"publicClient": false,
|
|
68
|
+
"serviceAccountsEnabled": true,
|
|
69
|
+
"secret": "dataif-api-secret",
|
|
70
|
+
"protocol": "openid-connect"
|
|
71
|
+
}
|
|
72
|
+
]
|
|
73
|
+
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#!/bin/sh
|
|
2
|
+
set -eu
|
|
3
|
+
|
|
4
|
+
# Opt-out switch: the values "false", "0" and "no" skip the bootstrap entirely.
enabled="${OLLAMA_MODEL_BOOTSTRAP_ENABLED:-true}"
case "$enabled" in
  false|0|no)
    echo "Ollama model bootstrap disabled."
    exit 0
    ;;
esac

# Settings, each overridable through the environment.
ollama_base_url="${OLLAMA_BASE_URL:-http://ollama:11434}"
gguf_url="${OLLAMA_MODEL_GGUF_URL:-}"
gguf_file="${OLLAMA_MODEL_GGUF_FILE:-sabia-7b.Q4_K_M.gguf}"
modelfile_template="${OLLAMA_MODELFILE_TEMPLATE:-/bootstrap/sabia-7b.Modelfile}"

# OLLAMA_MODEL_NAME wins over VANNA_OLLAMA_MODEL; "sabia-7b" is the fallback.
model_name="${OLLAMA_MODEL_NAME:-${VANNA_OLLAMA_MODEL:-sabia-7b}}"

# The GGUF file is stored under /models/<model name>/<file name>.
model_dir="/models/${model_name}"
model_path="${model_dir}/${gguf_file}"

# Defensive guard: with the ":-" fallbacks above the name is never empty.
[ -n "$model_name" ] || {
  echo "OLLAMA_MODEL_NAME or VANNA_OLLAMA_MODEL must be set." >&2
  exit 2
}
|
|
22
|
+
|
|
23
|
+
wait_for_ollama() {
  # Poll the Ollama tags endpoint until it answers, or abort the script.
  # OLLAMA_BOOTSTRAP_WAIT_RETRIES bounds the number of attempts (default 60)
  # and OLLAMA_BOOTSTRAP_WAIT_SECONDS is the pause between them (default 2).
  max_attempts="${OLLAMA_BOOTSTRAP_WAIT_RETRIES:-60}"
  pause_seconds="${OLLAMA_BOOTSTRAP_WAIT_SECONDS:-2}"
  attempt=0

  while [ "$attempt" -lt "$max_attempts" ]; do
    attempt=$((attempt + 1))
    # A successful response is also saved to /tmp/ollama-tags.json.
    curl -fsS "${ollama_base_url%/}/api/tags" >/tmp/ollama-tags.json && return 0
    echo "Waiting for Ollama at ${ollama_base_url} (${attempt}/${max_attempts})..."
    sleep "$pause_seconds"
  done

  echo "Ollama did not become reachable at ${ollama_base_url}." >&2
  exit 1
}
|
|
38
|
+
|
|
39
|
+
model_exists() {
  # Succeed when the Ollama tags listing contains a "name" or "model" entry
  # equal to ${model_name}, optionally followed by ":latest".
  #
  # This function is used as an `if` condition, where `set -e` is suppressed,
  # so a failed request is turned into an explicit "not found" result.
  curl -fsS "${ollama_base_url%/}/api/tags" >/tmp/ollama-tags.json || return 1

  # Escape ERE metacharacters so names such as "qwen2.5" are matched
  # literally instead of "." matching any character.
  model_name_re="$(printf '%s' "$model_name" | sed 's/[][\.*^$+?(){}|]/\\&/g')"

  grep -Eq "\"(name|model)\"[[:space:]]*:[[:space:]]*\"${model_name_re}(:latest)?\"" /tmp/ollama-tags.json
}
|
|
43
|
+
|
|
44
|
+
download_gguf() {
  # Ensure the GGUF file exists at ${model_path}, downloading it if needed.
  # Exits with status 2 when the file is missing and no URL was configured.
  if [ -f "$model_path" ]; then
    echo "GGUF already present at ${model_path}."
    return 0
  fi

  [ -n "$gguf_url" ] || {
    echo "OLLAMA_MODEL_GGUF_URL is required because ${model_path} is missing." >&2
    exit 2
  }

  mkdir -p "$model_dir"
  # Download to a ".part" name first so the final path only ever holds a
  # completed transfer.
  partial_path="${model_path}.part"
  echo "Downloading ${model_name} GGUF to ${model_path}..."

  # Build the curl arguments once; the bearer header is added only when
  # HF_TOKEN is set and non-empty.
  set -- -fL --retry 5 --retry-delay 10
  if [ -n "${HF_TOKEN:-}" ]; then
    set -- "$@" -H "Authorization: Bearer ${HF_TOKEN}"
  fi
  curl "$@" -o "$partial_path" "$gguf_url"

  mv "$partial_path" "$model_path"
}
|
|
65
|
+
|
|
66
|
+
json_escape_file() {
  # Render the Modelfile template and print it as the contents of a JSON
  # string (without the surrounding quotes) on a single line:
  #   1. replace __MODEL_GGUF_PATH__ with ${model_path};
  #   2. escape backslashes and double quotes;
  #   3. escape tabs and carriage returns (raw control characters are not
  #      valid inside a JSON string, e.g. when the template has CRLF endings);
  #   4. join the lines with a literal "\n" sequence.
  tab_char="$(printf '\t')"
  cr_char="$(printf '\r')"

  # Backslash, "&" and the "#" delimiter are special in the sed replacement
  # below; escape them so the path is inserted literally.
  sed_safe_path="$(printf '%s' "$model_path" | sed 's/[\\&#]/\\&/g')"

  sed "s#__MODEL_GGUF_PATH__#${sed_safe_path}#g" "$modelfile_template" \
    | sed 's/\\/\\\\/g; s/"/\\"/g' \
    | sed "s/${tab_char}/\\\\t/g; s/${cr_char}/\\\\r/g" \
    | awk '{printf "%s\\n", $0}'
}
|
|
71
|
+
|
|
72
|
+
create_model() {
  # Write a JSON request body containing the rendered Modelfile and POST it
  # to the Ollama /api/create endpoint.
  payload="/tmp/ollama-create-model.json"
  escaped_modelfile="$(json_escape_file)"

  # The values are passed as printf arguments, so they are written verbatim.
  {
    printf '{"name":"%s",' "$model_name"
    printf '"modelfile":"%s",' "$escaped_modelfile"
    printf '"stream":false}\n'
  } >"$payload"

  echo "Creating Ollama model ${model_name} from ${model_path}..."
  create_url="${ollama_base_url%/}/api/create"
  curl -fsS -H "Content-Type: application/json" --data-binary "@${payload}" "$create_url"
  # Terminate the line after whatever curl printed.
  echo
}
|
|
84
|
+
|
|
85
|
+
# Main flow: wait for the server, then create the model only when missing.
wait_for_ollama

if ! model_exists; then
  download_gguf
  create_model

  # Re-check the listing so a silently failed creation is reported.
  if ! model_exists; then
    echo "Ollama model ${model_name} was not visible after creation." >&2
    exit 1
  fi

  echo "Ollama model ${model_name} is ready."
  exit 0
fi

echo "Ollama model ${model_name} already exists."
exit 0
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
FROM __MODEL_GGUF_PATH__
|
|
2
|
+
|
|
3
|
+
PARAMETER temperature 0.1
|
|
4
|
+
PARAMETER top_p 0.9
|
|
5
|
+
PARAMETER top_k 40
|
|
6
|
+
PARAMETER num_ctx 2048
|
|
7
|
+
PARAMETER repeat_penalty 1.1
|
|
8
|
+
|
|
9
|
+
SYSTEM """
|
|
10
|
+
Voce gera SQL PostgreSQL para consultas analiticas sobre a camada curated.
|
|
11
|
+
Responda apenas com uma consulta SELECT.
|
|
12
|
+
Use somente tabelas e views explicitamente apresentadas no contexto.
|
|
13
|
+
Nao use DDL, DML, schemas operacionais ou multiplas statements.
|
|
14
|
+
"""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from dataif_pipelines.airflow.pnp_pipeline_factory import build_pipeline_dag
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# DAG module stored under dags/generated/: it builds the DAG for a single
# pipeline instance through the shared factory and binds the result to the
# module-level name `dag`. schedule=None is forwarded unchanged to the factory.
# NOTE(review): this file looks machine-generated; confirm before hand-editing.
dag = build_pipeline_dag(
    dag_id='2020_financeiro_fcc6f1f3_sync',
    pipeline_id='fcc6f1f3-ca8f-480e-bc7a-d48dcc8516ee',
    instance_key='pnp_pipe_pnp_2020_financeiro',
    schedule=None,
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Pipelines package for dataif."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
|
|
5
|
+
from airflow.decorators import dag, task
|
|
6
|
+
from airflow.operators.python import get_current_context
|
|
7
|
+
from airflow.utils.trigger_rule import TriggerRule
|
|
8
|
+
|
|
9
|
+
from dataif_pipelines.orchestration import pnp_workflow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _context_metadata() -> dict[str, object]:
    """Describe the currently executing Airflow task as a plain dict.

    Returns:
        A dict with the keys ``dag_id``, ``dag_run_id``, ``logical_date``
        (ISO-8601 string, or ``None`` when the context has no truthy logical
        date), ``task_id`` and ``map_index`` (``-1`` when the task instance
        has no ``map_index`` attribute).
    """
    ctx = get_current_context()
    run = ctx["dag_run"]
    ti = ctx["ti"]

    when = ctx.get("logical_date")
    iso_when = when.isoformat() if when else None

    return dict(
        dag_id=run.dag_id,
        dag_run_id=run.run_id,
        logical_date=iso_when,
        task_id=ti.task_id,
        map_index=getattr(ti, "map_index", -1),
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _task_states() -> tuple[str, dict[str, str]]:
    """Summarize the states of the other tasks in the current DAG run.

    Returns:
        A ``(status, states)`` pair. ``states`` maps each task id of the run,
        except the calling task's own id, to ``str()`` of its state.
        ``status`` is ``"failed"`` when any of those states is ``"failed"``
        or ``"upstream_failed"``, otherwise ``"success"``.
    """
    ctx = get_current_context()
    own_task_id = ctx["ti"].task_id

    states: dict[str, str] = {
        ti.task_id: str(ti.state)
        for ti in ctx["dag_run"].get_task_instances()
        if ti.task_id != own_task_id
    }

    failure_markers = {"failed", "upstream_failed"}
    if failure_markers.isdisjoint(states.values()):
        return ("success", states)
    return ("failed", states)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def build_pipeline_dag(*, dag_id: str, pipeline_id: str | None = None, instance_key: str, schedule: str | None):
    """Build the Airflow DAG that runs one pipeline instance end to end.

    Args:
        dag_id: Identifier given to the DAG.
        pipeline_id: Optional pipeline identifier; passed to the run
            registration and, when truthy, appended to the DAG tags.
        instance_key: Key of the pipeline instance; passed to the run
            registration and added to the DAG tags.
        schedule: Schedule forwarded to the ``@dag`` decorator (``None`` is
            forwarded unchanged).

    Returns:
        Whatever calling the ``@dag``-decorated builder returns.
    """
    @dag(
        dag_id=dag_id,
        start_date=datetime(2025, 1, 1),
        schedule=schedule,
        catchup=False,
        max_active_runs=1,
        default_args={
            "owner": "dataif",
            "depends_on_past": False,
            "retries": 1,
            "retry_delay": timedelta(minutes=5),
        },
        tags=["dataif", "governo", "nilo_pecanha", "pnp", "pipeline", instance_key, *( [pipeline_id] if pipeline_id else [] )],
    )
    def _build():
        # Comments (not docstrings) are used on the nested callables so that
        # no documentation metadata derived from docstrings is changed.

        @task
        def register_run() -> dict[str, object]:
            # Registers the run through pnp_workflow.register_pipeline_run and
            # returns its result, which the later tasks receive as `run_ref`.
            context = get_current_context()
            dag_run = context["dag_run"]
            conf = getattr(dag_run, "conf", None) or {}
            metadata = _context_metadata()
            # "operation" comes from the run conf; missing or blank means "sync".
            operation = str(conf.get("operation") or "sync").strip().lower() or "sync"
            # NOTE(review): a run is labelled "manual" only when it carries a
            # non-empty conf; a manual trigger without conf is labelled
            # "scheduled" — confirm this is intended.
            trigger_origin = "manual" if conf else "scheduled"
            return pnp_workflow.register_pipeline_run(
                {
                    "instance_key": instance_key,
                    "pipeline_id": pipeline_id,
                    "operation": operation,
                    "trigger_mode": f"airflow_{trigger_origin}_{operation}",
                    "requested_by": str(conf.get("requested_by") or f"airflow.{metadata['dag_id']}"),
                    "dag_id": str(metadata["dag_id"]),
                    "dag_run_id": str(metadata["dag_run_id"]),
                    "logical_date": metadata["logical_date"],
                },
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        @task
        def load_instance_config(run_ref: dict[str, object]) -> dict[str, object]:
            # Delegates to pnp_workflow.load_instance_config, passing this
            # task's id and map index.
            metadata = _context_metadata()
            return pnp_workflow.load_instance_config(
                run_ref,
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        @task
        def resolve_powerbi_catalog(run_ref: dict[str, object], instance_config: dict[str, object]) -> dict[str, object]:
            # Delegates to pnp_workflow.resolve_catalog.
            metadata = _context_metadata()
            return pnp_workflow.resolve_catalog(
                run_ref,
                instance_config,
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        @task.branch
        def select_execution_path(run_ref: dict[str, object]) -> str:
            # Returns the name of the task to follow: "finalize_run" for a
            # "validate" operation, "extract_raw" for anything else. The
            # strings match the nested function names defined below.
            operation = str(run_ref.get("operation") or "sync").strip().lower()
            if operation == "validate":
                return "finalize_run"
            return "extract_raw"

        @task
        def extract_raw(run_ref: dict[str, object], instance_config: dict[str, object]) -> dict[str, object]:
            # Delegates to pnp_workflow.sync_raw.
            metadata = _context_metadata()
            return pnp_workflow.sync_raw(
                run_ref,
                instance_config,
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        @task
        def materialize_staging(run_ref: dict[str, object]) -> dict[str, object]:
            # Delegates to pnp_workflow.materialize_staging.
            metadata = _context_metadata()
            return pnp_workflow.materialize_staging(
                run_ref,
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        @task
        def build_curated_views(run_ref: dict[str, object]) -> dict[str, object]:
            # Delegates to pnp_workflow.materialize_curated.
            metadata = _context_metadata()
            return pnp_workflow.materialize_curated(
                run_ref,
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        @task
        def run_quality_checks(run_ref: dict[str, object]) -> dict[str, object]:
            # Delegates to pnp_workflow.run_quality_checks.
            metadata = _context_metadata()
            return pnp_workflow.run_quality_checks(
                run_ref,
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        # ALL_DONE lets this task run once its upstream tasks have finished,
        # whatever their final state, so the run is always finalized.
        @task(trigger_rule=TriggerRule.ALL_DONE)
        def finalize_run(run_ref: dict[str, object]) -> dict[str, object]:
            # Reports the overall status and the per-task states of the other
            # tasks in the run to pnp_workflow.finalize_run.
            metadata = _context_metadata()
            dag_status, task_states = _task_states()
            return pnp_workflow.finalize_run(
                run_ref,
                dag_status=dag_status,
                task_states=task_states,
                task_id=str(metadata["task_id"]),
                map_index=int(metadata["map_index"]),
            )

        # Instantiate the tasks; passing run_ref / instance_config as
        # arguments also makes the consumers depend on their producers.
        run_ref = register_run()
        instance_config = load_instance_config(run_ref)
        catalog = resolve_powerbi_catalog(run_ref, instance_config)
        execution_path = select_execution_path(run_ref)
        raw = extract_raw(run_ref, instance_config)
        staging = materialize_staging(run_ref)
        curated = build_curated_views(run_ref)
        quality = run_quality_checks(run_ref)
        final = finalize_run(run_ref)

        # Preparation chain, ending at the branch decision.
        run_ref >> instance_config >> catalog >> execution_path
        # Full processing path followed for non-"validate" operations.
        execution_path >> raw >> staging >> curated >> quality >> final
        # Direct edge so the branch can select finalize_run on its own.
        execution_path >> final

    return _build()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Connectors for dataif pipelines."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Base connector abstractions."""
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
|
|
6
|
+
from .types import NormalizedRecord, RawRecord, RunContext
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BaseConnector(ABC):
    """Abstract interface that every concrete connector must implement.

    All five methods are abstract, so the class cannot be instantiated until
    a subclass overrides each of them.

    NOTE(review): the method names suggest a fetch -> normalize -> load_raw ->
    post_load_checks sequence, but the call order is not established here.
    """

    @abstractmethod
    def connector_id(self) -> str:
        """Return the connector's identifier as a string."""
        raise NotImplementedError

    @abstractmethod
    def fetch(self, run_context: RunContext) -> list[RawRecord]:
        """Return the list of raw records for ``run_context``."""
        raise NotImplementedError

    @abstractmethod
    def normalize(self, raw_records: list[RawRecord], run_context: RunContext) -> list[NormalizedRecord]:
        """Return normalized records derived from ``raw_records``."""
        raise NotImplementedError

    @abstractmethod
    def load_raw(self, normalized_records: list[NormalizedRecord], run_context: RunContext) -> int:
        """Load ``normalized_records`` and return an integer.

        NOTE(review): presumably the number of records loaded — confirm
        against the concrete connectors.
        """
        raise NotImplementedError

    @abstractmethod
    def post_load_checks(self, run_id: str) -> dict[str, object]:
        """Return a dict of check results for the run identified by ``run_id``."""
        raise NotImplementedError
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Immutable value object passed to connector methods; frozen=True makes
# attribute assignment after construction raise an error.
@dataclass(frozen=True)
class RunContext:
    # Identifier of the run (presumably unique per pipeline run — confirm).
    run_id: str
    # Start timestamp of the run.
    # NOTE(review): timezone awareness is not enforced here — confirm callers.
    started_at: datetime
    # URL of the source (presumably the one the connector reads — confirm).
    source_url: str
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Both aliases are plain ``dict[str, Any]``: they differ in name only and
# carry no schema of their own.
RawRecord = dict[str, Any]
NormalizedRecord = dict[str, Any]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Nilo Pecanha connector."""
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from dataif_pipelines.connectors.nilo_pecanha.powerbi_microdados import DEFAULT_POWERBI_MICRODADOS_URL
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class NiloConfig:
    """Immutable settings produced by :func:`load_config`."""

    # Value of NILO_PECANHA_ENDPOINT, or the default Power BI microdados URL.
    endpoint: str
    # Value of NILO_TIMEOUT_SECONDS as an integer (default 60).
    timeout_seconds: int


def load_config() -> NiloConfig:
    """Build a :class:`NiloConfig` from environment variables.

    ``NILO_PECANHA_ENDPOINT`` falls back to ``DEFAULT_POWERBI_MICRODADOS_URL``
    and ``NILO_TIMEOUT_SECONDS`` falls back to ``60``. A variable that is set
    but empty or whitespace-only is treated as unset, so a blank entry in an
    env file no longer yields an empty endpoint or crashes ``int("")``.

    Returns:
        The populated, frozen configuration object.

    Raises:
        ValueError: If ``NILO_TIMEOUT_SECONDS`` is set to a non-integer value.
    """
    # Strip first so whitespace-only values count as blank.
    endpoint = (os.getenv("NILO_PECANHA_ENDPOINT") or "").strip() or DEFAULT_POWERBI_MICRODADOS_URL
    raw_timeout = (os.getenv("NILO_TIMEOUT_SECONDS") or "").strip() or "60"
    try:
        timeout_seconds = int(raw_timeout)
    except ValueError as exc:
        raise ValueError(
            f"NILO_TIMEOUT_SECONDS must be an integer number of seconds, got {raw_timeout!r}"
        ) from exc
    return NiloConfig(endpoint=endpoint, timeout_seconds=timeout_seconds)
|