@dataif/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/dataif.js +623 -0
- package/package.json +26 -0
- package/scripts/build-template.mjs +72 -0
- package/templates/dataif/README.md +157 -0
- package/templates/dataif/infra/.env.example +119 -0
- package/templates/dataif/infra/.env.stg.example +119 -0
- package/templates/dataif/infra/airflow/Dockerfile +11 -0
- package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
- package/templates/dataif/infra/airflow/requirements.txt +3 -0
- package/templates/dataif/infra/docker-compose.yml +306 -0
- package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
- package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
- package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
- package/templates/dataif/infra/keycloak/Dockerfile +4 -0
- package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
- package/templates/dataif/infra/ollama/Dockerfile +9 -0
- package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
- package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
- package/templates/dataif/infra/postgres/Dockerfile +4 -0
- package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
- package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
- package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
- package/templates/dataif/scripts/configure-env.sh +149 -0
- package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
- package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
- package/templates/dataif/scripts/deploy.sh +79 -0
- package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
- package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
- package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
- package/templates/dataif/scripts/publish-images.sh +31 -0
- package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
- package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
- package/templates/dataif/services/api/.dockerignore +18 -0
- package/templates/dataif/services/api/Dockerfile +12 -0
- package/templates/dataif/services/api/app/__init__.py +1 -0
- package/templates/dataif/services/api/app/auth.py +48 -0
- package/templates/dataif/services/api/app/config.py +59 -0
- package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
- package/templates/dataif/services/api/app/main.py +2432 -0
- package/templates/dataif/services/api/app/metabase_admin.py +191 -0
- package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
- package/templates/dataif/services/api/app/metabase_embed.py +15 -0
- package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
- package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
- package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
- package/templates/dataif/services/api/app/vanna_client.py +32 -0
- package/templates/dataif/services/api/requirements.txt +9 -0
- package/templates/dataif/services/vanna/.dockerignore +18 -0
- package/templates/dataif/services/vanna/Dockerfile +12 -0
- package/templates/dataif/services/vanna/app/config.py +57 -0
- package/templates/dataif/services/vanna/app/main.py +108 -0
- package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
- package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
- package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
- package/templates/dataif/services/vanna/requirements.txt +8 -0
- package/templates/dataif/services/web/.dockerignore +13 -0
- package/templates/dataif/services/web/Dockerfile +16 -0
- package/templates/dataif/services/web/index.html +12 -0
- package/templates/dataif/services/web/nginx.conf +74 -0
- package/templates/dataif/services/web/package-lock.json +4397 -0
- package/templates/dataif/services/web/package.json +32 -0
- package/templates/dataif/services/web/postcss.config.mjs +5 -0
- package/templates/dataif/services/web/src/App.jsx +2817 -0
- package/templates/dataif/services/web/src/adminAuth.js +245 -0
- package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
- package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
- package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
- package/templates/dataif/services/web/src/assets/if.svg +0 -0
- package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
- package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
- package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
- package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
- package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
- package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
- package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
- package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
- package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
- package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
- package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
- package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
- package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
- package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
- package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
- package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
- package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
- package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
- package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
- package/templates/dataif/services/web/src/main.jsx +14 -0
- package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
- package/templates/dataif/services/web/src/styles/globals.css +60 -0
- package/templates/dataif/services/web/src/styles/theme.css +1326 -0
- package/templates/dataif/services/web/src/styles/typography.css +430 -0
- package/templates/dataif/services/web/src/styles.css +1287 -0
- package/templates/dataif/services/web/src/utils/cx.ts +24 -0
- package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
- package/templates/dataif/services/web/vite.config.js +14 -0
- package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
- package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
- package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
- package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
- package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
- package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
- package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
- package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
- package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
- package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
- package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
- package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
- package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
- package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
|
|
6
|
+
// ES modules do not define __dirname; derive it from this module's file URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
// Parent of the directory holding this script (the package root).
const packageRoot = path.resolve(__dirname, "..");
|
|
8
|
+
// Two levels above the package root; template sources are resolved against it.
const repoRoot = path.resolve(packageRoot, "..", "..");
|
|
9
|
+
// Output directory that this script deletes and regenerates on every run.
const templateRoot = path.join(packageRoot, "templates", "dataif");
|
|
10
|
+
|
|
11
|
+
// Top-level paths, relative to repoRoot, that are copied into the template.
// Each one must exist; the build loop throws when any of them is missing.
const entries = [
|
|
12
|
+
"infra",
|
|
13
|
+
"pipelines",
|
|
14
|
+
"scripts",
|
|
15
|
+
"services",
|
|
16
|
+
"sql",
|
|
17
|
+
"README.md"
|
|
18
|
+
];
|
|
19
|
+
|
|
20
|
+
// Base names that are never copied. The match is on the final path segment
// only, so a file or directory with one of these names is skipped at any depth.
const ignoredNames = new Set([
|
|
21
|
+
".env",
|
|
22
|
+
".git",
|
|
23
|
+
".pytest_cache",
|
|
24
|
+
"__pycache__",
|
|
25
|
+
"node_modules",
|
|
26
|
+
"dist",
|
|
27
|
+
"build",
|
|
28
|
+
".DS_Store"
|
|
29
|
+
]);
|
|
30
|
+
|
|
31
|
+
/**
 * Decide whether a path belongs in the generated template.
 *
 * Only the base name of `src` is inspected; the file system is not touched.
 *
 * @param {string} src - Path of the candidate file or directory.
 * @returns {boolean} `false` when the base name is listed in `ignoredNames`
 *   or ends with ".pyc"; `true` otherwise.
 */
function shouldCopy(src) {
|
|
32
|
+
const name = path.basename(src);
|
|
33
|
+
if (ignoredNames.has(name)) {
|
|
34
|
+
return false;
|
|
35
|
+
}
|
|
36
|
+
// Compiled Python bytecode is excluded by suffix rather than by exact name.
if (name.endsWith(".pyc")) {
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
return true;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
 * Copy `src` to `dest`, descending into directories and skipping every path
 * rejected by `shouldCopy`.
 *
 * Directories are created at `dest` and their children copied one by one;
 * regular files are copied and then given the source's mode bits.
 *
 * @param {string} src - Existing source file or directory.
 * @param {string} dest - Destination path; missing parent directories are created.
 * @returns {void}
 */
function copyRecursive(src, dest) {
|
|
43
|
+
// An ignored directory returns here, so nothing beneath it is visited.
if (!shouldCopy(src)) {
|
|
44
|
+
return;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// NOTE(review): statSync follows symbolic links, so a link is copied as its
// target and a dangling link throws — confirm that is the intended behavior.
const stat = fs.statSync(src);
|
|
48
|
+
if (stat.isDirectory()) {
|
|
49
|
+
fs.mkdirSync(dest, { recursive: true });
|
|
50
|
+
for (const entry of fs.readdirSync(src)) {
|
|
51
|
+
copyRecursive(path.join(src, entry), path.join(dest, entry));
|
|
52
|
+
}
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// Needed for top-level file entries, whose parent is not created by the directory branch.
fs.mkdirSync(path.dirname(dest), { recursive: true });
|
|
57
|
+
fs.copyFileSync(src, dest);
|
|
58
|
+
// Re-apply the source's mode bits explicitly to the copied file.
fs.chmodSync(dest, stat.mode);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Start from an empty output directory so files deleted from the sources do
// not linger in the template; `force: true` tolerates a missing directory.
fs.rmSync(templateRoot, { recursive: true, force: true });
|
|
62
|
+
fs.mkdirSync(templateRoot, { recursive: true });
|
|
63
|
+
|
|
64
|
+
// Copy every listed entry, aborting on the first one missing from repoRoot.
for (const entry of entries) {
|
|
65
|
+
const src = path.join(repoRoot, entry);
|
|
66
|
+
if (!fs.existsSync(src)) {
|
|
67
|
+
throw new Error(`Arquivo esperado nao encontrado: ${src}`);
|
|
68
|
+
}
|
|
69
|
+
copyRecursive(src, path.join(templateRoot, entry));
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
console.log(`Template DataIF gerado em ${templateRoot}`);
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# dataif
|
|
2
|
+
|
|
3
|
+
Plataforma conteinerizada para ingestão de dados governamentais em PostgreSQL, com operação administrativa via API e UI, ingestão no Airflow, dashboards no Metabase e consulta assistida via Vanna.
|
|
4
|
+
|
|
5
|
+
## Estado atual da PNP
|
|
6
|
+
- O conector da Plataforma Nilo Pecanha opera somente em `powerbi_microdados`.
|
|
7
|
+
- A origem principal é o relatório público do Power BI com os links de microdados.
|
|
8
|
+
- O fluxo legado com browser assistido foi removido da trilha operacional.
|
|
9
|
+
- O Airflow ficou restrito a validar fontes e carregar dados na camada `raw`.
|
|
10
|
+
- O tratamento analítico posterior em `staging` e a publicação em `curated` acontecem manualmente via SGBD, fora do Airflow.
|
|
11
|
+
|
|
12
|
+
## Stack
|
|
13
|
+
- PostgreSQL
|
|
14
|
+
- Apache Airflow
|
|
15
|
+
- FastAPI
|
|
16
|
+
- React + Vite
|
|
17
|
+
- Metabase 60
|
|
18
|
+
- Vanna
|
|
19
|
+
- Keycloak
|
|
20
|
+
|
|
21
|
+
## Estrutura
|
|
22
|
+
- `infra/`: Docker Compose, imagens e bootstrap da stack
|
|
23
|
+
- `pipelines/`: DAGs e conectores
|
|
24
|
+
- `services/api/`: API administrativa e embeds
|
|
25
|
+
- `services/web/`: frontend React
|
|
26
|
+
- `services/vanna/`: servico de NL2SQL
|
|
27
|
+
- `sql/`: schemas, tabelas e views curadas
|
|
28
|
+
- `docs/`: arquitetura e material de apoio
|
|
29
|
+
|
|
30
|
+
## Subida rapida
|
|
31
|
+
1. `./scripts/deploy.sh stg`
|
|
32
|
+
2. Para producao local em nova maquina: `./scripts/deploy.sh prod`
|
|
33
|
+
3. Acessos padrao:
|
|
34
|
+
- Staging Web: `http://localhost:15173`
|
|
35
|
+
- Producao Web: porta definida no configurador
|
|
36
|
+
- API: `/api` via Web ou porta configurada
|
|
37
|
+
- Airflow via Web: `/airflow/`
|
|
38
|
+
- Metabase via Web: `/metabase/`
|
|
39
|
+
|
|
40
|
+
Versao padrao do Metabase:
|
|
41
|
+
- `METABASE_IMAGE_TAG=v0.60.1`
|
|
42
|
+
|
|
43
|
+
## Instalador npm
|
|
44
|
+
|
|
45
|
+
Tambem existe uma CLI npm para preparar uma maquina nova sem exigir que o usuario conheca os scripts internos:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npx @dataif/cli install
|
|
49
|
+
npx @dataif/cli deploy
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
O instalador cria uma copia local da stack em `~/.dataif/current`, valida Docker/Docker Compose, coleta as credenciais de forma interativa e entao sobe os containers. Para usar uma pasta especifica:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
npx @dataif/cli install --dir ./dataif-local
|
|
56
|
+
npx @dataif/cli deploy --dir ./dataif-local --mode prod
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Durante o desenvolvimento do pacote:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
cd packages/dataif-cli
|
|
63
|
+
npm run smoke
|
|
64
|
+
npm pack --dry-run
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Guia de uso local
|
|
68
|
+
|
|
69
|
+
Pre-requisitos:
|
|
70
|
+
- Docker Engine com Docker Compose v2
|
|
71
|
+
- 6 GB de RAM livres para stack basica
|
|
72
|
+
- 12 GB de RAM livres se usar Ollama local
|
|
73
|
+
|
|
74
|
+
Subir ambiente de teste/staging:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
./scripts/deploy.sh stg
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Esse modo usa `infra/.env.stg.example`, cria `infra/.env` com valores presetados e sobe a mesma stack Docker do projeto. Use para desenvolvimento, testes e demonstracoes locais. Para recriar `infra/.env` de staging:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
DATAIF_FORCE_ENV=true ./scripts/deploy.sh stg
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Subir producao local em nova maquina:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
./scripts/deploy.sh prod
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Esse modo usa `infra/.env.example` apenas como template versionado, chama `scripts/configure-env.sh`, gera segredos e grava `infra/.env`. Não edite `infra/.env.example` para uma instância real. Configure senhas e `METABASE_EMBED_SECRET` antes do primeiro `up`, pois o Postgres inicializa usuários somente na criação do volume.
|
|
93
|
+
|
|
94
|
+
Validar configuração sem subir:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
cd infra
|
|
98
|
+
docker compose --env-file .env config >/dev/null
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Ativar LLM local com Ollama:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
./scripts/deploy.sh stg --llm
|
|
105
|
+
# ou
|
|
106
|
+
./scripts/deploy.sh prod --llm
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Refazer do zero na maquina local:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
cd infra
|
|
113
|
+
docker compose --env-file .env down -v
|
|
114
|
+
cd ..
|
|
115
|
+
./scripts/deploy.sh stg
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Depois da instalação, o provider/modelo do Vanna pode ser ajustado pela tela `Configurações Admin`. Sem Ollama ativo e sem chave Maritaca, o serviço Vanna permanece disponível, mas respostas por LLM ficam indisponíveis até configurar um provider.
|
|
119
|
+
|
|
120
|
+
## Fluxo de dados da PNP
|
|
121
|
+
1. O admin acessa a area administrativa via Keycloak.
|
|
122
|
+
2. A UI consulta o catalogo publico da PNP no Power BI.
|
|
123
|
+
3. O admin cria uma conexão selecionando anos, tipos e cron.
|
|
124
|
+
4. O Airflow dispara a validação ou a ingestão da instancia.
|
|
125
|
+
5. O conector baixa os arquivos publicos, grava manifestos em `raw.nilo_pecanha_assets` e linhas parseadas em `raw.nilo_pecanha_records`.
|
|
126
|
+
6. O tratamento de `raw` para `staging` e a promocao final para `curated` sao feitos manualmente via SGBD.
|
|
127
|
+
7. Metabase e Vanna consomem a camada `curated`.
|
|
128
|
+
|
|
129
|
+
## Vanna AI local
|
|
130
|
+
O Vanna usa apenas relações qualificadas no schema `curated` e é chamado pela tela `Inicio`.
|
|
131
|
+
|
|
132
|
+
Para usar LLM local com Ollama:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
./scripts/use-vanna-ollama.sh
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
O comando define `VANNA_LLM_PROVIDER=ollama`, preserva `VANNA_MARITACA_API_KEY`, sobe o servico Ollama, carrega/importa o modelo configurado e reinicia o Vanna. Use `./scripts/use-vanna-ollama.sh --no-bootstrap` quando o modelo ja existir no Ollama e voce quiser pular apenas o bootstrap.
|
|
139
|
+
|
|
140
|
+
Mantenha `VANNA_ALLOWED_SCHEMA=curated`; novas tabelas, views e materialized views em `curated` entram no treinamento quando o `vanna_user` tiver `SELECT`.
|
|
141
|
+
|
|
142
|
+
A configuração efetiva de provider/modelo tambem pode ser ajustada pela tela `Configurações Admin`. Os overrides ficam persistidos em banco e passam a valer sem editar `.env`; o `.env` continua como bootstrap inicial para credenciais, portas e valores default.
|
|
143
|
+
|
|
144
|
+
Também é possível usar a API da Maritaca com `VANNA_LLM_PROVIDER=maritaca`, `VANNA_MARITACA_API_KEY` e `VANNA_MARITACA_MODEL=sabia-4`. O modelo Sabiá local via Ollama/GGUF continua opcional e não é redistribuído nas imagens DockerHub do projeto.
|
|
145
|
+
|
|
146
|
+
## Admins e Metabase
|
|
147
|
+
A tela `Configurações Admin` cria e remove usuarios administrativos em dois sistemas:
|
|
148
|
+
|
|
149
|
+
- Keycloak, como identidade de login do produto
|
|
150
|
+
- Metabase, como administradores da instancia analitica
|
|
151
|
+
|
|
152
|
+
O vínculo entre eles usa o email do usuário. Se a criação no Metabase falhar, a API tenta desfazer a criação correspondente no Keycloak para evitar estado parcial.
|
|
153
|
+
|
|
154
|
+
## Observacoes
|
|
155
|
+
- O armazenamento operacional continua em `config.connector_endpoints`.
|
|
156
|
+
- O frontend ja foi reorganizado em paginas de `Inicio`, `Pipelines`, `Conexoes`, `Dashboards` e `SQL`.
|
|
157
|
+
- A documentacao em `docs/` descreve o fluxo oficial atual: `Power BI -> Airflow -> raw`, com `staging` e `curated` fora da orquestração.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Imagens
|
|
2
|
+
COMPOSE_PROJECT_NAME=dataif
|
|
3
|
+
DATAIF_IMAGE_REGISTRY=docker.io/dataif
|
|
4
|
+
DATAIF_IMAGE_TAG=latest
|
|
5
|
+
|
|
6
|
+
# Postgres
|
|
7
|
+
POSTGRES_HOST=postgres
|
|
8
|
+
POSTGRES_PORT=5432
|
|
9
|
+
POSTGRES_EXPOSE_PORT=5433
|
|
10
|
+
POSTGRES_SUPERUSER=postgres
|
|
11
|
+
POSTGRES_SUPERUSER_PASSWORD=postgres
|
|
12
|
+
|
|
13
|
+
# Bancos
|
|
14
|
+
DATAIF_DB_NAME=dataif
|
|
15
|
+
AIRFLOW_DB_NAME=airflow
|
|
16
|
+
METABASE_APP_DB_NAME=metabaseapp
|
|
17
|
+
|
|
18
|
+
# Usuarios banco
|
|
19
|
+
DATAIF_ETL_USER=etl_user
|
|
20
|
+
DATAIF_ETL_PASSWORD=etl_password
|
|
21
|
+
DATAIF_METABASE_USER=metabase_user
|
|
22
|
+
DATAIF_METABASE_PASSWORD=metabase_password
|
|
23
|
+
DATAIF_VANNA_USER=vanna_user
|
|
24
|
+
DATAIF_VANNA_PASSWORD=vanna_password
|
|
25
|
+
AIRFLOW_DB_USER=airflow
|
|
26
|
+
AIRFLOW_DB_PASSWORD=airflow
|
|
27
|
+
METABASE_APP_DB_USER=metabase_app
|
|
28
|
+
METABASE_APP_DB_PASSWORD=metabase_app
|
|
29
|
+
|
|
30
|
+
# Airflow
|
|
31
|
+
AIRFLOW_UID=50000
|
|
32
|
+
AIRFLOW_ADMIN_USER=admin
|
|
33
|
+
AIRFLOW_ADMIN_PASSWORD=admin
|
|
34
|
+
AIRFLOW_ADMIN_EMAIL=admin@dataif.local
|
|
35
|
+
AIRFLOW_API_URL=http://airflow-webserver:8080
|
|
36
|
+
|
|
37
|
+
# Metabase
|
|
38
|
+
METABASE_SITE_URL=http://localhost:5173/metabase
|
|
39
|
+
METABASE_API_URL=http://metabase:3000
|
|
40
|
+
METABASE_IMAGE_TAG=v0.60.1
|
|
41
|
+
METABASE_EMBED_SECRET=replace_with_secure_secret
|
|
42
|
+
METABASE_ALLOWED_DASHBOARD_IDS=2
|
|
43
|
+
METABASE_DEFAULT_DASHBOARD_ID=2
|
|
44
|
+
METABASE_ADMIN_EMAIL=admin@dataif.local
|
|
45
|
+
METABASE_ADMIN_PASSWORD=admin
|
|
46
|
+
METABASE_ADMIN_FIRST_NAME=DataIF
|
|
47
|
+
METABASE_ADMIN_LAST_NAME=Metabase
|
|
48
|
+
METABASE_SITE_NAME=dataif
|
|
49
|
+
METABASE_ALLOW_TRACKING=false
|
|
50
|
+
|
|
51
|
+
# Keycloak
|
|
52
|
+
KEYCLOAK_URL=http://keycloak:8080
|
|
53
|
+
KEYCLOAK_REALM=dataif
|
|
54
|
+
KEYCLOAK_CLIENT_ID=dataif-web
|
|
55
|
+
KEYCLOAK_AUDIENCE=dataif-api
|
|
56
|
+
KEYCLOAK_ADMIN_REALM=master
|
|
57
|
+
KEYCLOAK_ADMIN_CLIENT_ID=admin-cli
|
|
58
|
+
KEYCLOAK_ADMIN=admin
|
|
59
|
+
KEYCLOAK_ADMIN_PASSWORD=admin
|
|
60
|
+
|
|
61
|
+
# Portas
|
|
62
|
+
API_PORT=8000
|
|
63
|
+
WEB_PORT=5173
|
|
64
|
+
AIRFLOW_PORT=8088
|
|
65
|
+
CONNECTOR_AUTH_WEB_PORT=5174
|
|
66
|
+
AUTH_WORKER_PORT=8090
|
|
67
|
+
AUTH_WORKER_VNC_PORT=5900
|
|
68
|
+
AUTH_WORKER_NOVNC_PORT=6080
|
|
69
|
+
METABASE_PORT=3000
|
|
70
|
+
KEYCLOAK_PORT=8081
|
|
71
|
+
VANNA_PORT=9000
|
|
72
|
+
|
|
73
|
+
# Vanna
|
|
74
|
+
VANNA_LLM_PROVIDER=ollama
|
|
75
|
+
VANNA_OLLAMA_BASE_URL=http://ollama:11434
|
|
76
|
+
VANNA_OLLAMA_MODEL=sabia-7b
|
|
77
|
+
VANNA_MARITACA_API_URL=https://chat.maritaca.ai/api/chat/completions
|
|
78
|
+
VANNA_MARITACA_API_KEY=
|
|
79
|
+
VANNA_MARITACA_MODEL=sabia-4
|
|
80
|
+
VANNA_MARITACA_TIMEOUT_SECONDS=60
|
|
81
|
+
VANNA_VECTORSTORE_PATH=/data/vanna/chroma
|
|
82
|
+
VANNA_AUTO_TRAIN=true
|
|
83
|
+
VANNA_ALLOWED_SCHEMA=curated
|
|
84
|
+
ALLOWED_CURATED_VIEWS=
|
|
85
|
+
VANNA_MAX_ROWS=200
|
|
86
|
+
|
|
87
|
+
# Ollama
|
|
88
|
+
OLLAMA_IMAGE_TAG=latest
|
|
89
|
+
OLLAMA_PORT=11434
|
|
90
|
+
OLLAMA_MEM_LIMIT=8192m
|
|
91
|
+
OLLAMA_MODEL_BOOTSTRAP_ENABLED=true
|
|
92
|
+
OLLAMA_MODEL_NAME=sabia-7b
|
|
93
|
+
OLLAMA_MODEL_GGUF_URL=https://huggingface.co/QuantFactory/sabia-7b-GGUF/resolve/main/sabia-7b.Q4_K_M.gguf
|
|
94
|
+
OLLAMA_MODEL_GGUF_FILE=sabia-7b.Q4_K_M.gguf
|
|
95
|
+
HF_TOKEN=
|
|
96
|
+
|
|
97
|
+
# PNP
|
|
98
|
+
NILO_PECANHA_ENDPOINT=https://dadosabertos.example.gov.br/nilo-pecanha
|
|
99
|
+
NILO_PAGE_SIZE=500
|
|
100
|
+
NILO_MAX_PAGES=100
|
|
101
|
+
NILO_TIMEOUT_SECONDS=60
|
|
102
|
+
|
|
103
|
+
# Auth worker
|
|
104
|
+
AUTH_SESSION_TTL_SECONDS=21600
|
|
105
|
+
NILO_CRAWLER_SELENIUM_ENABLED=true
|
|
106
|
+
NILO_CRAWLER_SELENIUM_REMOTE_URL=http://selenium:4444/wd/hub
|
|
107
|
+
NILO_CRAWLER_SELENIUM_TIMEOUT_SECONDS=45
|
|
108
|
+
AUTH_WORKER_HEADLESS=false
|
|
109
|
+
AUTH_WORKER_STORAGE_DIR=/data/auth-worker
|
|
110
|
+
AUTH_WORKER_NAVIGATION_TIMEOUT_MS=45000
|
|
111
|
+
AUTH_WORKER_SCREEN_GEOMETRY=1440x960x24
|
|
112
|
+
AUTH_WORKER_PUBLIC_BASE_URL=http://localhost:6080
|
|
113
|
+
AUTH_WORKER_BROWSER_LOCALE=pt-BR
|
|
114
|
+
AUTH_WORKER_BROWSER_TIMEZONE=America/Sao_Paulo
|
|
115
|
+
AUTH_WORKER_EXTRA_ARGS=--disable-blink-features=AutomationControlled
|
|
116
|
+
AUTH_WORKER_EXTERNAL_CDP_URL=
|
|
117
|
+
|
|
118
|
+
# Selenium
|
|
119
|
+
SELENIUM_PORT=4444
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Imagens
|
|
2
|
+
COMPOSE_PROJECT_NAME=dataif-stg
|
|
3
|
+
DATAIF_IMAGE_REGISTRY=docker.io/dataif
|
|
4
|
+
DATAIF_IMAGE_TAG=latest
|
|
5
|
+
|
|
6
|
+
# Postgres
|
|
7
|
+
POSTGRES_HOST=postgres
|
|
8
|
+
POSTGRES_PORT=5432
|
|
9
|
+
POSTGRES_EXPOSE_PORT=15433
|
|
10
|
+
POSTGRES_SUPERUSER=postgres
|
|
11
|
+
POSTGRES_SUPERUSER_PASSWORD=postgres_stg
|
|
12
|
+
|
|
13
|
+
# Bancos
|
|
14
|
+
DATAIF_DB_NAME=dataif
|
|
15
|
+
AIRFLOW_DB_NAME=airflow
|
|
16
|
+
METABASE_APP_DB_NAME=metabaseapp
|
|
17
|
+
|
|
18
|
+
# Usuarios banco
|
|
19
|
+
DATAIF_ETL_USER=etl_user
|
|
20
|
+
DATAIF_ETL_PASSWORD=etl_password_stg
|
|
21
|
+
DATAIF_METABASE_USER=metabase_user
|
|
22
|
+
DATAIF_METABASE_PASSWORD=metabase_password_stg
|
|
23
|
+
DATAIF_VANNA_USER=vanna_user
|
|
24
|
+
DATAIF_VANNA_PASSWORD=vanna_password_stg
|
|
25
|
+
AIRFLOW_DB_USER=airflow
|
|
26
|
+
AIRFLOW_DB_PASSWORD=airflow_stg
|
|
27
|
+
METABASE_APP_DB_USER=metabase_app
|
|
28
|
+
METABASE_APP_DB_PASSWORD=metabase_app_stg
|
|
29
|
+
|
|
30
|
+
# Airflow
|
|
31
|
+
AIRFLOW_UID=50000
|
|
32
|
+
AIRFLOW_ADMIN_USER=admin
|
|
33
|
+
AIRFLOW_ADMIN_PASSWORD=admin_stg
|
|
34
|
+
AIRFLOW_ADMIN_EMAIL=admin-stg@dataif.local
|
|
35
|
+
AIRFLOW_API_URL=http://airflow-webserver:8080
|
|
36
|
+
|
|
37
|
+
# Metabase
|
|
38
|
+
METABASE_SITE_URL=http://localhost:15173/metabase
|
|
39
|
+
METABASE_API_URL=http://metabase:3000
|
|
40
|
+
METABASE_IMAGE_TAG=v0.60.1
|
|
41
|
+
METABASE_EMBED_SECRET=stg_embed_secret_change_if_shared
|
|
42
|
+
METABASE_ALLOWED_DASHBOARD_IDS=2
|
|
43
|
+
METABASE_DEFAULT_DASHBOARD_ID=2
|
|
44
|
+
METABASE_ADMIN_EMAIL=admin-stg@dataif.local
|
|
45
|
+
METABASE_ADMIN_PASSWORD=admin_stg
|
|
46
|
+
METABASE_ADMIN_FIRST_NAME=DataIF
|
|
47
|
+
METABASE_ADMIN_LAST_NAME=Staging
|
|
48
|
+
METABASE_SITE_NAME=dataif-stg
|
|
49
|
+
METABASE_ALLOW_TRACKING=false
|
|
50
|
+
|
|
51
|
+
# Keycloak
|
|
52
|
+
KEYCLOAK_URL=http://keycloak:8080
|
|
53
|
+
KEYCLOAK_REALM=dataif
|
|
54
|
+
KEYCLOAK_CLIENT_ID=dataif-web
|
|
55
|
+
KEYCLOAK_AUDIENCE=dataif-api
|
|
56
|
+
KEYCLOAK_ADMIN_REALM=master
|
|
57
|
+
KEYCLOAK_ADMIN_CLIENT_ID=admin-cli
|
|
58
|
+
KEYCLOAK_ADMIN=admin
|
|
59
|
+
KEYCLOAK_ADMIN_PASSWORD=admin_stg
|
|
60
|
+
|
|
61
|
+
# Portas
|
|
62
|
+
API_PORT=18000
|
|
63
|
+
WEB_PORT=15173
|
|
64
|
+
AIRFLOW_PORT=18088
|
|
65
|
+
CONNECTOR_AUTH_WEB_PORT=15174
|
|
66
|
+
AUTH_WORKER_PORT=18090
|
|
67
|
+
AUTH_WORKER_VNC_PORT=15900
|
|
68
|
+
AUTH_WORKER_NOVNC_PORT=16080
|
|
69
|
+
METABASE_PORT=13000
|
|
70
|
+
KEYCLOAK_PORT=18081
|
|
71
|
+
VANNA_PORT=19000
|
|
72
|
+
|
|
73
|
+
# Vanna
|
|
74
|
+
VANNA_LLM_PROVIDER=ollama
|
|
75
|
+
VANNA_OLLAMA_BASE_URL=http://ollama:11434
|
|
76
|
+
VANNA_OLLAMA_MODEL=sabia-7b
|
|
77
|
+
VANNA_MARITACA_API_URL=https://chat.maritaca.ai/api/chat/completions
|
|
78
|
+
VANNA_MARITACA_API_KEY=
|
|
79
|
+
VANNA_MARITACA_MODEL=sabia-4
|
|
80
|
+
VANNA_MARITACA_TIMEOUT_SECONDS=60
|
|
81
|
+
VANNA_VECTORSTORE_PATH=/data/vanna/chroma
|
|
82
|
+
VANNA_AUTO_TRAIN=true
|
|
83
|
+
VANNA_ALLOWED_SCHEMA=curated
|
|
84
|
+
ALLOWED_CURATED_VIEWS=
|
|
85
|
+
VANNA_MAX_ROWS=200
|
|
86
|
+
|
|
87
|
+
# Ollama
|
|
88
|
+
OLLAMA_IMAGE_TAG=latest
|
|
89
|
+
OLLAMA_PORT=11134
|
|
90
|
+
OLLAMA_MEM_LIMIT=8192m
|
|
91
|
+
OLLAMA_MODEL_BOOTSTRAP_ENABLED=true
|
|
92
|
+
OLLAMA_MODEL_NAME=sabia-7b
|
|
93
|
+
OLLAMA_MODEL_GGUF_URL=https://huggingface.co/QuantFactory/sabia-7b-GGUF/resolve/main/sabia-7b.Q4_K_M.gguf
|
|
94
|
+
OLLAMA_MODEL_GGUF_FILE=sabia-7b.Q4_K_M.gguf
|
|
95
|
+
HF_TOKEN=
|
|
96
|
+
|
|
97
|
+
# PNP
|
|
98
|
+
NILO_PECANHA_ENDPOINT=https://dadosabertos.example.gov.br/nilo-pecanha
|
|
99
|
+
NILO_PAGE_SIZE=500
|
|
100
|
+
NILO_MAX_PAGES=100
|
|
101
|
+
NILO_TIMEOUT_SECONDS=60
|
|
102
|
+
|
|
103
|
+
# Auth worker
|
|
104
|
+
AUTH_SESSION_TTL_SECONDS=21600
|
|
105
|
+
NILO_CRAWLER_SELENIUM_ENABLED=true
|
|
106
|
+
NILO_CRAWLER_SELENIUM_REMOTE_URL=http://selenium:4444/wd/hub
|
|
107
|
+
NILO_CRAWLER_SELENIUM_TIMEOUT_SECONDS=45
|
|
108
|
+
AUTH_WORKER_HEADLESS=false
|
|
109
|
+
AUTH_WORKER_STORAGE_DIR=/data/auth-worker
|
|
110
|
+
AUTH_WORKER_NAVIGATION_TIMEOUT_MS=45000
|
|
111
|
+
AUTH_WORKER_SCREEN_GEOMETRY=1440x960x24
|
|
112
|
+
AUTH_WORKER_PUBLIC_BASE_URL=http://localhost:16080
|
|
113
|
+
AUTH_WORKER_BROWSER_LOCALE=pt-BR
|
|
114
|
+
AUTH_WORKER_BROWSER_TIMEZONE=America/Sao_Paulo
|
|
115
|
+
AUTH_WORKER_EXTRA_ARGS=--disable-blink-features=AutomationControlled
|
|
116
|
+
AUTH_WORKER_EXTERNAL_CDP_URL=
|
|
117
|
+
|
|
118
|
+
# Selenium
|
|
119
|
+
SELENIUM_PORT=14444
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
FROM apache/airflow:2.9.3-python3.11
|
|
2
|
+
|
|
3
|
+
USER root
|
|
4
|
+
RUN apt-get update \
|
|
5
|
+
&& apt-get install -y --no-install-recommends build-essential \
|
|
6
|
+
&& apt-get clean \
|
|
7
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
8
|
+
|
|
9
|
+
USER airflow
|
|
10
|
+
COPY requirements.txt /requirements.txt
|
|
11
|
+
RUN pip install --no-cache-dir -r /requirements.txt
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
FROM apache/airflow:2.9.3-python3.11
|
|
2
|
+
|
|
3
|
+
USER root
|
|
4
|
+
RUN apt-get update \
|
|
5
|
+
&& apt-get install -y --no-install-recommends build-essential \
|
|
6
|
+
&& apt-get clean \
|
|
7
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
8
|
+
|
|
9
|
+
USER airflow
|
|
10
|
+
COPY infra/airflow/requirements.txt /requirements.txt
|
|
11
|
+
RUN pip install --no-cache-dir -r /requirements.txt
|
|
12
|
+
|
|
13
|
+
COPY --chown=airflow:root pipelines/airflow/dags /opt/airflow/dags
|
|
14
|
+
COPY --chown=airflow:root pipelines/dataif_pipelines /opt/airflow/pipelines/dataif_pipelines
|
|
15
|
+
COPY --chown=airflow:root pipelines/sql /opt/airflow/pipelines/sql
|
|
16
|
+
|
|
17
|
+
ENV PYTHONPATH=/opt/airflow/pipelines
|