@dataif/cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/README.md +16 -0
  2. package/bin/dataif.js +623 -0
  3. package/package.json +26 -0
  4. package/scripts/build-template.mjs +72 -0
  5. package/templates/dataif/README.md +157 -0
  6. package/templates/dataif/infra/.env.example +119 -0
  7. package/templates/dataif/infra/.env.stg.example +119 -0
  8. package/templates/dataif/infra/airflow/Dockerfile +11 -0
  9. package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
  10. package/templates/dataif/infra/airflow/requirements.txt +3 -0
  11. package/templates/dataif/infra/docker-compose.yml +306 -0
  12. package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
  13. package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
  14. package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
  15. package/templates/dataif/infra/keycloak/Dockerfile +4 -0
  16. package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
  17. package/templates/dataif/infra/ollama/Dockerfile +9 -0
  18. package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
  19. package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
  20. package/templates/dataif/infra/postgres/Dockerfile +4 -0
  21. package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
  22. package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
  23. package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
  24. package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
  25. package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
  26. package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
  27. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
  28. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
  29. package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
  30. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
  31. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
  32. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
  33. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
  34. package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
  35. package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
  36. package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
  37. package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
  38. package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
  39. package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
  40. package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
  41. package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
  42. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
  43. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
  44. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
  45. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
  46. package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
  47. package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
  48. package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
  49. package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  50. package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
  51. package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
  52. package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  53. package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  54. package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  55. package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  56. package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  57. package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  58. package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  59. package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
  60. package/templates/dataif/scripts/configure-env.sh +149 -0
  61. package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
  62. package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
  63. package/templates/dataif/scripts/deploy.sh +79 -0
  64. package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
  65. package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
  66. package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
  67. package/templates/dataif/scripts/publish-images.sh +31 -0
  68. package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
  69. package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
  70. package/templates/dataif/services/api/.dockerignore +18 -0
  71. package/templates/dataif/services/api/Dockerfile +12 -0
  72. package/templates/dataif/services/api/app/__init__.py +1 -0
  73. package/templates/dataif/services/api/app/auth.py +48 -0
  74. package/templates/dataif/services/api/app/config.py +59 -0
  75. package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
  76. package/templates/dataif/services/api/app/main.py +2432 -0
  77. package/templates/dataif/services/api/app/metabase_admin.py +191 -0
  78. package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
  79. package/templates/dataif/services/api/app/metabase_embed.py +15 -0
  80. package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
  81. package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
  82. package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
  83. package/templates/dataif/services/api/app/vanna_client.py +32 -0
  84. package/templates/dataif/services/api/requirements.txt +9 -0
  85. package/templates/dataif/services/vanna/.dockerignore +18 -0
  86. package/templates/dataif/services/vanna/Dockerfile +12 -0
  87. package/templates/dataif/services/vanna/app/config.py +57 -0
  88. package/templates/dataif/services/vanna/app/main.py +108 -0
  89. package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
  90. package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
  91. package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
  92. package/templates/dataif/services/vanna/requirements.txt +8 -0
  93. package/templates/dataif/services/web/.dockerignore +13 -0
  94. package/templates/dataif/services/web/Dockerfile +16 -0
  95. package/templates/dataif/services/web/index.html +12 -0
  96. package/templates/dataif/services/web/nginx.conf +74 -0
  97. package/templates/dataif/services/web/package-lock.json +4397 -0
  98. package/templates/dataif/services/web/package.json +32 -0
  99. package/templates/dataif/services/web/postcss.config.mjs +5 -0
  100. package/templates/dataif/services/web/src/App.jsx +2817 -0
  101. package/templates/dataif/services/web/src/adminAuth.js +245 -0
  102. package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
  103. package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
  104. package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
  105. package/templates/dataif/services/web/src/assets/if.svg +0 -0
  106. package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
  107. package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
  108. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
  109. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
  110. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
  111. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
  112. package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
  113. package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
  114. package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
  115. package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
  116. package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
  117. package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
  118. package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
  119. package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
  120. package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
  121. package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
  122. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
  123. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
  124. package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
  125. package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
  126. package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
  127. package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
  128. package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
  129. package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
  130. package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
  131. package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
  132. package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
  133. package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
  134. package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
  135. package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
  136. package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
  137. package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
  138. package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
  139. package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
  140. package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
  141. package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
  142. package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
  143. package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
  144. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
  145. package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
  146. package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
  147. package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
  148. package/templates/dataif/services/web/src/main.jsx +14 -0
  149. package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
  150. package/templates/dataif/services/web/src/styles/globals.css +60 -0
  151. package/templates/dataif/services/web/src/styles/theme.css +1326 -0
  152. package/templates/dataif/services/web/src/styles/typography.css +430 -0
  153. package/templates/dataif/services/web/src/styles.css +1287 -0
  154. package/templates/dataif/services/web/src/utils/cx.ts +24 -0
  155. package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
  156. package/templates/dataif/services/web/vite.config.js +14 -0
  157. package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
  158. package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
  159. package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
  160. package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
  161. package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
  162. package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
  163. package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
  164. package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
  165. package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
  166. package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
  167. package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
  168. package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
  169. package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
  170. package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
  171. package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
  172. package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
  173. package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
  174. package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
  175. package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
  176. package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
  177. package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
  178. package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
  179. package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
  180. package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
  181. package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
  182. package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
  183. package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env node
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
+ const __dirname = path.dirname(fileURLToPath(import.meta.url)); // ES modules have no built-in __dirname, so derive it from this module's URL
7
+ const packageRoot = path.resolve(__dirname, ".."); // parent of the directory that holds this script
8
+ const repoRoot = path.resolve(packageRoot, "..", ".."); // two levels above the package directory; NOTE(review): presumably the monorepo root - confirm
9
+ const templateRoot = path.join(packageRoot, "templates", "dataif"); // output directory that this script regenerates
10
+
11
+ const entries = [ // paths, relative to repoRoot, that are copied into the template
12
+ "infra",
13
+ "pipelines",
14
+ "scripts",
15
+ "services",
16
+ "sql",
17
+ "README.md"
18
+ ];
19
+
20
+ const ignoredNames = new Set([ // base names that shouldCopy rejects wherever they appear in the tree
21
+ ".env",
22
+ ".git",
23
+ ".pytest_cache",
24
+ "__pycache__",
25
+ "node_modules",
26
+ "dist",
27
+ "build",
28
+ ".DS_Store"
29
+ ]);
30
+
31
+ function shouldCopy(src) { // Return true when src should be copied into the template; the decision uses only the base name.
32
+ const name = path.basename(src); // only the last path segment is inspected
33
+ if (ignoredNames.has(name)) { // exact match against the ignoredNames set
34
+ return false;
35
+ }
36
+ if (name.endsWith(".pyc")) { // suffix match, so any file name ending in .pyc is skipped
37
+ return false;
38
+ }
39
+ return true; // anything not rejected above is copied
40
+ }
41
+
42
+ function copyRecursive(src, dest) { // Copy src to dest, descending into directories and skipping every path that shouldCopy rejects.
43
+ if (!shouldCopy(src)) { // the filter runs for every visited path, so ignored names are pruned at any depth
44
+ return;
45
+ }
46
+
47
+ const stat = fs.statSync(src); // statSync follows symbolic links, so a link is treated as its target
48
+ if (stat.isDirectory()) {
49
+ fs.mkdirSync(dest, { recursive: true }); // the destination directory is created even if nothing ends up inside it
50
+ for (const entry of fs.readdirSync(src)) {
51
+ copyRecursive(path.join(src, entry), path.join(dest, entry)); // mirror each child under the same name
52
+ }
53
+ return;
54
+ }
55
+
56
+ fs.mkdirSync(path.dirname(dest), { recursive: true }); // needed when src is a top-level file whose parent was not created by the directory branch
57
+ fs.copyFileSync(src, dest); // overwrites dest if it already exists
58
+ fs.chmodSync(dest, stat.mode); // explicitly apply the source file's mode bits to the copy
59
+ }
60
+
61
+ fs.rmSync(templateRoot, { recursive: true, force: true }); // start from a clean slate; force: true tolerates a missing directory
62
+ fs.mkdirSync(templateRoot, { recursive: true }); // recreate the empty output directory
63
+
64
+ for (const entry of entries) {
65
+ const src = path.join(repoRoot, entry);
66
+ if (!fs.existsSync(src)) { // fail fast: a missing entry aborts the build instead of yielding a partial template
67
+ throw new Error(`Arquivo esperado nao encontrado: ${src}`);
68
+ }
69
+ copyRecursive(src, path.join(templateRoot, entry)); // each entry keeps its relative name under templateRoot
70
+ }
71
+
72
+ console.log(`Template DataIF gerado em ${templateRoot}`); // reached only when every entry was copied without throwing
@@ -0,0 +1,157 @@
1
+ # dataif
2
+
3
+ Plataforma conteinerizada para ingestão de dados governamentais em PostgreSQL, com operação administrativa via API e UI, ingestão no Airflow, dashboards no Metabase e consulta assistida via Vanna.
4
+
5
+ ## Estado atual da PNP
6
+ - O conector da Plataforma Nilo Pecanha opera somente em `powerbi_microdados`.
7
+ - A origem principal é o relatório público do Power BI com os links de microdados.
8
+ - O fluxo legado com browser assistido foi removido da trilha operacional.
9
+ - O Airflow ficou restrito a validar fontes e carregar dados na camada `raw`.
10
+ - O tratamento analítico posterior em `staging` e a publicação em `curated` acontecem manualmente via SGBD, fora do Airflow.
11
+
12
+ ## Stack
13
+ - PostgreSQL
14
+ - Apache Airflow
15
+ - FastAPI
16
+ - React + Vite
17
+ - Metabase 60
18
+ - Vanna
19
+ - Keycloak
20
+
21
+ ## Estrutura
22
+ - `infra/`: Docker Compose, imagens e bootstrap da stack
23
+ - `pipelines/`: DAGs e conectores
24
+ - `services/api/`: API administrativa e embeds
25
+ - `services/web/`: frontend React
26
+ - `services/vanna/`: servico de NL2SQL
27
+ - `sql/`: schemas, tabelas e views curadas
28
+ - `docs/`: arquitetura e material de apoio
29
+
30
+ ## Subida rapida
31
+ 1. `./scripts/deploy.sh stg`
32
+ 2. Para producao local em nova maquina: `./scripts/deploy.sh prod`
33
+ 3. Acessos padrao:
34
+ - Staging Web: `http://localhost:15173`
35
+ - Producao Web: porta definida no configurador
36
+ - API: `/api` via Web ou porta configurada
37
+ - Airflow via Web: `/airflow/`
38
+ - Metabase via Web: `/metabase/`
39
+
40
+ Versao padrao do Metabase:
41
+ - `METABASE_IMAGE_TAG=v0.60.1`
42
+
43
+ ## Instalador npm
44
+
45
+ Tambem existe uma CLI npm para preparar uma maquina nova sem exigir que o usuario conheca os scripts internos:
46
+
47
+ ```bash
48
+ npx @dataif/cli install
49
+ npx @dataif/cli deploy
50
+ ```
51
+
52
+ O instalador cria uma copia local da stack em `~/.dataif/current`, valida Docker/Docker Compose, coleta as credenciais de forma interativa e entao sobe os containers. Para usar uma pasta especifica:
53
+
54
+ ```bash
55
+ npx @dataif/cli install --dir ./dataif-local
56
+ npx @dataif/cli deploy --dir ./dataif-local --mode prod
57
+ ```
58
+
59
+ Durante o desenvolvimento do pacote:
60
+
61
+ ```bash
62
+ cd packages/dataif-cli
63
+ npm run smoke
64
+ npm pack --dry-run
65
+ ```
66
+
67
+ ## Guia de uso local
68
+
69
+ Pre-requisitos:
70
+ - Docker Engine com Docker Compose v2
71
+ - 6 GB de RAM livres para stack basica
72
+ - 12 GB de RAM livres se usar Ollama local
73
+
74
+ Subir ambiente de teste/staging:
75
+
76
+ ```bash
77
+ ./scripts/deploy.sh stg
78
+ ```
79
+
80
+ Esse modo usa `infra/.env.stg.example`, cria `infra/.env` com valores presetados e sobe a mesma stack Docker do projeto. Use para desenvolvimento, testes e demonstracoes locais. Para recriar `infra/.env` de staging:
81
+
82
+ ```bash
83
+ DATAIF_FORCE_ENV=true ./scripts/deploy.sh stg
84
+ ```
85
+
86
+ Subir producao local em nova maquina:
87
+
88
+ ```bash
89
+ ./scripts/deploy.sh prod
90
+ ```
91
+
92
+ Esse modo usa `infra/.env.example` apenas como template versionado, chama `scripts/configure-env.sh`, gera segredos e grava `infra/.env`. Nao edite `infra/.env.example` para uma instancia real. Configure senhas e `METABASE_EMBED_SECRET` antes do primeiro `up`, pois o Postgres inicializa usuarios somente na criacao do volume.
93
+
94
+ Validar configuração sem subir:
95
+
96
+ ```bash
97
+ cd infra
98
+ docker compose --env-file .env config >/dev/null
99
+ ```
100
+
101
+ Ativar LLM local com Ollama:
102
+
103
+ ```bash
104
+ ./scripts/deploy.sh stg --llm
105
+ # ou
106
+ ./scripts/deploy.sh prod --llm
107
+ ```
108
+
109
+ Refazer do zero na maquina local:
110
+
111
+ ```bash
112
+ cd infra
113
+ docker compose --env-file .env down -v
114
+ cd ..
115
+ ./scripts/deploy.sh stg
116
+ ```
117
+
118
+ Depois da instalacao, o provider/modelo do Vanna pode ser ajustado pela tela `Configurações Admin`. Sem Ollama ativo e sem chave Maritaca, o servico Vanna permanece disponivel, mas respostas por LLM ficam indisponiveis ate configurar um provider.
119
+
120
+ ## Fluxo de dados da PNP
121
+ 1. O admin acessa a area administrativa via Keycloak.
122
+ 2. A UI consulta o catalogo publico da PNP no Power BI.
123
+ 3. O admin cria uma conexão selecionando anos, tipos e cron.
124
+ 4. O Airflow dispara a validação ou a ingestão da instancia.
125
+ 5. O conector baixa os arquivos publicos, grava manifestos em `raw.nilo_pecanha_assets` e linhas parseadas em `raw.nilo_pecanha_records`.
126
+ 6. O tratamento de `raw` para `staging` e a promoção final para `curated` são feitos manualmente via SGBD.
127
+ 7. Metabase e Vanna consomem a camada `curated`.
128
+
129
+ ## Vanna AI local
130
+ O Vanna usa apenas relações qualificadas no schema `curated` e é chamado pela tela `Inicio`.
131
+
132
+ Para usar LLM local com Ollama:
133
+
134
+ ```bash
135
+ ./scripts/use-vanna-ollama.sh
136
+ ```
137
+
138
+ O comando define `VANNA_LLM_PROVIDER=ollama`, preserva `VANNA_MARITACA_API_KEY`, sobe o servico Ollama, carrega/importa o modelo configurado e reinicia o Vanna. Use `./scripts/use-vanna-ollama.sh --no-bootstrap` quando o modelo ja existir no Ollama e voce quiser pular apenas o bootstrap.
139
+
140
+ Mantenha `VANNA_ALLOWED_SCHEMA=curated`; novas tabelas, views e materialized views em `curated` entram no treinamento quando o `vanna_user` tiver `SELECT`.
141
+
142
+ A configuração efetiva de provider/modelo tambem pode ser ajustada pela tela `Configurações Admin`. Os overrides ficam persistidos em banco e passam a valer sem editar `.env`; o `.env` continua como bootstrap inicial para credenciais, portas e valores default.
143
+
144
+ Também é possível usar a API da Maritaca com `VANNA_LLM_PROVIDER=maritaca`, `VANNA_MARITACA_API_KEY` e `VANNA_MARITACA_MODEL=sabia-4`. O modelo Sabiá local via Ollama/GGUF continua opcional e não é redistribuído nas imagens DockerHub do projeto.
145
+
146
+ ## Admins e Metabase
147
+ A tela `Configurações Admin` cria e remove usuarios administrativos em dois sistemas:
148
+
149
+ - Keycloak, como identidade de login do produto
150
+ - Metabase, como administradores da instancia analitica
151
+
152
+ O vínculo entre eles usa o email do usuário. Se a criação no Metabase falhar, a API tenta desfazer a criação correspondente no Keycloak para evitar estado parcial.
153
+
154
+ ## Observacoes
155
+ - O armazenamento operacional continua em `config.connector_endpoints`.
156
+ - O frontend ja foi reorganizado em paginas de `Inicio`, `Pipelines`, `Conexoes`, `Dashboards` e `SQL`.
157
+ - A documentacao em `docs/` descreve o fluxo oficial atual: `Power BI -> Airflow -> raw`, com `staging` e `curated` fora da orquestração.
@@ -0,0 +1,119 @@
1
+ # Imagens
2
+ COMPOSE_PROJECT_NAME=dataif
3
+ DATAIF_IMAGE_REGISTRY=docker.io/dataif
4
+ DATAIF_IMAGE_TAG=latest
5
+
6
+ # Postgres
7
+ POSTGRES_HOST=postgres
8
+ POSTGRES_PORT=5432
9
+ POSTGRES_EXPOSE_PORT=5433
10
+ POSTGRES_SUPERUSER=postgres
11
+ POSTGRES_SUPERUSER_PASSWORD=postgres
12
+
13
+ # Bancos
14
+ DATAIF_DB_NAME=dataif
15
+ AIRFLOW_DB_NAME=airflow
16
+ METABASE_APP_DB_NAME=metabaseapp
17
+
18
+ # Usuarios banco
19
+ DATAIF_ETL_USER=etl_user
20
+ DATAIF_ETL_PASSWORD=etl_password
21
+ DATAIF_METABASE_USER=metabase_user
22
+ DATAIF_METABASE_PASSWORD=metabase_password
23
+ DATAIF_VANNA_USER=vanna_user
24
+ DATAIF_VANNA_PASSWORD=vanna_password
25
+ AIRFLOW_DB_USER=airflow
26
+ AIRFLOW_DB_PASSWORD=airflow
27
+ METABASE_APP_DB_USER=metabase_app
28
+ METABASE_APP_DB_PASSWORD=metabase_app
29
+
30
+ # Airflow
31
+ AIRFLOW_UID=50000
32
+ AIRFLOW_ADMIN_USER=admin
33
+ AIRFLOW_ADMIN_PASSWORD=admin
34
+ AIRFLOW_ADMIN_EMAIL=admin@dataif.local
35
+ AIRFLOW_API_URL=http://airflow-webserver:8080
36
+
37
+ # Metabase
38
+ METABASE_SITE_URL=http://localhost:5173/metabase
39
+ METABASE_API_URL=http://metabase:3000
40
+ METABASE_IMAGE_TAG=v0.60.1
41
+ METABASE_EMBED_SECRET=replace_with_secure_secret
42
+ METABASE_ALLOWED_DASHBOARD_IDS=2
43
+ METABASE_DEFAULT_DASHBOARD_ID=2
44
+ METABASE_ADMIN_EMAIL=admin@dataif.local
45
+ METABASE_ADMIN_PASSWORD=admin
46
+ METABASE_ADMIN_FIRST_NAME=DataIF
47
+ METABASE_ADMIN_LAST_NAME=Metabase
48
+ METABASE_SITE_NAME=dataif
49
+ METABASE_ALLOW_TRACKING=false
50
+
51
+ # Keycloak
52
+ KEYCLOAK_URL=http://keycloak:8080
53
+ KEYCLOAK_REALM=dataif
54
+ KEYCLOAK_CLIENT_ID=dataif-web
55
+ KEYCLOAK_AUDIENCE=dataif-api
56
+ KEYCLOAK_ADMIN_REALM=master
57
+ KEYCLOAK_ADMIN_CLIENT_ID=admin-cli
58
+ KEYCLOAK_ADMIN=admin
59
+ KEYCLOAK_ADMIN_PASSWORD=admin
60
+
61
+ # Portas
62
+ API_PORT=8000
63
+ WEB_PORT=5173
64
+ AIRFLOW_PORT=8088
65
+ CONNECTOR_AUTH_WEB_PORT=5174
66
+ AUTH_WORKER_PORT=8090
67
+ AUTH_WORKER_VNC_PORT=5900
68
+ AUTH_WORKER_NOVNC_PORT=6080
69
+ METABASE_PORT=3000
70
+ KEYCLOAK_PORT=8081
71
+ VANNA_PORT=9000
72
+
73
+ # Vanna
74
+ VANNA_LLM_PROVIDER=ollama
75
+ VANNA_OLLAMA_BASE_URL=http://ollama:11434
76
+ VANNA_OLLAMA_MODEL=sabia-7b
77
+ VANNA_MARITACA_API_URL=https://chat.maritaca.ai/api/chat/completions
78
+ VANNA_MARITACA_API_KEY=
79
+ VANNA_MARITACA_MODEL=sabia-4
80
+ VANNA_MARITACA_TIMEOUT_SECONDS=60
81
+ VANNA_VECTORSTORE_PATH=/data/vanna/chroma
82
+ VANNA_AUTO_TRAIN=true
83
+ VANNA_ALLOWED_SCHEMA=curated
84
+ ALLOWED_CURATED_VIEWS=
85
+ VANNA_MAX_ROWS=200
86
+
87
+ # Ollama
88
+ OLLAMA_IMAGE_TAG=latest
89
+ OLLAMA_PORT=11434
90
+ OLLAMA_MEM_LIMIT=8192m
91
+ OLLAMA_MODEL_BOOTSTRAP_ENABLED=true
92
+ OLLAMA_MODEL_NAME=sabia-7b
93
+ OLLAMA_MODEL_GGUF_URL=https://huggingface.co/QuantFactory/sabia-7b-GGUF/resolve/main/sabia-7b.Q4_K_M.gguf
94
+ OLLAMA_MODEL_GGUF_FILE=sabia-7b.Q4_K_M.gguf
95
+ HF_TOKEN=
96
+
97
+ # PNP
98
+ NILO_PECANHA_ENDPOINT=https://dadosabertos.example.gov.br/nilo-pecanha
99
+ NILO_PAGE_SIZE=500
100
+ NILO_MAX_PAGES=100
101
+ NILO_TIMEOUT_SECONDS=60
102
+
103
+ # Auth worker
104
+ AUTH_SESSION_TTL_SECONDS=21600
105
+ NILO_CRAWLER_SELENIUM_ENABLED=true
106
+ NILO_CRAWLER_SELENIUM_REMOTE_URL=http://selenium:4444/wd/hub
107
+ NILO_CRAWLER_SELENIUM_TIMEOUT_SECONDS=45
108
+ AUTH_WORKER_HEADLESS=false
109
+ AUTH_WORKER_STORAGE_DIR=/data/auth-worker
110
+ AUTH_WORKER_NAVIGATION_TIMEOUT_MS=45000
111
+ AUTH_WORKER_SCREEN_GEOMETRY=1440x960x24
112
+ AUTH_WORKER_PUBLIC_BASE_URL=http://localhost:6080
113
+ AUTH_WORKER_BROWSER_LOCALE=pt-BR
114
+ AUTH_WORKER_BROWSER_TIMEZONE=America/Sao_Paulo
115
+ AUTH_WORKER_EXTRA_ARGS=--disable-blink-features=AutomationControlled
116
+ AUTH_WORKER_EXTERNAL_CDP_URL=
117
+
118
+ # Selenium
119
+ SELENIUM_PORT=4444
@@ -0,0 +1,119 @@
1
+ # Imagens
2
+ COMPOSE_PROJECT_NAME=dataif-stg
3
+ DATAIF_IMAGE_REGISTRY=docker.io/dataif
4
+ DATAIF_IMAGE_TAG=latest
5
+
6
+ # Postgres
7
+ POSTGRES_HOST=postgres
8
+ POSTGRES_PORT=5432
9
+ POSTGRES_EXPOSE_PORT=15433
10
+ POSTGRES_SUPERUSER=postgres
11
+ POSTGRES_SUPERUSER_PASSWORD=postgres_stg
12
+
13
+ # Bancos
14
+ DATAIF_DB_NAME=dataif
15
+ AIRFLOW_DB_NAME=airflow
16
+ METABASE_APP_DB_NAME=metabaseapp
17
+
18
+ # Usuarios banco
19
+ DATAIF_ETL_USER=etl_user
20
+ DATAIF_ETL_PASSWORD=etl_password_stg
21
+ DATAIF_METABASE_USER=metabase_user
22
+ DATAIF_METABASE_PASSWORD=metabase_password_stg
23
+ DATAIF_VANNA_USER=vanna_user
24
+ DATAIF_VANNA_PASSWORD=vanna_password_stg
25
+ AIRFLOW_DB_USER=airflow
26
+ AIRFLOW_DB_PASSWORD=airflow_stg
27
+ METABASE_APP_DB_USER=metabase_app
28
+ METABASE_APP_DB_PASSWORD=metabase_app_stg
29
+
30
+ # Airflow
31
+ AIRFLOW_UID=50000
32
+ AIRFLOW_ADMIN_USER=admin
33
+ AIRFLOW_ADMIN_PASSWORD=admin_stg
34
+ AIRFLOW_ADMIN_EMAIL=admin-stg@dataif.local
35
+ AIRFLOW_API_URL=http://airflow-webserver:8080
36
+
37
+ # Metabase
38
+ METABASE_SITE_URL=http://localhost:15173/metabase
39
+ METABASE_API_URL=http://metabase:3000
40
+ METABASE_IMAGE_TAG=v0.60.1
41
+ METABASE_EMBED_SECRET=stg_embed_secret_change_if_shared
42
+ METABASE_ALLOWED_DASHBOARD_IDS=2
43
+ METABASE_DEFAULT_DASHBOARD_ID=2
44
+ METABASE_ADMIN_EMAIL=admin-stg@dataif.local
45
+ METABASE_ADMIN_PASSWORD=admin_stg
46
+ METABASE_ADMIN_FIRST_NAME=DataIF
47
+ METABASE_ADMIN_LAST_NAME=Staging
48
+ METABASE_SITE_NAME=dataif-stg
49
+ METABASE_ALLOW_TRACKING=false
50
+
51
+ # Keycloak
52
+ KEYCLOAK_URL=http://keycloak:8080
53
+ KEYCLOAK_REALM=dataif
54
+ KEYCLOAK_CLIENT_ID=dataif-web
55
+ KEYCLOAK_AUDIENCE=dataif-api
56
+ KEYCLOAK_ADMIN_REALM=master
57
+ KEYCLOAK_ADMIN_CLIENT_ID=admin-cli
58
+ KEYCLOAK_ADMIN=admin
59
+ KEYCLOAK_ADMIN_PASSWORD=admin_stg
60
+
61
+ # Portas
62
+ API_PORT=18000
63
+ WEB_PORT=15173
64
+ AIRFLOW_PORT=18088
65
+ CONNECTOR_AUTH_WEB_PORT=15174
66
+ AUTH_WORKER_PORT=18090
67
+ AUTH_WORKER_VNC_PORT=15900
68
+ AUTH_WORKER_NOVNC_PORT=16080
69
+ METABASE_PORT=13000
70
+ KEYCLOAK_PORT=18081
71
+ VANNA_PORT=19000
72
+
73
+ # Vanna
74
+ VANNA_LLM_PROVIDER=ollama
75
+ VANNA_OLLAMA_BASE_URL=http://ollama:11434
76
+ VANNA_OLLAMA_MODEL=sabia-7b
77
+ VANNA_MARITACA_API_URL=https://chat.maritaca.ai/api/chat/completions
78
+ VANNA_MARITACA_API_KEY=
79
+ VANNA_MARITACA_MODEL=sabia-4
80
+ VANNA_MARITACA_TIMEOUT_SECONDS=60
81
+ VANNA_VECTORSTORE_PATH=/data/vanna/chroma
82
+ VANNA_AUTO_TRAIN=true
83
+ VANNA_ALLOWED_SCHEMA=curated
84
+ ALLOWED_CURATED_VIEWS=
85
+ VANNA_MAX_ROWS=200
86
+
87
+ # Ollama
88
+ OLLAMA_IMAGE_TAG=latest
89
+ OLLAMA_PORT=11134
90
+ OLLAMA_MEM_LIMIT=8192m
91
+ OLLAMA_MODEL_BOOTSTRAP_ENABLED=true
92
+ OLLAMA_MODEL_NAME=sabia-7b
93
+ OLLAMA_MODEL_GGUF_URL=https://huggingface.co/QuantFactory/sabia-7b-GGUF/resolve/main/sabia-7b.Q4_K_M.gguf
94
+ OLLAMA_MODEL_GGUF_FILE=sabia-7b.Q4_K_M.gguf
95
+ HF_TOKEN=
96
+
97
+ # PNP
98
+ NILO_PECANHA_ENDPOINT=https://dadosabertos.example.gov.br/nilo-pecanha
99
+ NILO_PAGE_SIZE=500
100
+ NILO_MAX_PAGES=100
101
+ NILO_TIMEOUT_SECONDS=60
102
+
103
+ # Auth worker
104
+ AUTH_SESSION_TTL_SECONDS=21600
105
+ NILO_CRAWLER_SELENIUM_ENABLED=true
106
+ NILO_CRAWLER_SELENIUM_REMOTE_URL=http://selenium:4444/wd/hub
107
+ NILO_CRAWLER_SELENIUM_TIMEOUT_SECONDS=45
108
+ AUTH_WORKER_HEADLESS=false
109
+ AUTH_WORKER_STORAGE_DIR=/data/auth-worker
110
+ AUTH_WORKER_NAVIGATION_TIMEOUT_MS=45000
111
+ AUTH_WORKER_SCREEN_GEOMETRY=1440x960x24
112
+ AUTH_WORKER_PUBLIC_BASE_URL=http://localhost:16080
113
+ AUTH_WORKER_BROWSER_LOCALE=pt-BR
114
+ AUTH_WORKER_BROWSER_TIMEZONE=America/Sao_Paulo
115
+ AUTH_WORKER_EXTRA_ARGS=--disable-blink-features=AutomationControlled
116
+ AUTH_WORKER_EXTERNAL_CDP_URL=
117
+
118
+ # Selenium
119
+ SELENIUM_PORT=14444
@@ -0,0 +1,11 @@
1
+ FROM apache/airflow:2.9.3-python3.11
2
+
3
+ USER root
4
+ RUN apt-get update \
5
+ && apt-get install -y --no-install-recommends build-essential \
6
+ && apt-get clean \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ USER airflow
10
+ COPY requirements.txt /requirements.txt
11
+ RUN pip install --no-cache-dir -r /requirements.txt
@@ -0,0 +1,17 @@
1
+ FROM apache/airflow:2.9.3-python3.11
2
+
3
+ USER root
4
+ RUN apt-get update \
5
+ && apt-get install -y --no-install-recommends build-essential \
6
+ && apt-get clean \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ USER airflow
10
+ COPY infra/airflow/requirements.txt /requirements.txt
11
+ RUN pip install --no-cache-dir -r /requirements.txt
12
+
13
+ COPY --chown=airflow:root pipelines/airflow/dags /opt/airflow/dags
14
+ COPY --chown=airflow:root pipelines/dataif_pipelines /opt/airflow/pipelines/dataif_pipelines
15
+ COPY --chown=airflow:root pipelines/sql /opt/airflow/pipelines/sql
16
+
17
+ ENV PYTHONPATH=/opt/airflow/pipelines
@@ -0,0 +1,3 @@
1
+ psycopg2-binary==2.9.9
2
+ requests==2.32.3
3
+ python-dateutil==2.9.0.post0