@dataif/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/bin/dataif.js +623 -0
- package/package.json +26 -0
- package/scripts/build-template.mjs +72 -0
- package/templates/dataif/README.md +157 -0
- package/templates/dataif/infra/.env.example +119 -0
- package/templates/dataif/infra/.env.stg.example +119 -0
- package/templates/dataif/infra/airflow/Dockerfile +11 -0
- package/templates/dataif/infra/airflow/Dockerfile.release +17 -0
- package/templates/dataif/infra/airflow/requirements.txt +3 -0
- package/templates/dataif/infra/docker-compose.yml +306 -0
- package/templates/dataif/infra/init-db/01-init-dataif.sh +129 -0
- package/templates/dataif/infra/init-db/pnp-curated-views.sqlinc +444 -0
- package/templates/dataif/infra/init-db/pnp-raw-staging-curated.sqlinc +701 -0
- package/templates/dataif/infra/keycloak/Dockerfile +4 -0
- package/templates/dataif/infra/keycloak/realm-dataif.json +73 -0
- package/templates/dataif/infra/ollama/Dockerfile +9 -0
- package/templates/dataif/infra/ollama/bootstrap-model.sh +100 -0
- package/templates/dataif/infra/ollama/sabia-7b.Modelfile +14 -0
- package/templates/dataif/infra/postgres/Dockerfile +4 -0
- package/templates/dataif/pipelines/airflow/dags/generated/.gitkeep +1 -0
- package/templates/dataif/pipelines/airflow/dags/generated/2020_financeiro_fcc6f1f3_sync.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/airflow/pnp_pipeline_factory.py +167 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/connector.py +28 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/base/types.py +14 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/config.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/connector.py +558 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/powerbi_microdados.py +728 -0
- package/templates/dataif/pipelines/dataif_pipelines/connectors/nilo_pecanha/transform.py +296 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/jobs/nilo_pipeline.py +112 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/__init__.py +21 -0
- package/templates/dataif/pipelines/dataif_pipelines/orchestration/pnp_workflow.py +783 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/__init__.py +1 -0
- package/templates/dataif/pipelines/dataif_pipelines/repositories/pnp_raw_repository.py +860 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/__init__.py +19 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_curated_service.py +66 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_download_service.py +534 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_quality_service.py +9 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_raw_ingestion_service.py +124 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/pnp_staging_service.py +271 -0
- package/templates/dataif/pipelines/dataif_pipelines/services/powerbi_catalog_service.py +159 -0
- package/templates/dataif/pipelines/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/pipelines/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/pipelines/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/pipelines/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/pipelines/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/pipelines/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/pipelines/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/pipelines/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/pipelines/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/pipelines/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/pipelines/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/pipelines/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
- package/templates/dataif/scripts/configure-env.sh +149 -0
- package/templates/dataif/scripts/create_metabase_pnp_dashboard.py +943 -0
- package/templates/dataif/scripts/create_metabase_pnp_matriculas_dashboard.py +580 -0
- package/templates/dataif/scripts/deploy.sh +79 -0
- package/templates/dataif/scripts/fix_metabase_template_tag_ids.py +91 -0
- package/templates/dataif/scripts/pnp_powerbi_microdados_probe.py +14 -0
- package/templates/dataif/scripts/pnp_validate_raw_run.py +330 -0
- package/templates/dataif/scripts/publish-images.sh +31 -0
- package/templates/dataif/scripts/sync_metabase_dashboard_field_filters.py +241 -0
- package/templates/dataif/scripts/use-vanna-ollama.sh +139 -0
- package/templates/dataif/services/api/.dockerignore +18 -0
- package/templates/dataif/services/api/Dockerfile +12 -0
- package/templates/dataif/services/api/app/__init__.py +1 -0
- package/templates/dataif/services/api/app/auth.py +48 -0
- package/templates/dataif/services/api/app/config.py +59 -0
- package/templates/dataif/services/api/app/keycloak_admin.py +215 -0
- package/templates/dataif/services/api/app/main.py +2432 -0
- package/templates/dataif/services/api/app/metabase_admin.py +191 -0
- package/templates/dataif/services/api/app/metabase_bootstrap.py +44 -0
- package/templates/dataif/services/api/app/metabase_embed.py +15 -0
- package/templates/dataif/services/api/app/pnp_dag_provisioner.py +113 -0
- package/templates/dataif/services/api/app/pnp_instance_repository.py +951 -0
- package/templates/dataif/services/api/app/pnp_powerbi.py +438 -0
- package/templates/dataif/services/api/app/vanna_client.py +32 -0
- package/templates/dataif/services/api/requirements.txt +9 -0
- package/templates/dataif/services/vanna/.dockerignore +18 -0
- package/templates/dataif/services/vanna/Dockerfile +12 -0
- package/templates/dataif/services/vanna/app/config.py +57 -0
- package/templates/dataif/services/vanna/app/main.py +108 -0
- package/templates/dataif/services/vanna/app/runtime_config.py +114 -0
- package/templates/dataif/services/vanna/app/sql_guard.py +123 -0
- package/templates/dataif/services/vanna/app/vanna_engine.py +382 -0
- package/templates/dataif/services/vanna/requirements.txt +8 -0
- package/templates/dataif/services/web/.dockerignore +13 -0
- package/templates/dataif/services/web/Dockerfile +16 -0
- package/templates/dataif/services/web/index.html +12 -0
- package/templates/dataif/services/web/nginx.conf +74 -0
- package/templates/dataif/services/web/package-lock.json +4397 -0
- package/templates/dataif/services/web/package.json +32 -0
- package/templates/dataif/services/web/postcss.config.mjs +5 -0
- package/templates/dataif/services/web/src/App.jsx +2817 -0
- package/templates/dataif/services/web/src/adminAuth.js +245 -0
- package/templates/dataif/services/web/src/assets/avatar_placeholder.png +0 -0
- package/templates/dataif/services/web/src/assets/github_logo_icon_229278.svg +1 -0
- package/templates/dataif/services/web/src/assets/if-logo.png +0 -0
- package/templates/dataif/services/web/src/assets/if.svg +0 -0
- package/templates/dataif/services/web/src/assets/pnp-horizontal.svg +1 -0
- package/templates/dataif/services/web/src/components/AppHeader.jsx +233 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/mobile-header.tsx +56 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-account-card.tsx +209 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item-button.tsx +67 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-item.tsx +108 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/base-components/nav-list.tsx +83 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/config.ts +23 -0
- package/templates/dataif/services/web/src/components/application/app-navigation/header-navigation.tsx +240 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-base.tsx +376 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-dot.tsx +52 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination-line.tsx +48 -0
- package/templates/dataif/services/web/src/components/application/pagination/pagination.tsx +328 -0
- package/templates/dataif/services/web/src/components/application/tabs/tabs.tsx +223 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar-label-group.tsx +28 -0
- package/templates/dataif/services/web/src/components/base/avatar/avatar.tsx +129 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-add-button.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-company-icon.tsx +24 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/avatar-online-indicator.tsx +29 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/index.tsx +4 -0
- package/templates/dataif/services/web/src/components/base/avatar/base-components/verified-tick.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/badges/badge-types.ts +264 -0
- package/templates/dataif/services/web/src/components/base/badges/badges.tsx +415 -0
- package/templates/dataif/services/web/src/components/base/button-group/button-group.tsx +104 -0
- package/templates/dataif/services/web/src/components/base/buttons/button.tsx +267 -0
- package/templates/dataif/services/web/src/components/base/input/hint-text.tsx +31 -0
- package/templates/dataif/services/web/src/components/base/input/input.tsx +269 -0
- package/templates/dataif/services/web/src/components/base/input/label.tsx +48 -0
- package/templates/dataif/services/web/src/components/base/radio-buttons/radio-buttons.tsx +127 -0
- package/templates/dataif/services/web/src/components/base/select/combobox.tsx +150 -0
- package/templates/dataif/services/web/src/components/base/select/multi-select.tsx +361 -0
- package/templates/dataif/services/web/src/components/base/select/popover.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/select/select-item.tsx +95 -0
- package/templates/dataif/services/web/src/components/base/select/select-native.tsx +67 -0
- package/templates/dataif/services/web/src/components/base/select/select.tsx +144 -0
- package/templates/dataif/services/web/src/components/base/tags/base-components/tag-close-x.tsx +32 -0
- package/templates/dataif/services/web/src/components/base/tooltip/tooltip.tsx +107 -0
- package/templates/dataif/services/web/src/components/foundations/dot-icon.tsx +22 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo-minimal.tsx +170 -0
- package/templates/dataif/services/web/src/components/foundations/logo/untitledui-logo.tsx +58 -0
- package/templates/dataif/services/web/src/hooks/use-breakpoint.ts +34 -0
- package/templates/dataif/services/web/src/hooks/use-resize-observer.ts +67 -0
- package/templates/dataif/services/web/src/main.jsx +14 -0
- package/templates/dataif/services/web/src/providers/theme-provider.jsx +62 -0
- package/templates/dataif/services/web/src/styles/globals.css +60 -0
- package/templates/dataif/services/web/src/styles/theme.css +1326 -0
- package/templates/dataif/services/web/src/styles/typography.css +430 -0
- package/templates/dataif/services/web/src/styles.css +1287 -0
- package/templates/dataif/services/web/src/utils/cx.ts +24 -0
- package/templates/dataif/services/web/src/utils/is-react-component.ts +33 -0
- package/templates/dataif/services/web/vite.config.js +14 -0
- package/templates/dataif/sql/ddl/001_schemas.sql +6 -0
- package/templates/dataif/sql/ddl/003_pnp_raw_staging_curated.sql +699 -0
- package/templates/dataif/sql/migrations/001_pnp_phase1_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/002_pnp_phase2_admin_config_backfill.sql +184 -0
- package/templates/dataif/sql/migrations/003_pnp_phase3_raw_tabular_backfill.sql +3 -0
- package/templates/dataif/sql/migrations/004_pnp_phase3_raw_backfill_support_index.sql +3 -0
- package/templates/dataif/sql/migrations/005_pnp_phase7_staging_support_indexes.sql +2 -0
- package/templates/dataif/sql/migrations/006_pnp_phase7_staging_autovacuum_tuning.sql +2 -0
- package/templates/dataif/sql/migrations/007_pnp_phase7b_run_packages.sql +20 -0
- package/templates/dataif/sql/migrations/008_pnp_phase7a_pipeline_endpoints.sql +169 -0
- package/templates/dataif/sql/migrations/009_pnp_phase8_curated.sql +35 -0
- package/templates/dataif/sql/migrations/010_pnp_phase10_staging_incremental_upsert.sql +3 -0
- package/templates/dataif/sql/migrations/010_pnp_pipeline_uuid.sql +51 -0
- package/templates/dataif/sql/migrations/011_app_settings.sql +7 -0
- package/templates/dataif/sql/staging/020_pnp_matriculas.sql +112 -0
- package/templates/dataif/sql/staging/030_pnp_eficiencia_academica.sql +83 -0
- package/templates/dataif/sql/staging/040_pnp_servidores.sql +90 -0
- package/templates/dataif/sql/staging/050_pnp_financeiro.sql +72 -0
- package/templates/dataif/sql/views_curated/003_vw_pnp_microdados_admin.sql +160 -0
- package/templates/dataif/sql/views_curated/004_mv_pnp_dashboard_fast.sql +204 -0
- package/templates/dataif/sql/views_curated/010_vw_pnp_admin_ingestao.sql +51 -0
- package/templates/dataif/sql/views_curated/020_vw_pnp_qualidade_dados.sql +114 -0
- package/templates/dataif/sql/views_curated/030_vw_pnp_matriculas.sql +67 -0
- package/templates/dataif/sql/views_curated/040_vw_pnp_eficiencia.sql +33 -0
- package/templates/dataif/sql/views_curated/050_vw_pnp_servidores.sql +30 -0
- package/templates/dataif/sql/views_curated/060_vw_pnp_financeiro.sql +22 -0
- package/templates/dataif/sql/views_curated/070_vw_pnp_vanna.sql +115 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import Engine, text
|
|
7
|
+
from sqlalchemy.exc import SQLAlchemyError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
VANNA_LLM_SETTING_KEY = "vanna.llm_config"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class RuntimeVannaConfig:
    """Immutable bundle of the settings the Vanna service is running with.

    Instances are built by ``load_runtime_vanna_config``; the class itself
    performs no validation of the values it is given.
    """

    # Selected LLM backend; only the literal "maritaca" is special-cased below.
    provider: str
    ollama_base_url: str
    ollama_model: str
    maritaca_api_url: str
    maritaca_api_key: str
    maritaca_model: str
    maritaca_timeout_seconds: int
    allowed_schema: str
    vectorstore_path: str
    auto_train: bool
    max_rows: int

    def signature(self) -> tuple[object, ...]:
        """Return all field values as one tuple, in field declaration order.

        Two configurations with equal signatures carry identical settings.
        """
        return tuple(getattr(self, field_name) for field_name in self.__dataclass_fields__)

    def model_name(self) -> str:
        """Return the model of the active provider.

        The Maritaca model is returned when ``provider`` is ``"maritaca"``;
        every other provider value yields the Ollama model.
        """
        return self.maritaca_model if self.provider == "maritaca" else self.ollama_model
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def load_runtime_vanna_config(base_settings: Any, engine: Engine) -> RuntimeVannaConfig:
    """Build the effective Vanna configuration.

    Values start from ``base_settings`` (with hard-coded fallbacks for blank
    strings) and are then overridden by the LLM settings persisted in the
    database, when such a record can be read.

    Only the provider, the Ollama ``base_url``/``model`` and the Maritaca
    ``api_url``/``api_key``/``model``/``timeout_seconds`` can be overridden by
    the persisted record; ``allowed_schema``, ``vectorstore_path``,
    ``auto_train`` and ``max_rows`` always come from ``base_settings``.

    Args:
        base_settings: Object exposing the ``vanna_*`` attributes and
            ``effective_allowed_schema()`` read below.
        engine: SQLAlchemy engine used to look up the persisted settings.

    Returns:
        The merged, immutable runtime configuration.
    """

    def cleaned(raw: object, fallback: str = "") -> str:
        # Stringify, trim, and fall back when nothing is left.
        return str(raw).strip() or fallback

    resolved: dict[str, Any] = {}
    resolved["provider"] = str(base_settings.vanna_llm_provider).strip().lower() or "ollama"
    resolved["ollama_base_url"] = cleaned(base_settings.vanna_ollama_base_url, "http://ollama:11434")
    resolved["ollama_model"] = cleaned(base_settings.vanna_ollama_model, "sabia-7b")
    resolved["maritaca_api_url"] = cleaned(base_settings.vanna_maritaca_api_url)
    # The API key is deliberately left untrimmed, exactly as configured.
    resolved["maritaca_api_key"] = str(base_settings.vanna_maritaca_api_key)
    resolved["maritaca_model"] = cleaned(base_settings.vanna_maritaca_model, "sabia-4")
    resolved["maritaca_timeout_seconds"] = int(base_settings.vanna_maritaca_timeout_seconds)
    resolved["allowed_schema"] = str(base_settings.effective_allowed_schema()).strip().lower() or "curated"
    resolved["vectorstore_path"] = cleaned(base_settings.vanna_vectorstore_path, "/data/vanna/chroma")
    resolved["auto_train"] = bool(base_settings.vanna_auto_train)
    resolved["max_rows"] = int(base_settings.vanna_max_rows)

    stored = _read_persisted_llm_settings(engine)
    if not isinstance(stored, dict):
        return RuntimeVannaConfig(**resolved)

    def section(name: str) -> dict[str, Any]:
        # A missing or non-dict section behaves like an empty one.
        candidate = stored.get(name)
        return candidate if isinstance(candidate, dict) else {}

    def override(source: dict[str, Any], key: str, target: str) -> str:
        # A falsy or blank persisted value keeps the current setting.
        current = resolved[target]
        return str(source.get(key) or current).strip() or current

    ollama_section = section("ollama")
    maritaca_section = section("maritaca")

    resolved["provider"] = str(stored.get("provider") or resolved["provider"]).strip().lower() or "ollama"
    resolved["ollama_base_url"] = override(ollama_section, "base_url", "ollama_base_url")
    resolved["ollama_model"] = override(ollama_section, "model", "ollama_model")
    resolved["maritaca_api_url"] = override(maritaca_section, "api_url", "maritaca_api_url")
    resolved["maritaca_api_key"] = str(maritaca_section.get("api_key") or resolved["maritaca_api_key"])
    resolved["maritaca_model"] = override(maritaca_section, "model", "maritaca_model")
    resolved["maritaca_timeout_seconds"] = _coerce_positive_int(
        maritaca_section.get("timeout_seconds"), resolved["maritaca_timeout_seconds"]
    )

    return RuntimeVannaConfig(**resolved)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _read_persisted_llm_settings(engine: Engine) -> dict[str, Any] | None:
    """Fetch the persisted Vanna LLM settings from ``config.app_settings``.

    The lookup is best-effort: any SQLAlchemy error is swallowed and reported
    as "no settings".

    Args:
        engine: SQLAlchemy engine used for the lookup.

    Returns:
        The stored ``setting_value`` when it is a dict; ``None`` when the
        query fails, no row exists, or the stored value is not a dict.
    """
    params = {"setting_key": VANNA_LLM_SETTING_KEY}
    try:
        with engine.begin() as conn:
            statement = text("SELECT setting_value FROM config.app_settings WHERE setting_key = :setting_key")
            record = conn.execute(statement, params).mappings().first()
    except SQLAlchemyError:
        return None

    if not record:
        return None

    payload = record.get("setting_value")
    if isinstance(payload, dict):
        return payload
    return None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _coerce_positive_int(value: object, default: int) -> int:
|
|
105
|
+
if isinstance(value, bool):
|
|
106
|
+
return int(value) or default
|
|
107
|
+
if isinstance(value, int):
|
|
108
|
+
return value if value > 0 else default
|
|
109
|
+
if isinstance(value, str):
|
|
110
|
+
normalized = value.strip()
|
|
111
|
+
if normalized.isdigit():
|
|
112
|
+
parsed = int(normalized)
|
|
113
|
+
return parsed if parsed > 0 else default
|
|
114
|
+
return default
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SQLGuard:
    """Regex-based gate for SQL that is about to be run on behalf of a user.

    ``validate`` accepts only a single ``SELECT`` statement whose relations are
    all schema-qualified and live in an allowed schema; ``enforce_limit``
    additionally bounds the number of rows the statement may return.

    The checks are textual, not a SQL parser: they err on the side of
    rejecting (for example, a forbidden keyword inside a string literal is
    still rejected).
    """

    # Schemas that may never be referenced, even when not otherwise matched
    # by the relation checks.
    FORBIDDEN_SCHEMAS = {
        "audit",
        "config",
        "information_schema",
        "mart",
        "pg_catalog",
        "public",
        "raw",
        "staging",
    }

    # Keywords that must not appear anywhere in the statement. "into" is
    # included because SELECT ... INTO creates a table.
    FORBIDDEN_KEYWORD_PATTERN = re.compile(
        r"\b(?:insert|update|delete|drop|alter|truncate|create|grant|revoke|into)\b"
    )

    # Relation named directly after FROM or JOIN (optionally schema-qualified).
    RELATION_PATTERN = re.compile(
        r"\b(?:from|join)\s+("
        r"(?:\"[^\"]+\"|[a-zA-Z_][a-zA-Z0-9_]*)"
        r"(?:\s*\.\s*(?:\"[^\"]+\"|[a-zA-Z_][a-zA-Z0-9_]*))?"
        r")",
        flags=re.IGNORECASE,
    )
    # Text of a FROM clause, up to the next clause keyword or end of input.
    FROM_CLAUSE_PATTERN = re.compile(
        r"\bfrom\b\s+(.*?)(?="
        r"\bwhere\b|\bgroup\s+by\b|\border\s+by\b|\blimit\b|\boffset\b|"
        r"\bunion\b|\bexcept\b|\bintersect\b|\bhaving\b|$"
        r")",
        flags=re.IGNORECASE,
    )
    # Identifier following a comma inside a FROM clause.
    COMMA_RELATION_PATTERN = re.compile(
        r",\s*("
        r"(?:\"[^\"]+\"|[a-zA-Z_][a-zA-Z0-9_]*)"
        r"(?:\s*\.\s*(?:\"[^\"]+\"|[a-zA-Z_][a-zA-Z0-9_]*))?"
        r")",
        flags=re.IGNORECASE,
    )
    # Leading part of a three-part reference such as schema.relation.column.
    QUALIFIED_SCHEMA_PATTERN = re.compile(
        r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\.\s*[a-zA-Z_][a-zA-Z0-9_]*\s*\.",
        flags=re.IGNORECASE,
    )
    # "LIMIT <digits>"; group 1 is the row count.
    LIMIT_PATTERN = re.compile(r"\blimit\s+(\d+)\b", flags=re.IGNORECASE)
    # Single-quoted literals and double-quoted identifiers.
    QUOTED_SPAN_PATTERN = re.compile(r"'(?:''|[^'])*'|\"[^\"]*\"")

    def __init__(self, allowed_schemas: set[str] | list[str] | tuple[str, ...] | str | None = None) -> None:
        """Create a guard for the given schemas.

        Args:
            allowed_schemas: Schema names relations may belong to. A single
                name may be passed as a plain string. Defaults to
                ``{"curated"}`` when empty or omitted.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(allowed_schemas, str):
            allowed_schemas = [allowed_schemas]
        schemas = allowed_schemas or {"curated"}
        self.allowed_schemas = {schema.lower().strip() for schema in schemas if schema.strip()}

    def validate(self, sql: str) -> None:
        """Reject ``sql`` unless it is a single, read-only, allowed SELECT.

        Checks, in order: the statement starts with ``select``; it contains no
        statement separator other than trailing ones; no forbidden keyword
        appears (checked before string literals are removed, so literals are
        scanned too); no forbidden schema is referenced; every three-part
        reference starts with an allowed schema; at least one relation is
        referenced and each one is ``schema.relation`` in an allowed schema.

        Raises:
            ValueError: When any check fails.
        """
        stripped = self._normalize(sql)
        if not stripped.startswith("select"):
            raise ValueError("Only SELECT statements are allowed")
        if ";" in stripped.rstrip(";"):
            raise ValueError("Only one SQL statement is allowed")

        if self.FORBIDDEN_KEYWORD_PATTERN.search(stripped):
            raise ValueError("Forbidden SQL keyword")

        sql_without_literals = self._strip_string_literals(stripped)
        schema_refs = set(re.findall(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\.", sql_without_literals))
        blocked_schemas = schema_refs.intersection(self.FORBIDDEN_SCHEMAS)
        if blocked_schemas:
            raise ValueError(f"Schema not allowed: {sorted(blocked_schemas)[0]}")

        for schema in self.QUALIFIED_SCHEMA_PATTERN.findall(sql_without_literals):
            if schema.lower() not in self.allowed_schemas:
                raise ValueError(f"Schema not allowed: {schema.lower()}")

        matched_relations = self._relation_references(sql_without_literals)
        if not matched_relations:
            raise ValueError("SQL must reference at least one allowed relation")

        for relation in matched_relations:
            relation_clean = self._clean_identifier(relation)
            parts = [part for part in relation_clean.split(".") if part]
            if len(parts) != 2:
                raise ValueError(f"Relation must be schema-qualified: {relation_clean}")
            schema = parts[0]
            if schema not in self.allowed_schemas:
                raise ValueError(f"Schema not allowed: {schema}")

    def enforce_limit(self, sql: str, max_rows: int) -> str:
        """Validate ``sql`` and return it bounded to at most ``max_rows`` rows.

        A top-level ``LIMIT n`` is kept when ``n <= max_rows`` and lowered to
        ``max_rows`` otherwise. When the statement has no top-level limit
        (including when the only limits sit inside parentheses, e.g. in a
        subquery) ``LIMIT max_rows`` is appended.

        Args:
            sql: Statement to check and bound.
            max_rows: Maximum number of rows the result may contain.

        Returns:
            The comment-free, whitespace-compacted statement with the limit
            applied and trailing semicolons removed.

        Raises:
            ValueError: When ``validate`` rejects the statement.
        """
        self.validate(sql)
        stripped = self._compact_original(sql).rstrip(";").rstrip()
        limit_match = self._top_level_limit(stripped)
        if limit_match is None:
            return f"{stripped} LIMIT {max_rows}"
        if int(limit_match.group(1)) <= max_rows:
            return stripped
        start, end = limit_match.span(1)
        return f"{stripped[:start]}{max_rows}{stripped[end:]}"

    @classmethod
    def _top_level_limit(cls, sql: str) -> re.Match[str] | None:
        """Return the last ``LIMIT n`` match outside parentheses and quotes, if any."""
        # Blank out quoted spans with same-length filler so offsets still line
        # up with ``sql`` and quoted parentheses/keywords are ignored.
        masked = cls.QUOTED_SPAN_PATTERN.sub(lambda quoted: " " * len(quoted.group(0)), sql)
        found = None
        depth = 0
        cursor = 0
        for match in cls.LIMIT_PATTERN.finditer(masked):
            segment = masked[cursor:match.start()]
            depth += segment.count("(") - segment.count(")")
            cursor = match.start()
            if depth == 0:
                found = match
        return found

    @staticmethod
    def _normalize(sql: str) -> str:
        """Return the compacted statement in lower case."""
        return SQLGuard._compact_original(sql).lower()

    @staticmethod
    def _compact_original(sql: str) -> str:
        """Drop block and line comments and collapse whitespace runs to one space."""
        without_block_comments = re.sub(r"/\*.*?\*/", " ", sql, flags=re.DOTALL)
        without_line_comments = re.sub(r"--.*?$", " ", without_block_comments, flags=re.MULTILINE)
        return re.sub(r"\s+", " ", without_line_comments).strip()

    @classmethod
    def _relation_references(cls, sql: str) -> list[str]:
        """Collect relation names after FROM/JOIN and after commas in FROM clauses."""
        relations = [match.group(1) for match in cls.RELATION_PATTERN.finditer(sql)]
        for clause_match in cls.FROM_CLAUSE_PATTERN.finditer(sql):
            relations.extend(match.group(1) for match in cls.COMMA_RELATION_PATTERN.finditer(clause_match.group(1)))
        return relations

    @staticmethod
    def _clean_identifier(identifier: str) -> str:
        """Strip whitespace, trailing semicolons and double quotes; lower-case the rest."""
        return re.sub(r'\s+', "", identifier.strip().rstrip(";")).replace('"', "").lower()

    @staticmethod
    def _strip_string_literals(sql: str) -> str:
        """Replace every single-quoted literal with an empty literal."""
        return re.sub(r"'(?:''|[^'])*'", "''", sql)
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass, replace
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from threading import Lock
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
from urllib.error import HTTPError, URLError
|
|
10
|
+
from urllib.request import Request, urlopen
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from .config import Settings
|
|
14
|
+
from sqlalchemy import Engine
|
|
15
|
+
|
|
16
|
+
from .runtime_config import RuntimeVannaConfig, load_runtime_vanna_config
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class CuratedRelation:
    """Immutable description of one database relation and its columns."""

    schema_name: str
    relation_name: str
    relation_type: str
    # Pairs of strings, one per column; presumably (column name, data type) —
    # TODO confirm against the code that builds these objects.
    columns: tuple[tuple[str, str], ...]

    @property
    def full_name(self) -> str:
        """Schema-qualified name in ``schema.relation`` form."""
        return "{}.{}".format(self.schema_name, self.relation_name)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MaritacaChat:
    """Chat client that sends prompts to the Maritaca chat-completions HTTP API.

    Configuration is read from ``self.config`` using the keys
    ``maritaca_api_key``, ``maritaca_api_url``, ``model`` and
    ``maritaca_timeout_seconds``.
    """

    def __init__(self, config: dict[str, Any] | None = None):
        """Store ``config``; a missing or empty config becomes an empty dict."""
        self.config = config or {}

    def system_message(self, message: str) -> dict[str, str]:
        """Wrap ``message`` as a chat message with the ``system`` role."""
        return {"role": "system", "content": message}

    def user_message(self, message: str) -> dict[str, str]:
        """Wrap ``message`` as a chat message with the ``user`` role."""
        return {"role": "user", "content": message}

    def assistant_message(self, message: str) -> dict[str, str]:
        """Wrap ``message`` as a chat message with the ``assistant`` role."""
        return {"role": "assistant", "content": message}

    def submit_prompt(self, prompt: Any, **_: Any) -> str:
        """Send ``prompt`` to the API and return the first choice's text.

        Args:
            prompt: Either a list of chat messages or any other object, which
                is sent as a single user message (see ``_messages``).
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            The ``content`` string of the first returned choice.

        Raises:
            RuntimeError: When no API key is configured, the HTTP request
                fails, or the response cannot be interpreted.
        """
        api_key = str(self.config.get("maritaca_api_key") or "").strip()
        if not api_key:
            raise RuntimeError("VANNA_MARITACA_API_KEY is required for VANNA_LLM_PROVIDER=maritaca")

        api_url = str(
            self.config.get("maritaca_api_url") or "https://chat.maritaca.ai/api/chat/completions"
        ).strip()
        model = str(self.config.get("model") or "sabia-4").strip()
        timeout = int(self.config.get("maritaca_timeout_seconds") or 60)
        payload = json.dumps({"model": model, "messages": self._messages(prompt)}).encode("utf-8")
        request = Request(
            api_url,
            data=payload,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
                "Accept": "application/json",
            },
            method="POST",
        )

        try:
            with urlopen(request, timeout=timeout) as response:
                raw = response.read()
        except HTTPError as exc:
            # HTTPError must be handled before the broader OSError family so
            # the status code and response body end up in the message.
            detail = exc.read().decode("utf-8", errors="replace")
            raise RuntimeError(f"Maritaca API returned HTTP {exc.code}: {detail}") from exc
        except (OSError, TimeoutError, URLError) as exc:
            raise RuntimeError(f"Maritaca API request failed: {exc}") from exc

        return self._extract_content(raw)

    @staticmethod
    def _extract_content(raw: bytes) -> str:
        """Pull ``choices[0].message.content`` out of a raw JSON response body.

        Raises:
            RuntimeError: When the body is not valid UTF-8 JSON, lacks the
                expected structure, or the content is not a string.
        """
        try:
            data = json.loads(raw.decode("utf-8"))
            content = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            raise RuntimeError("Maritaca API returned an invalid response") from exc
        # A null or structured content would break the declared str contract.
        if not isinstance(content, str):
            raise RuntimeError("Maritaca API returned an invalid response")
        return content

    @staticmethod
    def _messages(prompt: Any) -> list[dict[str, str]]:
        """Convert ``prompt`` into the list of chat messages sent to the API.

        List items that are dicts with ``role`` and ``content`` keep both
        (stringified); any other item becomes a user message. An empty list
        yields one empty user message, and a non-list prompt becomes a single
        user message.
        """
        if isinstance(prompt, list):
            messages: list[dict[str, str]] = []
            for item in prompt:
                if isinstance(item, dict) and "role" in item and "content" in item:
                    messages.append({"role": str(item["role"]), "content": str(item["content"])})
                else:
                    messages.append({"role": "user", "content": str(item)})
            return messages or [{"role": "user", "content": ""}]
        return [{"role": "user", "content": str(prompt)}]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class DataifVannaEngine:
|
|
95
|
+
def __init__(self, settings: Settings, engine: Engine, allowed_schema: str) -> None:
|
|
96
|
+
self.settings = settings
|
|
97
|
+
self.engine = engine
|
|
98
|
+
self.allowed_schema = allowed_schema.strip().lower()
|
|
99
|
+
self._lock = Lock()
|
|
100
|
+
self._trained = False
|
|
101
|
+
self.vn: Any | None = None
|
|
102
|
+
self._vanna_class: type | None = None
|
|
103
|
+
self._runtime_config: RuntimeVannaConfig | None = None
|
|
104
|
+
self._config_signature: tuple[object, ...] | None = None
|
|
105
|
+
Path(settings.vanna_vectorstore_path).mkdir(parents=True, exist_ok=True)
|
|
106
|
+
self._ensure_runtime_config()
|
|
107
|
+
|
|
108
|
+
def _client(self) -> Any:
|
|
109
|
+
self._ensure_runtime_config()
|
|
110
|
+
if self.vn is not None:
|
|
111
|
+
return self.vn
|
|
112
|
+
if self._vanna_class is None or self._runtime_config is None:
|
|
113
|
+
raise RuntimeError("Vanna runtime configuration is unavailable")
|
|
114
|
+
self.vn = self._vanna_class(config=self._provider_config(self._runtime_config))
|
|
115
|
+
self.vn.run_sql = self._run_sql_dataframe
|
|
116
|
+
self.vn.run_sql_is_set = True
|
|
117
|
+
return self.vn
|
|
118
|
+
|
|
119
|
+
def generate_sql(self, question: str, runtime_override: dict[str, Any] | None = None) -> str:
|
|
120
|
+
runtime = self._runtime_from_override(runtime_override)
|
|
121
|
+
if not self._is_runtime_available(runtime):
|
|
122
|
+
raise RuntimeError(self._unavailable_message(runtime))
|
|
123
|
+
if runtime.auto_train:
|
|
124
|
+
self.train_once()
|
|
125
|
+
client = self._client() if runtime.signature() == self.runtime_config().signature() else self._client_for_runtime(runtime)
|
|
126
|
+
return client.generate_sql(question=question, allow_llm_to_see_data=False)
|
|
127
|
+
|
|
128
|
+
def is_llm_available(self) -> bool:
|
|
129
|
+
return bool(self.provider_status()["available"])
|
|
130
|
+
|
|
131
|
+
def provider_status(self) -> dict[str, object]:
|
|
132
|
+
runtime = self.runtime_config()
|
|
133
|
+
return self._provider_status(runtime)
|
|
134
|
+
|
|
135
|
+
def _provider_status(self, runtime: RuntimeVannaConfig) -> dict[str, object]:
|
|
136
|
+
provider = runtime.provider
|
|
137
|
+
if provider == "maritaca":
|
|
138
|
+
if runtime.maritaca_api_key.strip():
|
|
139
|
+
return {"available": True, "detail": "Maritaca API key configured"}
|
|
140
|
+
return {"available": False, "detail": "Maritaca API key is not configured"}
|
|
141
|
+
|
|
142
|
+
try:
|
|
143
|
+
with urlopen(f"{runtime.ollama_base_url.rstrip('/')}/api/tags", timeout=2) as response:
|
|
144
|
+
available = 200 <= response.status < 500
|
|
145
|
+
return {
|
|
146
|
+
"available": available,
|
|
147
|
+
"detail": f"Ollama responded at {runtime.ollama_base_url}" if available else "Ollama returned an error",
|
|
148
|
+
}
|
|
149
|
+
except (OSError, TimeoutError, URLError) as exc:
|
|
150
|
+
return {"available": False, "detail": f"Ollama is not reachable at {runtime.ollama_base_url}: {exc}"}
|
|
151
|
+
|
|
152
|
+
def _is_runtime_available(self, runtime: RuntimeVannaConfig) -> bool:
    """Return whether ``_provider_status(runtime)`` reports the provider as available."""
    status = self._provider_status(runtime)
    return bool(status["available"])
|
|
154
|
+
|
|
155
|
+
def train_once(self, force: bool = False) -> None:
    """Train the Vanna client with DDL, catalog documentation and example SQL.

    The work runs under ``self._lock`` and is skipped when the service is
    already marked as trained, unless ``force`` is true.

    Args:
        force: Train again even if ``_trained`` is already set.
    """
    with self._lock:
        # Refresh the runtime configuration before looking at the trained flag.
        self._ensure_runtime_config()
        needs_training = force or not self._trained
        if needs_training:
            client = self._client()
            for curated in self._load_allowed_relations():
                client.train(ddl=self._build_ddl(curated))
            for doc_text in self._load_catalog_documentation():
                client.train(documentation=doc_text)
            for example_sql in self._approved_examples():
                client.train(sql=example_sql)
            self._trained = True
|
|
168
|
+
|
|
169
|
+
def _run_sql_dataframe(self, sql: str) -> Any:
    """Run ``sql`` on ``self.engine`` and return the result of ``pandas.read_sql_query``."""
    # Imported lazily, inside the function, as in the rest of this class.
    import pandas as pd
    from sqlalchemy import text

    statement = text(sql)
    with self.engine.begin() as connection:
        return pd.read_sql_query(sql=statement, con=connection)
|
|
175
|
+
|
|
176
|
+
def _load_allowed_relations(self) -> list[CuratedRelation]:
    """List every relation in ``self.allowed_schema`` together with its columns.

    Tables and views are read from ``information_schema``. PostgreSQL does not
    expose materialized views there, so their columns are read from
    ``pg_catalog`` and appended; without that branch materialized views would
    never be returned.

    Returns:
        One ``CuratedRelation`` per relation, with ``columns`` as a tuple of
        ``(column_name, data_type)`` pairs in column order.
    """
    from sqlalchemy import text

    query = text(
        """
        SELECT table_schema, table_name, column_name, data_type, relation_type
        FROM (
            SELECT
                CAST(c.table_schema AS text) AS table_schema,
                CAST(c.table_name AS text) AS table_name,
                CAST(c.column_name AS text) AS column_name,
                CAST(c.data_type AS text) AS data_type,
                COALESCE(CAST(t.table_type AS text), 'RELATION') AS relation_type,
                CAST(c.ordinal_position AS integer) AS ordinal_position
            FROM information_schema.columns c
            LEFT JOIN information_schema.tables t
                ON t.table_schema = c.table_schema
                AND t.table_name = c.table_name
            WHERE c.table_schema = :allowed_schema
            UNION ALL
            SELECT
                CAST(n.nspname AS text),
                CAST(cls.relname AS text),
                CAST(a.attname AS text),
                CAST(pg_catalog.format_type(a.atttypid, NULL) AS text),
                CAST('MATERIALIZED VIEW' AS text),
                CAST(a.attnum AS integer)
            FROM pg_catalog.pg_class cls
            JOIN pg_catalog.pg_namespace n
                ON n.oid = cls.relnamespace
            JOIN pg_catalog.pg_attribute a
                ON a.attrelid = cls.oid
            WHERE cls.relkind = 'm'
                AND n.nspname = :allowed_schema
                AND a.attnum > 0
                AND NOT a.attisdropped
                AND pg_catalog.has_table_privilege(cls.oid, 'SELECT')
        ) AS relation_columns
        ORDER BY table_schema, table_name, ordinal_position
        """
    )

    with self.engine.begin() as conn:
        # Materialize while the connection is still open.
        rows = conn.execute(query, {"allowed_schema": self.allowed_schema}).mappings().all()

    # Dict insertion order keeps relations in the order produced by ORDER BY.
    grouped: dict[tuple[str, str, str], list[tuple[str, str]]] = {}
    for row in rows:
        key = (row["table_schema"], row["table_name"], row["relation_type"])
        grouped.setdefault(key, []).append((row["column_name"], row["data_type"]))

    return [
        CuratedRelation(
            schema_name=schema,
            relation_name=name,
            relation_type=relation_type,
            columns=tuple(columns),
        )
        for (schema, name, relation_type), columns in grouped.items()
    ]
|
|
219
|
+
|
|
220
|
+
def _load_catalog_documentation(self) -> list[str]:
    """Build one documentation sentence per row of the ``vw_pnp_vanna_catalogo`` view.

    Returns:
        An empty list when ``to_regclass`` cannot resolve the view in
        ``self.allowed_schema``; otherwise one sentence per catalog row,
        ordered by group and relation name.

    Raises:
        ValueError: If ``self.allowed_schema`` is not a plain SQL identifier.
    """
    from sqlalchemy import text

    qualified_view = f"{self.allowed_schema}.vw_pnp_vanna_catalogo"
    # The schema name is interpolated into the SQL text, so it is validated
    # and quoted first.
    quoted_schema = self._quote_identifier(self.allowed_schema)
    existence_check = text("SELECT to_regclass(:catalog_name)")

    with self.engine.begin() as connection:
        found = connection.execute(existence_check, {"catalog_name": qualified_view}).scalar()
        if not found:
            return []

        catalog_rows = connection.execute(
            text(
                f"""
                SELECT relation_group, relation_name, relation_description
                FROM {quoted_schema}.vw_pnp_vanna_catalogo
                ORDER BY relation_group, relation_name
                """
            )
        ).mappings()

        sentences = []
        for entry in catalog_rows:
            sentences.append(
                f"Relacao {self.allowed_schema}.{entry['relation_name']} pertence ao dominio "
                f"{entry['relation_group']}: {entry['relation_description']}"
            )
        return sentences
|
|
249
|
+
|
|
250
|
+
def _build_vanna_class(self, provider: str | None = None) -> type:
    """Create the Vanna class that pairs the ChromaDB vector store with an LLM backend.

    Args:
        provider: ``"ollama"`` or ``"maritaca"``. When omitted, the provider of
            the stored runtime configuration is used.

    Returns:
        A new class combining ``ChromaDB_VectorStore`` with the chosen backend.

    Raises:
        RuntimeError: If the provider is neither ``"ollama"`` nor ``"maritaca"``.
            The message names the provider that was actually requested.
    """
    provider = provider or self.runtime_config().provider
    # Validate before importing vanna so an unsupported provider fails fast
    # and the error reports the requested value, not the static setting.
    if provider not in ("ollama", "maritaca"):
        raise RuntimeError(f"Unsupported VANNA_LLM_PROVIDER: {provider}")

    from vanna.chromadb import ChromaDB_VectorStore

    if provider == "ollama":
        from vanna.ollama import Ollama

        class OllamaVanna(ChromaDB_VectorStore, Ollama):
            def __init__(self, config=None):
                # Both bases are initialized explicitly with the same config.
                ChromaDB_VectorStore.__init__(self, config=config)
                Ollama.__init__(self, config=config)

        return OllamaVanna

    class MaritacaVanna(MaritacaChat, ChromaDB_VectorStore):
        def __init__(self, config=None):
            ChromaDB_VectorStore.__init__(self, config=config)
            MaritacaChat.__init__(self, config=config)

    return MaritacaVanna
|
|
273
|
+
|
|
274
|
+
def _provider_config(self, runtime: RuntimeVannaConfig | None = None) -> dict[str, Any]:
    """Assemble the config dict passed to the Vanna class constructor.

    Args:
        runtime: Configuration to read; defaults to the stored runtime configuration.

    Returns:
        A dict with ``model`` and ``path``, plus the Ollama host or the
        Maritaca URL/key/timeout depending on ``runtime.provider``.
    """
    if not runtime:
        runtime = self.runtime_config()

    settings: dict[str, Any] = {"model": runtime.model_name(), "path": runtime.vectorstore_path}
    backend = runtime.provider
    if backend == "ollama":
        settings.update(ollama_host=runtime.ollama_base_url)
    elif backend == "maritaca":
        settings.update(
            maritaca_api_url=runtime.maritaca_api_url,
            maritaca_api_key=runtime.maritaca_api_key,
            maritaca_timeout_seconds=runtime.maritaca_timeout_seconds,
        )
    return settings
|
|
288
|
+
|
|
289
|
+
def _client_for_runtime(self, runtime: RuntimeVannaConfig) -> Any:
    """Build a fresh Vanna client for ``runtime`` and wire it to this service's SQL runner."""
    vanna_cls = self._build_vanna_class(runtime.provider)
    instance = vanna_cls(config=self._provider_config(runtime))
    # Point the client's SQL execution hook at this service's runner and flag it as set.
    instance.run_sql, instance.run_sql_is_set = self._run_sql_dataframe, True
    return instance
|
|
295
|
+
|
|
296
|
+
def _runtime_from_override(self, override: dict[str, Any] | None) -> RuntimeVannaConfig:
    """Return the stored runtime configuration with per-call overrides applied.

    Args:
        override: Optional mapping with a ``provider`` entry and nested
            ``ollama`` / ``maritaca`` dicts. Missing, empty or non-dict
            entries fall back to the stored values.

    Returns:
        The stored configuration when ``override`` is empty, otherwise a copy
        with the overridden fields replaced.
    """
    base = self.runtime_config()
    if not override:
        return base

    def _section(name: str) -> dict[str, Any]:
        # Only dict-valued sections are honored; anything else counts as absent.
        value = override.get(name)
        return value if isinstance(value, dict) else {}

    def _text(section: dict[str, Any], key: str, fallback: str) -> str:
        # Falsy or whitespace-only override values fall back to the stored one.
        return str(section.get(key) or fallback).strip() or fallback

    ollama_cfg = _section("ollama")
    maritaca_cfg = _section("maritaca")
    chosen_provider = str(override.get("provider") or base.provider).strip().lower() or base.provider

    return replace(
        base,
        provider=chosen_provider,
        ollama_base_url=_text(ollama_cfg, "base_url", base.ollama_base_url),
        ollama_model=_text(ollama_cfg, "model", base.ollama_model),
        maritaca_api_url=_text(maritaca_cfg, "api_url", base.maritaca_api_url),
        # The API key is taken verbatim (no stripping).
        maritaca_api_key=str(maritaca_cfg.get("api_key") or base.maritaca_api_key),
        maritaca_model=_text(maritaca_cfg, "model", base.maritaca_model),
        maritaca_timeout_seconds=_coerce_positive_int(
            maritaca_cfg.get("timeout_seconds"),
            base.maritaca_timeout_seconds,
        ),
    )
|
|
317
|
+
|
|
318
|
+
def _unavailable_message(self, runtime: RuntimeVannaConfig | None = None) -> str:
    """Return the error text used when the provider of ``runtime`` is unavailable.

    Falls back to the stored runtime configuration when ``runtime`` is omitted.
    """
    active = runtime or self.runtime_config()
    if active.provider != "maritaca":
        return f"Ollama is not reachable at {active.ollama_base_url}"
    return "Maritaca API key is not configured"
|
|
324
|
+
|
|
325
|
+
def runtime_config(self) -> RuntimeVannaConfig:
    """Refresh and return the stored runtime configuration.

    Raises:
        RuntimeError: If no configuration is stored after the refresh.
    """
    self._ensure_runtime_config()
    current = self._runtime_config
    if current is not None:
        return current
    raise RuntimeError("Vanna runtime configuration is unavailable")
|
|
330
|
+
|
|
331
|
+
def _ensure_runtime_config(self) -> None:
    """Reload the runtime configuration and reset cached state when it changed.

    The configuration is loaded on every call; nothing else happens when its
    signature equals the stored one. On a change the vector store directory is
    created, the new configuration and Vanna class are stored, and the cached
    client and trained flag are cleared.
    """
    fresh = load_runtime_vanna_config(self.settings, self.engine)
    fresh_signature = fresh.signature()
    if self._config_signature != fresh_signature:
        Path(fresh.vectorstore_path).mkdir(parents=True, exist_ok=True)
        self._runtime_config = fresh
        self._config_signature = fresh_signature
        self._vanna_class = self._build_vanna_class(fresh.provider)
        # Drop the cached client and force retraining under the new configuration.
        self.vn = None
        self._trained = False
|
|
343
|
+
|
|
344
|
+
@staticmethod
def _quote_identifier(identifier: str) -> str:
    """Wrap a plain SQL identifier in double quotes.

    Raises:
        ValueError: If ``identifier`` is not an ASCII letter or underscore
            followed only by ASCII letters, digits or underscores.
    """
    is_plain = re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", identifier) is not None
    if is_plain:
        return f'"{identifier}"'
    raise ValueError(f"Invalid SQL identifier: {identifier}")
|
|
349
|
+
|
|
350
|
+
@staticmethod
def _build_ddl(relation: CuratedRelation) -> str:
    """Render ``relation`` as a ``CREATE TABLE`` statement preceded by a relation-type comment."""
    column_defs = [f"{name} {data_type}" for name, data_type in relation.columns]
    joined_columns = ",\n ".join(column_defs)
    type_comment = f"-- RELATION TYPE: {relation.relation_type}"
    return f"{type_comment}\nCREATE TABLE {relation.full_name} (\n {joined_columns}\n);"
|
|
354
|
+
|
|
355
|
+
def _approved_examples(self) -> list[str]:
    """Return the example SQL queries used for training.

    The examples reference relations in the ``curated`` schema, so an empty
    list is returned for any other allowed schema.
    """
    if self.allowed_schema == "curated":
        example_clauses = (
            (
                "SELECT dominio, indicador, ano, SUM(valor) AS total",
                "FROM curated.vw_pnp_vanna_resumo",
                "GROUP BY dominio, indicador, ano",
                "ORDER BY ano DESC, dominio, indicador LIMIT 50",
            ),
            (
                "SELECT relation_group, relation_name, relation_description",
                "FROM curated.vw_pnp_vanna_catalogo",
                "ORDER BY relation_group, relation_name LIMIT 50",
            ),
            (
                "SELECT instituicao, ano, SUM(valor) AS total_matriculas",
                "FROM curated.vw_pnp_vanna_resumo",
                "WHERE dominio = 'matriculas'",
                "GROUP BY instituicao, ano",
                "ORDER BY ano DESC, total_matriculas DESC LIMIT 50",
            ),
            (
                "SELECT ano, SUM(matriculas) AS total_matriculas",
                "FROM curated.mv_pnp_dashboard_matriculas",
                "GROUP BY ano ORDER BY ano DESC LIMIT 50",
            ),
        )
        # Each query is its clauses joined by single spaces.
        return [" ".join(clauses) for clauses in example_clauses]
    return []
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Build stage: install dependencies and run the frontend build.
FROM node:20-alpine AS builder

# API base URL made available to the build as an environment variable
# (empty unless passed with --build-arg).
ARG VITE_API_BASE_URL=
ENV VITE_API_BASE_URL=$VITE_API_BASE_URL

WORKDIR /app
# Copy the manifests first so the dependency layer is reused when only
# application sources change. The lockfile is optional (note the glob).
COPY package.json package-lock.json* ./
# Install exactly what the lockfile pins when it exists, for reproducible
# builds; fall back to a plain install when there is no lockfile.
RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi
COPY . .
RUN npm run build

# Runtime stage: serve the built assets with nginx.
FROM nginx:1.27-alpine
COPY nginx.conf /etc/nginx/nginx.conf
COPY --from=builder /app/dist /usr/share/nginx/html
EXPOSE 80
# Keep nginx in the foreground so it stays the container's main process.
CMD ["nginx", "-g", "daemon off;"]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<!doctype html>
<html lang="pt-BR">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>dataif</title>
  </head>
  <body>
    <!-- Shown instead of a blank page when JavaScript is disabled. -->
    <noscript>Este aplicativo requer JavaScript para funcionar.</noscript>
    <!-- Mount point for the application loaded by the module script below. -->
    <div id="root"></div>
    <script type="module" src="/src/main.jsx"></script>
  </body>
</html>
|