nao-core 0.0.38__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +2 -0
- nao_core/__init__.py.bak +2 -0
- nao_core/bin/build-info.json +5 -0
- nao_core/bin/fastapi/main.py +268 -0
- nao_core/bin/fastapi/test_main.py +156 -0
- nao_core/bin/migrations-postgres/0000_user_auth_and_chat_tables.sql +98 -0
- nao_core/bin/migrations-postgres/0001_message_feedback.sql +9 -0
- nao_core/bin/migrations-postgres/0002_chat_message_stop_reason_and_error_message.sql +2 -0
- nao_core/bin/migrations-postgres/0003_handle_slack_with_thread.sql +2 -0
- nao_core/bin/migrations-postgres/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
- nao_core/bin/migrations-postgres/0006_llm_model_ids.sql +4 -0
- nao_core/bin/migrations-postgres/0007_chat_message_llm_info.sql +2 -0
- nao_core/bin/migrations-postgres/meta/0000_snapshot.json +707 -0
- nao_core/bin/migrations-postgres/meta/0001_snapshot.json +766 -0
- nao_core/bin/migrations-postgres/meta/0002_snapshot.json +778 -0
- nao_core/bin/migrations-postgres/meta/0003_snapshot.json +799 -0
- nao_core/bin/migrations-postgres/meta/0004_snapshot.json +847 -0
- nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
- nao_core/bin/migrations-postgres/meta/0006_snapshot.json +1141 -0
- nao_core/bin/migrations-postgres/meta/_journal.json +62 -0
- nao_core/bin/migrations-sqlite/0000_user_auth_and_chat_tables.sql +98 -0
- nao_core/bin/migrations-sqlite/0001_message_feedback.sql +8 -0
- nao_core/bin/migrations-sqlite/0002_chat_message_stop_reason_and_error_message.sql +2 -0
- nao_core/bin/migrations-sqlite/0003_handle_slack_with_thread.sql +2 -0
- nao_core/bin/migrations-sqlite/0004_input_and_output_tokens.sql +8 -0
- nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
- nao_core/bin/migrations-sqlite/0006_llm_model_ids.sql +4 -0
- nao_core/bin/migrations-sqlite/0007_chat_message_llm_info.sql +2 -0
- nao_core/bin/migrations-sqlite/meta/0000_snapshot.json +674 -0
- nao_core/bin/migrations-sqlite/meta/0001_snapshot.json +735 -0
- nao_core/bin/migrations-sqlite/meta/0002_snapshot.json +749 -0
- nao_core/bin/migrations-sqlite/meta/0003_snapshot.json +763 -0
- nao_core/bin/migrations-sqlite/meta/0004_snapshot.json +819 -0
- nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
- nao_core/bin/migrations-sqlite/meta/0006_snapshot.json +1100 -0
- nao_core/bin/migrations-sqlite/meta/_journal.json +62 -0
- nao_core/bin/nao-chat-server +0 -0
- nao_core/bin/public/assets/code-block-F6WJLWQG-CV0uOmNJ.js +153 -0
- nao_core/bin/public/assets/index-DcbndLHo.css +1 -0
- nao_core/bin/public/assets/index-t1hZI3nl.js +560 -0
- nao_core/bin/public/favicon.ico +0 -0
- nao_core/bin/public/index.html +18 -0
- nao_core/bin/rg +0 -0
- nao_core/commands/__init__.py +6 -0
- nao_core/commands/chat.py +225 -0
- nao_core/commands/debug.py +158 -0
- nao_core/commands/init.py +358 -0
- nao_core/commands/sync/__init__.py +124 -0
- nao_core/commands/sync/accessors.py +290 -0
- nao_core/commands/sync/cleanup.py +156 -0
- nao_core/commands/sync/providers/__init__.py +32 -0
- nao_core/commands/sync/providers/base.py +113 -0
- nao_core/commands/sync/providers/databases/__init__.py +17 -0
- nao_core/commands/sync/providers/databases/bigquery.py +79 -0
- nao_core/commands/sync/providers/databases/databricks.py +79 -0
- nao_core/commands/sync/providers/databases/duckdb.py +78 -0
- nao_core/commands/sync/providers/databases/postgres.py +79 -0
- nao_core/commands/sync/providers/databases/provider.py +129 -0
- nao_core/commands/sync/providers/databases/snowflake.py +79 -0
- nao_core/commands/sync/providers/notion/__init__.py +5 -0
- nao_core/commands/sync/providers/notion/provider.py +205 -0
- nao_core/commands/sync/providers/repositories/__init__.py +5 -0
- nao_core/commands/sync/providers/repositories/provider.py +134 -0
- nao_core/commands/sync/registry.py +23 -0
- nao_core/config/__init__.py +30 -0
- nao_core/config/base.py +100 -0
- nao_core/config/databases/__init__.py +55 -0
- nao_core/config/databases/base.py +85 -0
- nao_core/config/databases/bigquery.py +99 -0
- nao_core/config/databases/databricks.py +79 -0
- nao_core/config/databases/duckdb.py +41 -0
- nao_core/config/databases/postgres.py +83 -0
- nao_core/config/databases/snowflake.py +125 -0
- nao_core/config/exceptions.py +7 -0
- nao_core/config/llm/__init__.py +19 -0
- nao_core/config/notion/__init__.py +8 -0
- nao_core/config/repos/__init__.py +3 -0
- nao_core/config/repos/base.py +11 -0
- nao_core/config/slack/__init__.py +12 -0
- nao_core/context/__init__.py +54 -0
- nao_core/context/base.py +57 -0
- nao_core/context/git.py +177 -0
- nao_core/context/local.py +59 -0
- nao_core/main.py +13 -0
- nao_core/templates/__init__.py +41 -0
- nao_core/templates/context.py +193 -0
- nao_core/templates/defaults/databases/columns.md.j2 +23 -0
- nao_core/templates/defaults/databases/description.md.j2 +32 -0
- nao_core/templates/defaults/databases/preview.md.j2 +22 -0
- nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
- nao_core/templates/engine.py +133 -0
- nao_core/templates/render.py +196 -0
- nao_core-0.0.38.dist-info/METADATA +150 -0
- nao_core-0.0.38.dist-info/RECORD +98 -0
- nao_core-0.0.38.dist-info/WHEEL +4 -0
- nao_core-0.0.38.dist-info/entry_points.txt +2 -0
- nao_core-0.0.38.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Repository sync provider implementation."""
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
|
|
9
|
+
from nao_core.commands.sync.cleanup import cleanup_stale_repos
|
|
10
|
+
from nao_core.config import NaoConfig
|
|
11
|
+
from nao_core.config.repos import RepoConfig
|
|
12
|
+
|
|
13
|
+
from ..base import SyncProvider, SyncResult
|
|
14
|
+
|
|
15
|
+
console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def clone_or_pull_repo(repo: RepoConfig, base_path: Path) -> bool:
    """Clone a repository if it doesn't exist, or pull latest changes if it does.

    Args:
        repo: Repository configuration (provides ``name``, ``url`` and an
            optional ``branch``)
        base_path: Base path where repositories are stored; the repo lives at
            ``base_path / repo.name``

    Returns:
        True if successful, False otherwise
    """
    repo_path = base_path / repo.name

    try:
        if repo_path.exists():
            # Repository exists - pull latest changes
            console.print(f" [dim]Pulling latest changes for[/dim] {repo.name}")

            result = subprocess.run(
                ["git", "pull"],
                cwd=repo_path,
                capture_output=True,
                text=True,
                check=False,
            )

            if result.returncode != 0:
                console.print(f" [yellow]⚠[/yellow] Failed to pull {repo.name}: {result.stderr.strip()}")
                return False

            # If branch is specified, checkout that branch
            if repo.branch:
                checkout = subprocess.run(
                    ["git", "checkout", repo.branch],
                    cwd=repo_path,
                    capture_output=True,
                    text=True,
                    check=False,
                )
                # The checkout result used to be silently discarded, so a
                # failed branch switch was reported as a successful sync.
                # Surface it instead.
                if checkout.returncode != 0:
                    console.print(
                        f" [yellow]⚠[/yellow] Failed to checkout {repo.branch} for {repo.name}: {checkout.stderr.strip()}"
                    )
                    return False

        else:
            # Repository doesn't exist - clone it
            console.print(f" [dim]Cloning[/dim] {repo.name}")

            cmd = ["git", "clone"]
            if repo.branch:
                # Clone the requested branch directly instead of the default.
                cmd.extend(["-b", repo.branch])
            cmd.extend([repo.url, str(repo_path)])

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                check=False,
            )

            if result.returncode != 0:
                console.print(f" [yellow]⚠[/yellow] Failed to clone {repo.name}: {result.stderr.strip()}")
                return False

        return True

    except Exception as e:
        # Best-effort: a single broken repo should not abort the whole sync,
        # so any unexpected error (missing git binary, permissions, ...) is
        # reported and converted into a False result.
        console.print(f" [yellow]⚠[/yellow] Error syncing {repo.name}: {e}")
        return False
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class RepositorySyncProvider(SyncProvider):
    """Sync provider that keeps configured git repositories up to date."""

    @property
    def name(self) -> str:
        return "Repositories"

    @property
    def emoji(self) -> str:
        return "📦"

    @property
    def default_output_dir(self) -> str:
        return "repos"

    def pre_sync(self, config: NaoConfig, output_path: Path) -> None:
        """
        Always run before syncing.
        """

        cleanup_stale_repos(config.repos, output_path, verbose=True)

    def get_items(self, config: NaoConfig) -> list[RepoConfig]:
        return config.repos

    def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
        """Sync all configured repositories.

        Args:
            items: List of repository configurations
            output_path: Base path where repositories are stored
            project_path: Path to the nao project root (unused for repos)

        Returns:
            SyncResult with number of successfully synced repositories
        """
        if not items:
            # Nothing configured: report zero without touching the filesystem.
            return SyncResult(provider_name=self.name, items_synced=0)

        output_path.mkdir(parents=True, exist_ok=True)

        console.print(f"\n[bold cyan]{self.emoji} Syncing {self.name}[/bold cyan]")
        console.print(f"[dim]Location:[/dim] {output_path.absolute()}\n")

        synced = 0
        for repo in items:
            if not clone_or_pull_repo(repo, output_path):
                continue
            synced += 1
            console.print(f" [green]✓[/green] {repo.name}")

        return SyncResult(provider_name=self.name, items_synced=synced)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Accessor registry for mapping accessor types to implementations."""
|
|
2
|
+
|
|
3
|
+
from nao_core.config import AccessorType
|
|
4
|
+
|
|
5
|
+
from .accessors import (
|
|
6
|
+
ColumnsAccessor,
|
|
7
|
+
DataAccessor,
|
|
8
|
+
DescriptionAccessor,
|
|
9
|
+
PreviewAccessor,
|
|
10
|
+
ProfilingAccessor,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# Maps each AccessorType to one shared accessor instance.
# NOTE(review): instances are module-level singletons reused across syncs —
# presumably accessors are stateless; confirm before adding stateful ones.
ACCESSOR_REGISTRY: dict[AccessorType, DataAccessor] = {
    AccessorType.COLUMNS: ColumnsAccessor(),
    AccessorType.PREVIEW: PreviewAccessor(num_rows=10),  # previews capped at 10 rows
    AccessorType.DESCRIPTION: DescriptionAccessor(),
    AccessorType.PROFILING: ProfilingAccessor(),
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_accessors(accessor_types: list[AccessorType]) -> list[DataAccessor]:
    """Get accessor instances for the given types.

    Types with no registry entry are silently skipped.
    """
    selected: list[DataAccessor] = []
    for accessor_type in accessor_types:
        if accessor_type in ACCESSOR_REGISTRY:
            selected.append(ACCESSOR_REGISTRY[accessor_type])
    return selected
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .base import NaoConfig
|
|
2
|
+
from .databases import (
|
|
3
|
+
AccessorType,
|
|
4
|
+
AnyDatabaseConfig,
|
|
5
|
+
BigQueryConfig,
|
|
6
|
+
DatabaseType,
|
|
7
|
+
DatabricksConfig,
|
|
8
|
+
DuckDBConfig,
|
|
9
|
+
PostgresConfig,
|
|
10
|
+
SnowflakeConfig,
|
|
11
|
+
)
|
|
12
|
+
from .exceptions import InitError
|
|
13
|
+
from .llm import LLMConfig, LLMProvider
|
|
14
|
+
from .slack import SlackConfig
|
|
15
|
+
|
|
16
|
+
# Public API of nao_core.config, re-exported from the subpackages above.
__all__ = [
    "NaoConfig",
    "AccessorType",
    "AnyDatabaseConfig",
    "BigQueryConfig",
    "DuckDBConfig",
    "DatabricksConfig",
    "SnowflakeConfig",
    "PostgresConfig",
    "DatabaseType",
    "LLMConfig",
    "LLMProvider",
    "SlackConfig",
    "InitError",
]
|
nao_core/config/base.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import dotenv
|
|
6
|
+
import yaml
|
|
7
|
+
from ibis import BaseBackend
|
|
8
|
+
from pydantic import BaseModel, Field, model_validator
|
|
9
|
+
|
|
10
|
+
from .databases import AnyDatabaseConfig, parse_database_config
|
|
11
|
+
from .llm import LLMConfig
|
|
12
|
+
from .notion import NotionConfig
|
|
13
|
+
from .repos import RepoConfig
|
|
14
|
+
from .slack import SlackConfig
|
|
15
|
+
|
|
16
|
+
dotenv.load_dotenv()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class NaoConfig(BaseModel):
    """nao project configuration.

    Loaded from / saved to a ``nao_config.yaml`` file in the project root.
    Loading supports ``{{ env('VAR') }}`` substitution (see
    :meth:`_process_env_vars`).
    """

    project_name: str = Field(description="The name of the nao project")
    databases: list[AnyDatabaseConfig] = Field(default_factory=list, description="The databases to use")
    repos: list[RepoConfig] = Field(default_factory=list, description="The repositories to use")
    notion: NotionConfig | None = Field(default=None, description="The Notion configurations")
    llm: LLMConfig | None = Field(default=None, description="The LLM configuration")
    slack: SlackConfig | None = Field(default=None, description="The Slack configuration")

    @model_validator(mode="before")
    @classmethod
    def parse_databases(cls, data: dict) -> dict:
        """Parse database configs into their specific types."""
        # Dicts are routed through parse_database_config; already-built config
        # objects are passed through unchanged.
        if "databases" in data and isinstance(data["databases"], list):
            data["databases"] = [parse_database_config(db) if isinstance(db, dict) else db for db in data["databases"]]
        return data

    def save(self, path: Path) -> None:
        """Save the configuration to a YAML file.

        Args:
            path: Directory in which ``nao_config.yaml`` is written.
        """
        config_file = path / "nao_config.yaml"
        with config_file.open("w") as f:
            yaml.dump(
                # by_alias so fields like Databricks' ``schema_name`` are
                # written under their YAML alias (``schema``).
                self.model_dump(mode="json", by_alias=True),
                f,
                default_flow_style=False,
                sort_keys=False,
                allow_unicode=True,
            )

    @classmethod
    def load(cls, path: Path) -> "NaoConfig":
        """Load the configuration from a YAML file.

        Args:
            path: Directory containing ``nao_config.yaml``.

        Raises:
            FileNotFoundError: If the config file does not exist.
            yaml.YAMLError: If the file is not valid YAML.
        """
        config_file = path / "nao_config.yaml"
        content = config_file.read_text()
        # Substitute env-var placeholders before YAML parsing.
        content = cls._process_env_vars(content)
        data = yaml.safe_load(content)
        return cls.model_validate(data)

    def get_connection(self, name: str) -> BaseBackend:
        """Get an Ibis connection by database name.

        Raises:
            ValueError: If no configured database has the given name.
        """
        for db in self.databases:
            if db.name == name:
                return db.connect()
        raise ValueError(f"Database '{name}' not found in configuration")

    def get_all_connections(self) -> dict[str, BaseBackend]:
        """Get all Ibis connections as a dict keyed by name."""
        return {db.name: db.connect() for db in self.databases}

    @classmethod
    def try_load(cls, path: Path | None = None) -> "NaoConfig | None":
        """Try to load config from path, returns None if not found or invalid.

        Args:
            path: Directory containing nao_config.yaml. Defaults to NAO_DEFAULT_PROJECT_PATH
                environment variable if set, otherwise current directory.
        """
        if path is None:
            default_path = os.environ.get("NAO_DEFAULT_PROJECT_PATH")
            path = Path(default_path) if default_path else Path.cwd()
        try:
            # NOTE(review): changes the process working directory as a side
            # effect — presumably so relative paths in the config resolve
            # against the project root; confirm callers rely on this.
            os.chdir(path)
            return cls.load(path)
        except (FileNotFoundError, ValueError, yaml.YAMLError):
            # Missing file, bad YAML, or validation failure all mean "no
            # usable config here".
            return None

    @classmethod
    def json_schema(cls) -> dict:
        """Generate JSON schema for the configuration."""
        return cls.model_json_schema()

    @staticmethod
    def _process_env_vars(content: str) -> str:
        """Replace env-var placeholders in raw YAML text.

        Unset variables are replaced with an empty string (no error raised).
        """
        # Support both ${{ env('VAR') }} and {{ env('VAR') }} formats
        regex = re.compile(r"\$?\{\{\s*env\(['\"]([^'\"]+)['\"]\)\s*\}\}")

        def replacer(match: re.Match[str]) -> str:
            env_var = match.group(1)
            return os.environ.get(env_var, "")

        return regex.sub(replacer, content)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from typing import Annotated, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import Discriminator, Tag
|
|
4
|
+
|
|
5
|
+
from .base import AccessorType, DatabaseConfig, DatabaseType
|
|
6
|
+
from .bigquery import BigQueryConfig
|
|
7
|
+
from .databricks import DatabricksConfig
|
|
8
|
+
from .duckdb import DuckDBConfig
|
|
9
|
+
from .postgres import PostgresConfig
|
|
10
|
+
from .snowflake import SnowflakeConfig
|
|
11
|
+
|
|
12
|
+
# =============================================================================
|
|
13
|
+
# Database Config Registry
|
|
14
|
+
# =============================================================================
|
|
15
|
+
|
|
16
|
+
# Discriminated union of every supported database config; pydantic selects the
# concrete class from the ``type`` field.
AnyDatabaseConfig = Annotated[
    Union[
        Annotated[BigQueryConfig, Tag("bigquery")],
        Annotated[DatabricksConfig, Tag("databricks")],
        Annotated[SnowflakeConfig, Tag("snowflake")],
        Annotated[DuckDBConfig, Tag("duckdb")],
        Annotated[PostgresConfig, Tag("postgres")],
    ],
    Discriminator("type"),
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def parse_database_config(data: dict) -> DatabaseConfig:
|
|
29
|
+
"""Parse a database config dict into the appropriate type."""
|
|
30
|
+
db_type = data.get("type")
|
|
31
|
+
if db_type == "bigquery":
|
|
32
|
+
return BigQueryConfig.model_validate(data)
|
|
33
|
+
elif db_type == "duckdb":
|
|
34
|
+
return DuckDBConfig.model_validate(data)
|
|
35
|
+
elif db_type == "databricks":
|
|
36
|
+
return DatabricksConfig.model_validate(data)
|
|
37
|
+
elif db_type == "snowflake":
|
|
38
|
+
return SnowflakeConfig.model_validate(data)
|
|
39
|
+
elif db_type == "postgres":
|
|
40
|
+
return PostgresConfig.model_validate(data)
|
|
41
|
+
else:
|
|
42
|
+
raise ValueError(f"Unknown database type: {db_type}")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
"AccessorType",
|
|
47
|
+
"AnyDatabaseConfig",
|
|
48
|
+
"BigQueryConfig",
|
|
49
|
+
"DuckDBConfig",
|
|
50
|
+
"DatabaseConfig",
|
|
51
|
+
"DatabaseType",
|
|
52
|
+
"DatabricksConfig",
|
|
53
|
+
"SnowflakeConfig",
|
|
54
|
+
"PostgresConfig",
|
|
55
|
+
]
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import fnmatch
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
from ibis import BaseBackend
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
|
|
9
|
+
console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DatabaseType(str, Enum):
    """Supported database types.

    Values match the ``type`` discriminator strings used in nao_config.yaml.
    """

    BIGQUERY = "bigquery"
    DUCKDB = "duckdb"
    DATABRICKS = "databricks"
    SNOWFLAKE = "snowflake"
    POSTGRES = "postgres"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AccessorType(str, Enum):
    """Available data accessors for sync.

    Values are the strings accepted in a database's ``accessors`` list.
    """

    COLUMNS = "columns"
    PREVIEW = "preview"
    DESCRIPTION = "description"
    PROFILING = "profiling"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DatabaseConfig(BaseModel, ABC):
    """Base configuration for all database backends.

    Concrete subclasses add their connection fields and implement
    :meth:`connect` and :meth:`get_database_name`.
    """

    # Friendly display name; also the key used to look up connections.
    name: str = Field(description="A friendly name for this connection")

    # Sync settings
    accessors: list[AccessorType] = Field(
        # default_factory for consistency with ``include``/``exclude`` below
        # and to give each instance its own list rather than one shared
        # module-level default.
        default_factory=lambda: [AccessorType.COLUMNS, AccessorType.PREVIEW, AccessorType.DESCRIPTION],
        description="List of accessors to run during sync (columns, preview, description, profiling)",
    )
    include: list[str] = Field(
        default_factory=list,
        description="Glob patterns for schemas/tables to include (e.g., 'prod_*.*', 'analytics.dim_*'). Empty means include all.",
    )
    exclude: list[str] = Field(
        default_factory=list,
        description="Glob patterns for schemas/tables to exclude (e.g., 'temp_*.*', '*.backup_*')",
    )

    @abstractmethod
    def connect(self) -> BaseBackend:
        """Create an Ibis connection for this database."""
        ...

    def matches_pattern(self, schema: str, table: str) -> bool:
        """Check if a schema.table matches the include/exclude patterns.

        Patterns are matched with ``fnmatch`` against ``"schema.table"``;
        exclude patterns win over include patterns.

        Args:
            schema: The schema/dataset name
            table: The table name

        Returns:
            True if the table should be included, False if excluded
        """
        full_name = f"{schema}.{table}"

        # If include patterns exist, table must match at least one
        if self.include:
            included = any(fnmatch.fnmatch(full_name, pattern) for pattern in self.include)
            if not included:
                return False

        # If exclude patterns exist, table must not match any
        if self.exclude:
            excluded = any(fnmatch.fnmatch(full_name, pattern) for pattern in self.exclude)
            if excluded:
                return False

        return True

    @abstractmethod
    def get_database_name(self) -> str:
        """Get the database name for this database type."""
        ...
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
import ibis
|
|
5
|
+
from ibis import BaseBackend
|
|
6
|
+
from pydantic import Field, field_validator
|
|
7
|
+
from rich.prompt import Prompt
|
|
8
|
+
|
|
9
|
+
from nao_core.config.exceptions import InitError
|
|
10
|
+
|
|
11
|
+
from .base import DatabaseConfig, console
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BigQueryConfig(DatabaseConfig):
    """BigQuery-specific configuration."""

    type: Literal["bigquery"] = "bigquery"
    project_id: str = Field(description="GCP project ID")
    dataset_id: str | None = Field(default=None, description="Default BigQuery dataset")
    credentials_path: str | None = Field(
        default=None,
        description="Path to service account JSON file. If not provided, uses Application Default Credentials (ADC)",
    )
    credentials_json: dict | None = Field(
        default=None,
        description="Service account credentials as a dict or JSON string. Takes precedence over credentials_path if both are provided",
    )
    sso: bool = Field(default=False, description="Use Single Sign-On (SSO) for authentication")
    location: str | None = Field(default=None, description="BigQuery location")

    @field_validator("credentials_json", mode="before")
    @classmethod
    def parse_credentials_json(cls, v: str | dict | None) -> dict | None:
        """Accept credentials as a dict or a JSON string; normalize to dict."""
        if v is None:
            return None
        if isinstance(v, dict):
            return v
        if isinstance(v, str):
            return json.loads(v)
        raise ValueError("credentials_json must be a dict or JSON string")

    @classmethod
    def promptConfig(cls) -> "BigQueryConfig":
        """Interactively prompt the user for BigQuery configuration.

        Raises:
            InitError: If the required GCP project ID is left empty.
        """
        console.print("\n[bold cyan]BigQuery Configuration[/bold cyan]\n")

        name = Prompt.ask("[bold]Connection name[/bold]", default="bigquery-prod")

        project_id = Prompt.ask("[bold]GCP Project ID[/bold]")
        if not project_id:
            raise InitError("GCP Project ID cannot be empty.")

        dataset_id = Prompt.ask("[bold]Default dataset[/bold] [dim](optional, press Enter to skip)[/dim]", default="")

        credentials_path = Prompt.ask(
            "[bold]Service account JSON path[/bold] [dim](optional, uses ADC if empty)[/dim]",
            default="",
        )

        return BigQueryConfig(
            name=name,
            project_id=project_id,
            # Empty answers mean "not provided".
            dataset_id=dataset_id or None,
            credentials_path=credentials_path or None,
        )

    def _service_account_credentials(self):
        """Build service-account credentials, or return None to use ADC.

        ``credentials_json`` takes precedence over ``credentials_path``;
        shared here so the source/scopes are defined in exactly one place.
        """
        if self.credentials_json:
            from google.oauth2 import service_account

            return service_account.Credentials.from_service_account_info(
                self.credentials_json,
                scopes=["https://www.googleapis.com/auth/bigquery"],
            )
        if self.credentials_path:
            from google.oauth2 import service_account

            return service_account.Credentials.from_service_account_file(
                self.credentials_path,
                scopes=["https://www.googleapis.com/auth/bigquery"],
            )
        return None

    def connect(self) -> BaseBackend:
        """Create an Ibis BigQuery connection."""
        kwargs: dict = {"project_id": self.project_id}

        if self.dataset_id:
            kwargs["dataset_id"] = self.dataset_id

        if self.sso:
            # Triggers the browser-based OAuth flow instead of ADC.
            kwargs["auth_local_webserver"] = True

        credentials = self._service_account_credentials()
        if credentials is not None:
            kwargs["credentials"] = credentials

        return ibis.bigquery.connect(**kwargs)

    def get_database_name(self) -> str:
        """Get the database name for BigQuery."""

        return self.project_id
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
import ibis
|
|
4
|
+
from ibis import BaseBackend
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
from rich.prompt import Prompt
|
|
7
|
+
|
|
8
|
+
from nao_core.config.exceptions import InitError
|
|
9
|
+
|
|
10
|
+
from .base import DatabaseConfig, console
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DatabricksConfig(DatabaseConfig):
    """Databricks-specific configuration."""

    type: Literal["databricks"] = "databricks"
    server_hostname: str = Field(description="Databricks server hostname (e.g., 'adb-xxxx.azuredatabricks.net')")
    http_path: str = Field(description="HTTP path to the SQL warehouse or cluster")
    access_token: str = Field(description="Databricks personal access token")
    catalog: str | None = Field(default=None, description="Unity Catalog name (optional)")
    schema_name: str | None = Field(
        default=None,
        validation_alias="schema",
        serialization_alias="schema",
        description="Default schema (optional)",
    )

    @classmethod
    def promptConfig(cls) -> "DatabricksConfig":
        """Interactively prompt the user for Databricks configuration.

        Raises:
            InitError: If hostname, HTTP path, or access token is empty.
        """
        console.print("\n[bold cyan]Databricks Configuration[/bold cyan]\n")

        connection_name = Prompt.ask("[bold]Connection name[/bold]", default="databricks-prod")

        hostname = Prompt.ask("[bold]Server hostname[/bold] [dim](e.g., adb-xxxx.azuredatabricks.net)[/dim]")
        if not hostname:
            raise InitError("Server hostname cannot be empty.")

        warehouse_path = Prompt.ask("[bold]HTTP path[/bold] [dim](e.g., /sql/1.0/warehouses/xxxx)[/dim]")
        if not warehouse_path:
            raise InitError("HTTP path cannot be empty.")

        token = Prompt.ask("[bold]Access token[/bold]", password=True)
        if not token:
            raise InitError("Access token cannot be empty.")

        catalog_name = Prompt.ask("[bold]Catalog[/bold] [dim](optional, press Enter to skip)[/dim]", default=None)

        default_schema = Prompt.ask("[bold]Default schema[/bold] [dim](optional, press Enter to skip)[/dim]", default=None)

        return DatabricksConfig(
            name=connection_name,
            server_hostname=hostname,
            http_path=warehouse_path,
            access_token=token,
            catalog=catalog_name,
            schema_name=default_schema,
        )

    def connect(self) -> BaseBackend:
        """Create an Ibis Databricks connection."""
        kwargs: dict = {
            "server_hostname": self.server_hostname,
            "http_path": self.http_path,
            "access_token": self.access_token,
        }

        # Optional settings are only forwarded when set.
        for key, value in (("catalog", self.catalog), ("schema", self.schema_name)):
            if value:
                kwargs[key] = value

        return ibis.databricks.connect(**kwargs)

    def get_database_name(self) -> str:
        """Get the database name for Databricks."""

        return self.catalog or "main"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
import ibis
|
|
5
|
+
from ibis import BaseBackend
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
from rich.prompt import Prompt
|
|
8
|
+
|
|
9
|
+
from .base import DatabaseConfig, console
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DuckDBConfig(DatabaseConfig):
    """DuckDB-specific configuration."""

    type: Literal["duckdb"] = "duckdb"
    path: str = Field(description="Path to the DuckDB database file", default=":memory:")

    @classmethod
    def promptConfig(cls) -> "DuckDBConfig":
        """Interactively prompt the user for DuckDB configuration."""
        console.print("\n[bold cyan]DuckDB Configuration[/bold cyan]\n")

        connection_name = Prompt.ask("[bold]Connection name[/bold]", default="duckdb-memory")
        db_path = Prompt.ask("[bold]Path to the DuckDB database file[/bold]", default=":memory:")

        return DuckDBConfig(name=connection_name, path=db_path)

    def connect(self) -> BaseBackend:
        """Create an Ibis DuckDB connection.

        File-backed databases are opened read-only; only the in-memory
        database is writable.
        """
        in_memory = self.path == ":memory:"
        return ibis.duckdb.connect(
            database=self.path,
            read_only=not in_memory,
        )

    def get_database_name(self) -> str:
        """Get the database name for DuckDB."""

        return "memory" if self.path == ":memory:" else Path(self.path).stem