nao_core-0.0.38-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. nao_core/__init__.py +2 -0
  2. nao_core/__init__.py.bak +2 -0
  3. nao_core/bin/build-info.json +5 -0
  4. nao_core/bin/fastapi/main.py +268 -0
  5. nao_core/bin/fastapi/test_main.py +156 -0
  6. nao_core/bin/migrations-postgres/0000_user_auth_and_chat_tables.sql +98 -0
  7. nao_core/bin/migrations-postgres/0001_message_feedback.sql +9 -0
  8. nao_core/bin/migrations-postgres/0002_chat_message_stop_reason_and_error_message.sql +2 -0
  9. nao_core/bin/migrations-postgres/0003_handle_slack_with_thread.sql +2 -0
  10. nao_core/bin/migrations-postgres/0004_input_and_output_tokens.sql +8 -0
  11. nao_core/bin/migrations-postgres/0005_add_project_tables.sql +39 -0
  12. nao_core/bin/migrations-postgres/0006_llm_model_ids.sql +4 -0
  13. nao_core/bin/migrations-postgres/0007_chat_message_llm_info.sql +2 -0
  14. nao_core/bin/migrations-postgres/meta/0000_snapshot.json +707 -0
  15. nao_core/bin/migrations-postgres/meta/0001_snapshot.json +766 -0
  16. nao_core/bin/migrations-postgres/meta/0002_snapshot.json +778 -0
  17. nao_core/bin/migrations-postgres/meta/0003_snapshot.json +799 -0
  18. nao_core/bin/migrations-postgres/meta/0004_snapshot.json +847 -0
  19. nao_core/bin/migrations-postgres/meta/0005_snapshot.json +1129 -0
  20. nao_core/bin/migrations-postgres/meta/0006_snapshot.json +1141 -0
  21. nao_core/bin/migrations-postgres/meta/_journal.json +62 -0
  22. nao_core/bin/migrations-sqlite/0000_user_auth_and_chat_tables.sql +98 -0
  23. nao_core/bin/migrations-sqlite/0001_message_feedback.sql +8 -0
  24. nao_core/bin/migrations-sqlite/0002_chat_message_stop_reason_and_error_message.sql +2 -0
  25. nao_core/bin/migrations-sqlite/0003_handle_slack_with_thread.sql +2 -0
  26. nao_core/bin/migrations-sqlite/0004_input_and_output_tokens.sql +8 -0
  27. nao_core/bin/migrations-sqlite/0005_add_project_tables.sql +38 -0
  28. nao_core/bin/migrations-sqlite/0006_llm_model_ids.sql +4 -0
  29. nao_core/bin/migrations-sqlite/0007_chat_message_llm_info.sql +2 -0
  30. nao_core/bin/migrations-sqlite/meta/0000_snapshot.json +674 -0
  31. nao_core/bin/migrations-sqlite/meta/0001_snapshot.json +735 -0
  32. nao_core/bin/migrations-sqlite/meta/0002_snapshot.json +749 -0
  33. nao_core/bin/migrations-sqlite/meta/0003_snapshot.json +763 -0
  34. nao_core/bin/migrations-sqlite/meta/0004_snapshot.json +819 -0
  35. nao_core/bin/migrations-sqlite/meta/0005_snapshot.json +1086 -0
  36. nao_core/bin/migrations-sqlite/meta/0006_snapshot.json +1100 -0
  37. nao_core/bin/migrations-sqlite/meta/_journal.json +62 -0
  38. nao_core/bin/nao-chat-server +0 -0
  39. nao_core/bin/public/assets/code-block-F6WJLWQG-CV0uOmNJ.js +153 -0
  40. nao_core/bin/public/assets/index-DcbndLHo.css +1 -0
  41. nao_core/bin/public/assets/index-t1hZI3nl.js +560 -0
  42. nao_core/bin/public/favicon.ico +0 -0
  43. nao_core/bin/public/index.html +18 -0
  44. nao_core/bin/rg +0 -0
  45. nao_core/commands/__init__.py +6 -0
  46. nao_core/commands/chat.py +225 -0
  47. nao_core/commands/debug.py +158 -0
  48. nao_core/commands/init.py +358 -0
  49. nao_core/commands/sync/__init__.py +124 -0
  50. nao_core/commands/sync/accessors.py +290 -0
  51. nao_core/commands/sync/cleanup.py +156 -0
  52. nao_core/commands/sync/providers/__init__.py +32 -0
  53. nao_core/commands/sync/providers/base.py +113 -0
  54. nao_core/commands/sync/providers/databases/__init__.py +17 -0
  55. nao_core/commands/sync/providers/databases/bigquery.py +79 -0
  56. nao_core/commands/sync/providers/databases/databricks.py +79 -0
  57. nao_core/commands/sync/providers/databases/duckdb.py +78 -0
  58. nao_core/commands/sync/providers/databases/postgres.py +79 -0
  59. nao_core/commands/sync/providers/databases/provider.py +129 -0
  60. nao_core/commands/sync/providers/databases/snowflake.py +79 -0
  61. nao_core/commands/sync/providers/notion/__init__.py +5 -0
  62. nao_core/commands/sync/providers/notion/provider.py +205 -0
  63. nao_core/commands/sync/providers/repositories/__init__.py +5 -0
  64. nao_core/commands/sync/providers/repositories/provider.py +134 -0
  65. nao_core/commands/sync/registry.py +23 -0
  66. nao_core/config/__init__.py +30 -0
  67. nao_core/config/base.py +100 -0
  68. nao_core/config/databases/__init__.py +55 -0
  69. nao_core/config/databases/base.py +85 -0
  70. nao_core/config/databases/bigquery.py +99 -0
  71. nao_core/config/databases/databricks.py +79 -0
  72. nao_core/config/databases/duckdb.py +41 -0
  73. nao_core/config/databases/postgres.py +83 -0
  74. nao_core/config/databases/snowflake.py +125 -0
  75. nao_core/config/exceptions.py +7 -0
  76. nao_core/config/llm/__init__.py +19 -0
  77. nao_core/config/notion/__init__.py +8 -0
  78. nao_core/config/repos/__init__.py +3 -0
  79. nao_core/config/repos/base.py +11 -0
  80. nao_core/config/slack/__init__.py +12 -0
  81. nao_core/context/__init__.py +54 -0
  82. nao_core/context/base.py +57 -0
  83. nao_core/context/git.py +177 -0
  84. nao_core/context/local.py +59 -0
  85. nao_core/main.py +13 -0
  86. nao_core/templates/__init__.py +41 -0
  87. nao_core/templates/context.py +193 -0
  88. nao_core/templates/defaults/databases/columns.md.j2 +23 -0
  89. nao_core/templates/defaults/databases/description.md.j2 +32 -0
  90. nao_core/templates/defaults/databases/preview.md.j2 +22 -0
  91. nao_core/templates/defaults/databases/profiling.md.j2 +34 -0
  92. nao_core/templates/engine.py +133 -0
  93. nao_core/templates/render.py +196 -0
  94. nao_core-0.0.38.dist-info/METADATA +150 -0
  95. nao_core-0.0.38.dist-info/RECORD +98 -0
  96. nao_core-0.0.38.dist-info/WHEEL +4 -0
  97. nao_core-0.0.38.dist-info/entry_points.txt +2 -0
  98. nao_core-0.0.38.dist-info/licenses/LICENSE +22 -0
nao_core/commands/sync/accessors.py
@@ -0,0 +1,290 @@
+ """Data accessor classes for generating markdown documentation from database tables."""
+
+ from abc import ABC, abstractmethod
+ from pathlib import Path
+ from typing import Any
+
+ from ibis import BaseBackend
+
+ from nao_core.templates import get_template_engine
+
+
+ class DataAccessor(ABC):
+     """Base class for data accessors that generate markdown files for tables.
+
+     Accessors use Jinja2 templates for generating output. Default templates
+     are shipped with nao and can be overridden by users by placing templates
+     with the same name in their project's `templates/` directory.
+
+     Example:
+         To override the preview template, create:
+         `<project_root>/templates/databases/preview.md.j2`
+     """
+
+     # Path to the nao project root (set by sync provider)
+     _project_path: Path | None = None
+
+     @property
+     @abstractmethod
+     def filename(self) -> str:
+         """The filename this accessor writes to (e.g., 'columns.md')."""
+         ...
+
+     @property
+     @abstractmethod
+     def template_name(self) -> str:
+         """The template file to use (e.g., 'databases/columns.md.j2')."""
+         ...
+
+     @abstractmethod
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         """Get the template context for rendering.
+
+         Args:
+             conn: The Ibis database connection
+             dataset: The dataset/schema name
+             table: The table name
+
+         Returns:
+             Dictionary of variables to pass to the template
+         """
+         ...
+
+     def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+         """Generate the markdown content for a table using templates.
+
+         Args:
+             conn: The Ibis database connection
+             dataset: The dataset/schema name
+             table: The table name
+
+         Returns:
+             Markdown string content
+         """
+         try:
+             context = self.get_context(conn, dataset, table)
+             engine = get_template_engine(self._project_path)
+             return engine.render(self.template_name, **context)
+         except Exception as e:
+             return f"# {table}\n\nError generating content: {e}"
+
+     def get_table(self, conn: BaseBackend, dataset: str, table: str):
+         """Helper to get an Ibis table reference."""
+         return conn.table(table, database=dataset)
+
+     @classmethod
+     def set_project_path(cls, path: Path | None) -> None:
+         """Set the project path for template resolution.
+
+         Args:
+             path: Path to the nao project root
+         """
+         cls._project_path = path
+
+
+ def truncate_middle(text: str, max_length: int) -> str:
+     """Truncate text in the middle if it exceeds max_length."""
+     if len(text) <= max_length:
+         return text
+     half = (max_length - 3) // 2
+     return text[:half] + "..." + text[-half:]
+
+
+ class ColumnsAccessor(DataAccessor):
+     """Generates columns.md with column names, types, and nullable info.
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - columns: List of dicts with 'name', 'type', 'nullable', 'description'
+         - column_count: Total number of columns
+     """
+
+     def __init__(self, max_description_length: int = 256):
+         self.max_description_length = max_description_length
+
+     @property
+     def filename(self) -> str:
+         return "columns.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/columns.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+
+         columns = []
+         for name, dtype in schema.items():
+             columns.append(
+                 {
+                     "name": name,
+                     "type": str(dtype),
+                     "nullable": dtype.nullable if hasattr(dtype, "nullable") else True,
+                     "description": None,  # Could be populated from metadata
+                 }
+             )
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "columns": columns,
+             "column_count": len(columns),
+         }
+
+
+ class PreviewAccessor(DataAccessor):
+     """Generates preview.md with the first N rows of data as JSONL.
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - rows: List of row dictionaries
+         - row_count: Number of preview rows
+         - columns: List of column info dicts
+     """
+
+     def __init__(self, num_rows: int = 10):
+         self.num_rows = num_rows
+
+     @property
+     def filename(self) -> str:
+         return "preview.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/preview.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+         preview_df = t.limit(self.num_rows).execute()
+
+         rows = []
+         for _, row in preview_df.iterrows():
+             row_dict = row.to_dict()
+             # Convert non-serializable types to strings
+             for key, val in row_dict.items():
+                 if val is not None and not isinstance(val, (str, int, float, bool, list, dict)):
+                     row_dict[key] = str(val)
+             rows.append(row_dict)
+
+         columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "rows": rows,
+             "row_count": len(rows),
+             "columns": columns,
+         }
+
+
+ class DescriptionAccessor(DataAccessor):
+     """Generates description.md with table metadata (row count, column count, etc.).
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - row_count: Total rows in the table
+         - column_count: Number of columns
+         - description: Table description (if available)
+         - columns: List of column info dicts
+     """
+
+     @property
+     def filename(self) -> str:
+         return "description.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/description.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+
+         row_count = t.count().execute()
+         columns = [{"name": name, "type": str(dtype)} for name, dtype in schema.items()]
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "row_count": row_count,
+             "column_count": len(schema),
+             "description": None,  # Could be populated from metadata
+             "columns": columns,
+         }
+
+
+ class ProfilingAccessor(DataAccessor):
+     """Generates profiling.md with column statistics and data profiling.
+
+     Template variables:
+         - table_name: Name of the table
+         - dataset: Schema/dataset name
+         - column_stats: List of dicts with stats for each column:
+             - name: Column name
+             - type: Data type
+             - null_count: Number of nulls
+             - unique_count: Number of unique values
+             - min_value: Min value (numeric/temporal)
+             - max_value: Max value (numeric/temporal)
+             - error: Error message if stats couldn't be computed
+         - columns: List of column info dicts
+     """
+
+     @property
+     def filename(self) -> str:
+         return "profiling.md"
+
+     @property
+     def template_name(self) -> str:
+         return "databases/profiling.md.j2"
+
+     def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
+         t = self.get_table(conn, dataset, table)
+         schema = t.schema()
+
+         column_stats = []
+         columns = []
+
+         for name, dtype in schema.items():
+             columns.append({"name": name, "type": str(dtype)})
+             col = t[name]
+             dtype_str = str(dtype)
+
+             stat = {
+                 "name": name,
+                 "type": dtype_str,
+                 "null_count": 0,
+                 "unique_count": 0,
+                 "min_value": None,
+                 "max_value": None,
+                 "error": None,
+             }
+
+             try:
+                 stat["null_count"] = t.filter(col.isnull()).count().execute()
+                 stat["unique_count"] = col.nunique().execute()
+
+                 if dtype.is_numeric() or dtype.is_temporal():
+                     try:
+                         min_val = str(col.min().execute())
+                         max_val = str(col.max().execute())
+                         stat["min_value"] = truncate_middle(min_val, 20)
+                         stat["max_value"] = truncate_middle(max_val, 20)
+                     except Exception:
+                         pass
+             except Exception as col_error:
+                 stat["error"] = str(col_error)
+
+             column_stats.append(stat)
+
+         return {
+             "table_name": table,
+             "dataset": dataset,
+             "column_stats": column_stats,
+             "columns": columns,
+         }
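
As a reading aid, here is a minimal sketch of a project-specific accessor built on the DataAccessor base class above. The IndexesAccessor name and the databases/indexes.md.j2 template are hypothetical and not shipped in this wheel; only the base-class hooks (filename, template_name, get_context, get_table) come from the package.

from typing import Any

from ibis import BaseBackend

from nao_core.commands.sync.accessors import DataAccessor


class IndexesAccessor(DataAccessor):
    """Hypothetical accessor that would write indexes.md per table."""

    @property
    def filename(self) -> str:
        return "indexes.md"

    @property
    def template_name(self) -> str:
        # Assumed template name; a user would place it under
        # <project_root>/templates/ as described in the base class docstring.
        return "databases/indexes.md.j2"

    def get_context(self, conn: BaseBackend, dataset: str, table: str) -> dict[str, Any]:
        t = self.get_table(conn, dataset, table)
        return {
            "table_name": table,
            "dataset": dataset,
            "columns": list(t.schema().names),
        }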
nao_core/commands/sync/cleanup.py
@@ -0,0 +1,156 @@
+ """Cleanup utilities for removing stale sync files."""
+
+ import shutil
+ from collections import defaultdict
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Dict, List
+
+ from rich.console import Console
+
+ console = Console()
+
+
+ @dataclass
+ class DatabaseSyncState:
+     """Tracks the state of a database sync operation.
+
+     Used to track which paths were synced so stale paths can be cleaned up.
+     """
+
+     db_path: Path
+     """The root path for this database (e.g., databases/type=duckdb/database=mydb)"""
+
+     synced_schemas: set[str] = field(default_factory=set)
+     """Set of schema names that were synced"""
+
+     synced_tables: dict[str, set[str]] = field(default_factory=dict)
+     """Dict mapping schema names to sets of table names that were synced"""
+
+     schemas_synced: int = 0
+     """Count of schemas synced"""
+
+     tables_synced: int = 0
+     """Count of tables synced"""
+
+     def add_table(self, schema: str, table: str) -> None:
+         """Record that a table was synced.
+
+         Args:
+             schema: The schema/dataset name
+             table: The table name
+         """
+         self.synced_schemas.add(schema)
+         if schema not in self.synced_tables:
+             self.synced_tables[schema] = set()
+         self.synced_tables[schema].add(table)
+         self.tables_synced += 1
+
+     def add_schema(self, schema: str) -> None:
+         """Record that a schema was synced (even if empty).
+
+         Args:
+             schema: The schema/dataset name
+         """
+         self.synced_schemas.add(schema)
+         self.schemas_synced += 1
+
+
+ def cleanup_stale_paths(state: DatabaseSyncState, verbose: bool = False) -> int:
+     """Remove directories that exist on disk but weren't synced.
+
+     This function cleans up:
+     - Table directories that no longer exist in the source
+     - Schema directories that no longer exist or have no tables
+
+     Args:
+         state: The sync state tracking what was synced
+         verbose: Whether to print cleanup messages
+
+     Returns:
+         Number of stale paths removed
+     """
+     removed_count = 0
+
+     if not state.db_path.exists():
+         return 0
+
+     # Find all existing schema directories
+     existing_schemas = {
+         d.name.replace("schema=", ""): d for d in state.db_path.iterdir() if d.is_dir() and d.name.startswith("schema=")
+     }
+
+     # Remove schemas that weren't synced
+     for schema_name, schema_path in existing_schemas.items():
+         if schema_name not in state.synced_schemas:
+             if verbose:
+                 console.print(f" [dim red]removing stale schema:[/dim red] {schema_name}")
+             shutil.rmtree(schema_path)
+             removed_count += 1
+             continue
+
+         # Find existing tables in this schema
+         existing_tables = {
+             d.name.replace("table=", ""): d for d in schema_path.iterdir() if d.is_dir() and d.name.startswith("table=")
+         }
+
+         synced_tables_for_schema = state.synced_tables.get(schema_name, set())
+
+         # Remove tables that weren't synced
+         for table_name, table_path in existing_tables.items():
+             if table_name not in synced_tables_for_schema:
+                 if verbose:
+                     console.print(f" [dim red]removing stale table:[/dim red] {schema_name}.{table_name}")
+                 shutil.rmtree(table_path)
+                 removed_count += 1
+
+     return removed_count
+
+
+ def cleanup_stale_databases(active_databases: List, base_path: Path, verbose: bool = False):
+     """Remove databases that are not present in the config file."""
+
+     valid_db_folders_by_type: Dict[str, set] = defaultdict(set)
+
+     for db in active_databases:
+         type_folder = f"type={db.type}"
+         db_identifier = db.get_database_name()
+         db_folder = f"database={db_identifier}"
+
+         valid_db_folders_by_type[type_folder].add(db_folder)
+
+     for type_dir in base_path.iterdir():
+         if not type_dir.is_dir():
+             continue
+
+         type_folder_name = type_dir.name
+
+         # Remove entire type directory if it doesn't exist in nao_config
+         if type_folder_name not in valid_db_folders_by_type:
+             shutil.rmtree(type_dir)
+             if verbose:
+                 console.print(f"\n[yellow] Removed unused database type:[/yellow] {type_dir}")
+             continue
+
+         valid_db_folders = valid_db_folders_by_type[type_folder_name]
+
+         # Remove unused database folders if they don't exist in nao_config
+         for db_dir in type_dir.iterdir():
+             if not db_dir.is_dir():
+                 continue
+
+             if db_dir.name not in valid_db_folders:
+                 shutil.rmtree(db_dir)
+                 if verbose:
+                     console.print(f"\n[yellow] Removed unused database:[/yellow] {type_folder_name}/{db_dir.name}")
+
+
+ def cleanup_stale_repos(config_repos: list, base_path: Path, verbose: bool = False) -> None:
+     """Remove repositories that are not present in the config file."""
+
+     repo_names = {repo.name for repo in config_repos}
+     for repo_dir in base_path.iterdir():
+         if repo_dir.is_dir() and repo_dir.name not in repo_names:
+             shutil.rmtree(repo_dir)
+             if verbose:
+                 console.print(f"\n[yellow] Removed unused repo:[/yellow] {repo_dir.name}")
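
A short usage sketch of the state-tracking and cleanup pair above, assuming the duckdb layout named in the DatabaseSyncState docstring; the schema and table names are illustrative only.

from pathlib import Path

from nao_core.commands.sync.cleanup import DatabaseSyncState, cleanup_stale_paths

# Record what this sync run produced...
state = DatabaseSyncState(db_path=Path("databases/type=duckdb/database=mydb"))
state.add_schema("main")
state.add_table("main", "orders")

# ...then remove any schema=*/table=* directory under db_path that was
# not recorded above.
removed = cleanup_stale_paths(state, verbose=True)
print(f"{removed} stale path(s) removed")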
nao_core/commands/sync/providers/__init__.py
@@ -0,0 +1,32 @@
+ """Sync providers for different resource types."""
+
+ from .base import SyncProvider, SyncResult
+ from .databases.provider import DatabaseSyncProvider
+ from .notion.provider import NotionSyncProvider
+ from .repositories.provider import RepositorySyncProvider
+
+ # Default providers in order of execution
+ DEFAULT_PROVIDERS: list[SyncProvider] = [
+     NotionSyncProvider(),
+     RepositorySyncProvider(),
+     DatabaseSyncProvider(),
+ ]
+
+
+ def get_all_providers() -> list[SyncProvider]:
+     """Get all registered sync providers.
+
+     Returns:
+         List of sync provider instances
+     """
+     return DEFAULT_PROVIDERS.copy()
+
+
+ __all__ = [
+     "SyncProvider",
+     "SyncResult",
+     "DatabaseSyncProvider",
+     "RepositorySyncProvider",
+     "DEFAULT_PROVIDERS",
+     "get_all_providers",
+ ]
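
The registry above suggests a driver loop like the following sketch, using the SyncProvider interface shown in the next hunk. run_sync is invented for illustration; the diff does not show how the nao CLI actually wires providers to a loaded NaoConfig.

from pathlib import Path

from nao_core.commands.sync.providers import get_all_providers
from nao_core.config import NaoConfig


def run_sync(config: NaoConfig, base: Path) -> None:
    """Hypothetical driver: run each registered provider that has work to do."""
    for provider in get_all_providers():
        if not provider.should_sync(config):
            continue
        items = provider.get_items(config)
        output_path = base / provider.default_output_dir
        provider.pre_sync(config, output_path)
        result = provider.sync(items, output_path, project_path=base)
        print(f"{provider.emoji} {provider.name}: {result.get_summary()}")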
nao_core/commands/sync/providers/base.py
@@ -0,0 +1,113 @@
+ """Base class for sync providers."""
+
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ from nao_core.config import NaoConfig
+
+
+ @dataclass
+ class SyncResult:
+     """Result of a sync operation."""
+
+     provider_name: str
+     items_synced: int
+     details: dict[str, Any] | None = None
+     summary: str | None = None
+     error: str | None = None
+
+     @property
+     def success(self) -> bool:
+         """Check if the sync was successful."""
+         return self.error is None
+
+     def get_summary(self) -> str:
+         """Get a human-readable summary of the sync result."""
+         if self.error:
+             return f"failed: {self.error}"
+         if self.summary:
+             return self.summary
+         return f"{self.items_synced} synced"
+
+     @classmethod
+     def from_error(cls, provider_name: str, error: Exception) -> "SyncResult":
+         """Create a SyncResult from an exception."""
+         return cls(
+             provider_name=provider_name,
+             items_synced=0,
+             error=str(error),
+         )
+
+
+ class SyncProvider(ABC):
+     """Abstract base class for sync providers.
+
+     A sync provider is responsible for synchronizing a specific type of resource
+     (e.g., repositories, databases) from the nao configuration to local files.
+     """
+
+     @property
+     @abstractmethod
+     def name(self) -> str:
+         """Human-readable name for this provider (e.g., 'Repositories', 'Databases')."""
+         ...
+
+     @property
+     @abstractmethod
+     def emoji(self) -> str:
+         """Emoji icon for this provider."""
+         ...
+
+     @property
+     @abstractmethod
+     def default_output_dir(self) -> str:
+         """Default output directory for this provider."""
+         ...
+
+     @abstractmethod
+     def get_items(self, config: NaoConfig) -> list[Any]:
+         """Extract items to sync from the configuration.
+
+         Args:
+             config: The nao configuration
+
+         Returns:
+             List of items to sync (e.g., repo configs, database configs)
+         """
+         ...
+
+     @abstractmethod
+     def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
+         """Sync the items to the output path.
+
+         Args:
+             items: List of items to sync
+             output_path: Path where synced data should be written
+             project_path: Path to the nao project root (for template resolution)
+
+         Returns:
+             SyncResult with statistics about what was synced
+         """
+         ...
+
+     def should_sync(self, config: NaoConfig) -> bool:
+         """Check if this provider has items to sync.
+
+         Args:
+             config: The nao configuration
+
+         Returns:
+             True if there are items to sync
+         """
+         return len(self.get_items(config)) > 0
+
+     def pre_sync(self, config: NaoConfig, output_path: Path) -> None:
+         """Hook for preparation before sync.
+
+         Args:
+             config: The loaded nao configuration.
+             output_path: Base directory where the preparation should be applied.
+         """
+         pass
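
To make the contract concrete, a minimal provider might look like the sketch below. NotesSyncProvider and the config.notes attribute are invented; only SyncProvider, SyncResult, and NaoConfig come from the package, and the real NaoConfig fields are not shown in this diff.

from pathlib import Path
from typing import Any

from nao_core.commands.sync.providers.base import SyncProvider, SyncResult
from nao_core.config import NaoConfig


class NotesSyncProvider(SyncProvider):
    """Hypothetical provider that writes one markdown file per configured note."""

    @property
    def name(self) -> str:
        return "Notes"

    @property
    def emoji(self) -> str:
        return "📝"

    @property
    def default_output_dir(self) -> str:
        return "notes"

    def get_items(self, config: NaoConfig) -> list[Any]:
        # Assumed config attribute, for illustration only.
        return list(getattr(config, "notes", []) or [])

    def sync(self, items: list[Any], output_path: Path, project_path: Path | None = None) -> SyncResult:
        try:
            output_path.mkdir(parents=True, exist_ok=True)
            for i, item in enumerate(items):
                (output_path / f"note-{i}.md").write_text(str(item))
            return SyncResult(provider_name=self.name, items_synced=len(items))
        except Exception as e:
            return SyncResult.from_error(self.name, e)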
nao_core/commands/sync/providers/databases/__init__.py
@@ -0,0 +1,17 @@
+ """Database syncing functionality for generating markdown documentation from database schemas."""
+
+ from .bigquery import sync_bigquery
+ from .databricks import sync_databricks
+ from .duckdb import sync_duckdb
+ from .postgres import sync_postgres
+ from .provider import DatabaseSyncProvider
+ from .snowflake import sync_snowflake
+
+ __all__ = [
+     "DatabaseSyncProvider",
+     "sync_bigquery",
+     "sync_databricks",
+     "sync_duckdb",
+     "sync_postgres",
+     "sync_snowflake",
+ ]
nao_core/commands/sync/providers/databases/bigquery.py
@@ -0,0 +1,79 @@
+ from pathlib import Path
+
+ from rich.progress import Progress
+
+ from nao_core.commands.sync.accessors import DataAccessor
+ from nao_core.commands.sync.cleanup import DatabaseSyncState
+
+
+ def sync_bigquery(
+     db_config,
+     base_path: Path,
+     progress: Progress,
+     accessors: list[DataAccessor],
+ ) -> DatabaseSyncState:
+     """Sync BigQuery database schema to markdown files.
+
+     Args:
+         db_config: The database configuration
+         base_path: Base output path
+         progress: Rich progress instance
+         accessors: List of data accessors to run
+
+     Returns:
+         DatabaseSyncState with sync results and tracked paths
+     """
+     conn = db_config.connect()
+     db_name = db_config.get_database_name()
+     db_path = base_path / "type=bigquery" / f"database={db_name}"
+     state = DatabaseSyncState(db_path=db_path)
+
+     if db_config.dataset_id:
+         datasets = [db_config.dataset_id]
+     else:
+         datasets = conn.list_databases()
+
+     dataset_task = progress.add_task(
+         f"[dim]{db_config.name}[/dim]",
+         total=len(datasets),
+     )
+
+     for dataset in datasets:
+         try:
+             all_tables = conn.list_tables(database=dataset)
+         except Exception:
+             progress.update(dataset_task, advance=1)
+             continue
+
+         # Filter tables based on include/exclude patterns
+         tables = [t for t in all_tables if db_config.matches_pattern(dataset, t)]
+
+         # Skip dataset if no tables match
+         if not tables:
+             progress.update(dataset_task, advance=1)
+             continue
+
+         dataset_path = db_path / f"schema={dataset}"
+         dataset_path.mkdir(parents=True, exist_ok=True)
+         state.add_schema(dataset)
+
+         table_task = progress.add_task(
+             f" [cyan]{dataset}[/cyan]",
+             total=len(tables),
+         )
+
+         for table in tables:
+             table_path = dataset_path / f"table={table}"
+             table_path.mkdir(parents=True, exist_ok=True)
+
+             for accessor in accessors:
+                 content = accessor.generate(conn, dataset, table)
+                 output_file = table_path / accessor.filename
+                 output_file.write_text(content)
+
+             state.add_table(dataset, table)
+             progress.update(table_task, advance=1)
+
+         progress.update(dataset_task, advance=1)
+
+     return state
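
Finally, a sketch of how sync_bigquery could be driven, wrapped in a function because db_config is not constructed here; it stands in for a BigQuery entry from nao's config, which (per the code above) must expose connect(), get_database_name(), dataset_id, name, and matches_pattern().

from pathlib import Path

from rich.progress import Progress

from nao_core.commands.sync.accessors import ColumnsAccessor, PreviewAccessor
from nao_core.commands.sync.providers.databases import sync_bigquery


def sync_one_bigquery(db_config, base_path: Path = Path("databases")) -> None:
    """Sketch: sync a single configured BigQuery database and report totals."""
    with Progress() as progress:
        state = sync_bigquery(
            db_config,
            base_path=base_path,
            progress=progress,
            accessors=[ColumnsAccessor(), PreviewAccessor(num_rows=5)],
        )
    print(f"synced {state.tables_synced} table(s) across {len(state.synced_schemas)} schema(s)")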