nao-core 0.0.12__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nao_core/__init__.py +1 -1
- nao_core/bin/db.sqlite +0 -0
- nao_core/bin/fastapi/main.py +102 -0
- nao_core/bin/public/assets/_chatId-z5gRlor1.js +1 -0
- nao_core/bin/public/assets/chat-messages-DUR3D342.js +1 -0
- nao_core/bin/public/assets/index-BDlcD_HE.js +1 -0
- nao_core/bin/public/assets/index-Bc7icYyJ.css +1 -0
- nao_core/bin/public/assets/index-CGg3ZQH6.js +49 -0
- nao_core/bin/public/assets/{login-CGCfd7iQ.js → login-D87n9R5V.js} +1 -1
- nao_core/bin/public/assets/signinForm-9PY1Lvqj.js +1 -0
- nao_core/bin/public/assets/{signup-BGjbIX9B.js → signup-B7NC1g08.js} +1 -1
- nao_core/bin/public/favicon.ico +0 -0
- nao_core/bin/public/index.html +3 -3
- nao_core/commands/chat.py +67 -25
- nao_core/commands/debug.py +0 -4
- nao_core/commands/init.py +3 -3
- nao_core/commands/sync.py +273 -44
- nao_core/config/__init__.py +13 -0
- nao_core/{config.py → config/base.py} +4 -66
- nao_core/config/databases/__init__.py +29 -0
- nao_core/config/databases/base.py +72 -0
- nao_core/config/databases/bigquery.py +42 -0
- nao_core/config/llm/__init__.py +16 -0
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/METADATA +3 -1
- nao_core-0.0.15.dist-info/RECORD +39 -0
- nao_core/bin/public/assets/index-BUcR0FCx.css +0 -1
- nao_core/bin/public/assets/index-DDQ8i103.js +0 -14
- nao_core/bin/public/assets/index-nOBqrovO.js +0 -36
- nao_core/bin/public/assets/signinForm-BGrBZeLW.js +0 -1
- nao_core-0.0.12.dist-info/RECORD +0 -31
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/WHEEL +0 -0
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/entry_points.txt +0 -0
- {nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/licenses/LICENSE +0 -0
nao_core/commands/sync.py
CHANGED
@@ -1,46 +1,261 @@
 import sys
+from abc import ABC, abstractmethod
 from pathlib import Path

 from ibis import BaseBackend
 from rich.console import Console
 from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn

-from nao_core.config import NaoConfig
+from nao_core.config import AccessorType, NaoConfig

 console = Console()


-
-
-
-
+# =============================================================================
+# Data Accessors
+# =============================================================================
+
+
+class DataAccessor(ABC):
+    """Base class for data accessors that generate markdown files for tables."""
+
+    @property
+    @abstractmethod
+    def filename(self) -> str:
+        """The filename this accessor writes to (e.g., 'columns.md')."""
+        ...
+
+    @abstractmethod
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        """Generate the markdown content for a table.
+
+        Args:
+            conn: The Ibis database connection
+            dataset: The dataset/schema name
+            table: The table name
+
+        Returns:
+            Markdown string content
+        """
+        ...
+
+    def get_table(self, conn: BaseBackend, dataset: str, table: str):
+        """Helper to get an Ibis table reference."""
         full_table_name = f"{dataset}.{table}"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        return conn.table(full_table_name)
+
+
+class ColumnsAccessor(DataAccessor):
+    """Generates columns.md with column names, types, and nullable info."""
+
+    @property
+    def filename(self) -> str:
+        return "columns.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            lines = [
+                f"# {table}",
+                "",
+                f"**Dataset:** `{dataset}`",
+                "",
+                "## Columns",
+                "",
+                "| Column | Type | Nullable | Description |",
+                "|--------|------|----------|-------------|",
+            ]
+
+            for name, dtype in schema.items():
+                nullable = "Yes" if dtype.nullable else "No"
+                description = ""
+                lines.append(f"| `{name}` | `{dtype}` | {nullable} | {description} |")
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table}\n\nError fetching schema: {e}"
+
+
+class PreviewAccessor(DataAccessor):
+    """Generates preview.md with the first N rows of data."""
+
+    def __init__(self, num_rows: int = 10):
+        self.num_rows = num_rows
+
+    @property
+    def filename(self) -> str:
+        return "preview.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            preview_df = t.limit(self.num_rows).execute()
+
+            lines = [
+                f"# {table} - Preview",
+                "",
+                f"**Dataset:** `{dataset}`",
+                f"**Showing:** First {len(preview_df)} rows",
+                "",
+                "## Data Preview",
+                "",
+            ]
+
+            columns = list(schema.keys())
+            header = "| " + " | ".join(f"`{col}`" for col in columns) + " |"
+            separator = "| " + " | ".join("---" for _ in columns) + " |"
+            lines.append(header)
+            lines.append(separator)
+
+            for _, row in preview_df.iterrows():
+                row_values = []
+                for col in columns:
+                    val = row[col]
+                    val_str = str(val) if val is not None else ""
+                    if len(val_str) > 50:
+                        val_str = val_str[:47] + "..."
+                    val_str = val_str.replace("|", "\\|").replace("\n", " ")
+                    row_values.append(val_str)
+                lines.append("| " + " | ".join(row_values) + " |")
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table} - Preview\n\nError fetching preview: {e}"
+
+
+class DescriptionAccessor(DataAccessor):
+    """Generates description.md with table metadata (row count, column count, etc.)."""
+
+    @property
+    def filename(self) -> str:
+        return "description.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            row_count = t.count().execute()
+            col_count = len(schema)
+
+            lines = [
+                f"# {table}",
+                "",
+                f"**Dataset:** `{dataset}`",
+                "",
+                "## Table Metadata",
+                "",
+                "| Property | Value |",
+                "|----------|-------|",
+                f"| **Row Count** | {row_count:,} |",
+                f"| **Column Count** | {col_count} |",
+                "",
+                "## Description",
+                "",
+                "_No description available._",
+                "",
+            ]
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table}\n\nError fetching description: {e}"
+
+
+class ProfilingAccessor(DataAccessor):
+    """Generates profiling.md with column statistics and data profiling."""
+
+    @property
+    def filename(self) -> str:
+        return "profiling.md"
+
+    def generate(self, conn: BaseBackend, dataset: str, table: str) -> str:
+        try:
+            t = self.get_table(conn, dataset, table)
+            schema = t.schema()
+
+            lines = [
+                f"# {table} - Profiling",
+                "",
+                f"**Dataset:** `{dataset}`",
+                "",
+                "## Column Statistics",
+                "",
+                "| Column | Type | Nulls | Unique | Min | Max |",
+                "|--------|------|-------|--------|-----|-----|",
+            ]
+
+            for name, dtype in schema.items():
+                col = t[name]
+                dtype_str = str(dtype)
+
+                try:
+                    null_count = t.filter(col.isnull()).count().execute()
+                    unique_count = col.nunique().execute()
+
+                    min_val = ""
+                    max_val = ""
+                    if dtype.is_numeric() or dtype.is_temporal():
+                        try:
+                            min_val = str(col.min().execute())
+                            max_val = str(col.max().execute())
+                            if len(min_val) > 20:
+                                min_val = min_val[:17] + "..."
+                            if len(max_val) > 20:
+                                max_val = max_val[:17] + "..."
+                        except Exception:
+                            pass
+
+                    lines.append(
+                        f"| `{name}` | `{dtype_str}` | {null_count:,} | {unique_count:,} | {min_val} | {max_val} |"
+                    )
+                except Exception as col_error:
+                    lines.append(f"| `{name}` | `{dtype_str}` | Error: {col_error} | | | |")
+
+            return "\n".join(lines)
+        except Exception as e:
+            return f"# {table} - Profiling\n\nError fetching profiling: {e}"
+
+
+# =============================================================================
+# Accessor Registry
+# =============================================================================
+
+ACCESSOR_REGISTRY: dict[AccessorType, DataAccessor] = {
+    AccessorType.COLUMNS: ColumnsAccessor(),
+    AccessorType.PREVIEW: PreviewAccessor(num_rows=10),
+    AccessorType.DESCRIPTION: DescriptionAccessor(),
+    AccessorType.PROFILING: ProfilingAccessor(),
+}
+
+
+def get_accessors(accessor_types: list[AccessorType]) -> list[DataAccessor]:
+    """Get accessor instances for the given types."""
+    return [ACCESSOR_REGISTRY[t] for t in accessor_types if t in ACCESSOR_REGISTRY]
+
+
+# =============================================================================
+# Sync Functions
+# =============================================================================
+
+
+def sync_bigquery(
+    db_config,
+    base_path: Path,
+    progress: Progress,
+    accessors: list[DataAccessor],
+) -> tuple[int, int]:
     """Sync BigQuery database schema to markdown files.

+    Args:
+        db_config: The database configuration
+        base_path: Base output path
+        progress: Rich progress instance
+        accessors: List of data accessors to run
+
     Returns:
         Tuple of (datasets_synced, tables_synced)
     """
@@ -50,7 +265,6 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
     datasets_synced = 0
     tables_synced = 0

-    # Get datasets to sync
     if db_config.dataset_id:
         datasets = [db_config.dataset_id]
     else:
@@ -62,17 +276,24 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
     )

     for dataset in datasets:
-        dataset_path = db_path / dataset
-        dataset_path.mkdir(parents=True, exist_ok=True)
-        datasets_synced += 1
-
-        # List tables in this dataset
         try:
-
+            all_tables = conn.list_tables(database=dataset)
         except Exception:
             progress.update(dataset_task, advance=1)
             continue

+        # Filter tables based on include/exclude patterns
+        tables = [t for t in all_tables if db_config.matches_pattern(dataset, t)]
+
+        # Skip dataset if no tables match
+        if not tables:
+            progress.update(dataset_task, advance=1)
+            continue
+
+        dataset_path = db_path / dataset
+        dataset_path.mkdir(parents=True, exist_ok=True)
+        datasets_synced += 1
+
         table_task = progress.add_task(
             f"  [cyan]{dataset}[/cyan]",
             total=len(tables),
@@ -82,11 +303,12 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
             table_path = dataset_path / table
             table_path.mkdir(parents=True, exist_ok=True)

-
-
-
-
+            for accessor in accessors:
+                content = accessor.generate(conn, dataset, table)
+                output_file = table_path / accessor.filename
+                output_file.write_text(content)

+            tables_synced += 1
             progress.update(table_task, advance=1)

         progress.update(dataset_task, advance=1)
@@ -97,15 +319,17 @@ def sync_bigquery(db_config, base_path: Path, progress: Progress) -> tuple[int,
 def sync(output_dir: str = "databases"):
     """Sync database schemas to local markdown files.

-    Creates a folder structure with table
+    Creates a folder structure with table metadata:
        databases/bigquery/<connection>/<dataset>/<table>/columns.md
+       databases/bigquery/<connection>/<dataset>/<table>/preview.md
+       databases/bigquery/<connection>/<dataset>/<table>/description.md
+       databases/bigquery/<connection>/<dataset>/<table>/profiling.md

     Args:
-
+        output_dir: Output directory for the database schemas (default: "databases")
     """
     console.print("\n[bold cyan]🔄 nao sync[/bold cyan]\n")

-    # Load config
     config = NaoConfig.try_load()
     if not config:
         console.print("[bold red]✗[/bold red] No nao_config.yaml found in current directory")
@@ -133,9 +357,14 @@ def sync(output_dir: str = "databases"):
         transient=False,
     ) as progress:
         for db in config.databases:
+            # Get accessors from database config
+            db_accessors = get_accessors(db.accessors)
+            accessor_names = [a.filename.replace(".md", "") for a in db_accessors]
+
             try:
                 if db.type == "bigquery":
-
+                    console.print(f"[dim]{db.name} accessors:[/dim] {', '.join(accessor_names)}")
+                    datasets, tables = sync_bigquery(db, base_path, progress, db_accessors)
                     total_datasets += datasets
                     total_tables += tables
                 else:
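The heart of the change: `sync_bigquery` no longer hard-codes what gets written per table; it runs whatever `DataAccessor` instances it is handed, and `get_accessors` silently skips unknown types. A minimal sketch of the selection step, with import paths assumed from this diff:

```python
from nao_core.commands.sync import get_accessors
from nao_core.config import AccessorType

# Mirror what sync() does for each configured database: resolve the
# configured accessor types to instances, then derive display names
# from their output filenames.
accessors = get_accessors([AccessorType.COLUMNS, AccessorType.PREVIEW])
print([a.filename for a in accessors])                     # ['columns.md', 'preview.md']
print([a.filename.replace(".md", "") for a in accessors])  # ['columns', 'preview']
```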
nao_core/config/__init__.py
ADDED
@@ -0,0 +1,13 @@
+from .base import NaoConfig
+from .databases import AccessorType, AnyDatabaseConfig, BigQueryConfig, DatabaseType
+from .llm import LLMConfig, LLMProvider
+
+__all__ = [
+    "NaoConfig",
+    "AccessorType",
+    "AnyDatabaseConfig",
+    "BigQueryConfig",
+    "DatabaseType",
+    "LLMConfig",
+    "LLMProvider",
+]
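The new package root re-exports the public names, so the pre-split import path keeps working next to the new submodules. A quick sketch:

```python
# Both import styles resolve to the same classes after the split:
from nao_core.config import AccessorType, NaoConfig   # re-exported at the package root
from nao_core.config.databases import BigQueryConfig  # direct submodule path
from nao_core.config.llm import LLMProvider
```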
nao_core/{config.py → config/base.py}
RENAMED
@@ -1,80 +1,18 @@
-from enum import Enum
 from pathlib import Path
-from typing import Literal

-import ibis
 import yaml
 from ibis import BaseBackend
 from pydantic import BaseModel, Field, model_validator

-
-class LLMProvider(str, Enum):
-    """Supported LLM providers."""
-
-    OPENAI = "openai"
-
-
-class DatabaseType(str, Enum):
-    """Supported database types."""
-
-    BIGQUERY = "bigquery"
-
-
-class BigQueryConfig(BaseModel):
-    """BigQuery-specific configuration."""
-
-    type: Literal["bigquery"] = "bigquery"
-    name: str = Field(description="A friendly name for this connection")
-    project_id: str = Field(description="GCP project ID")
-    dataset_id: str | None = Field(default=None, description="Default BigQuery dataset")
-    credentials_path: str | None = Field(
-        default=None,
-        description="Path to service account JSON file. If not provided, uses Application Default Credentials (ADC)",
-    )
-
-    def connect(self) -> BaseBackend:
-        """Create an Ibis BigQuery connection."""
-        kwargs: dict = {"project_id": self.project_id}
-
-        if self.dataset_id:
-            kwargs["dataset_id"] = self.dataset_id
-
-        if self.credentials_path:
-            from google.oauth2 import service_account
-
-            credentials = service_account.Credentials.from_service_account_file(
-                self.credentials_path,
-                scopes=["https://www.googleapis.com/auth/bigquery"],
-            )
-            kwargs["credentials"] = credentials
-
-        return ibis.bigquery.connect(**kwargs)
-
-
-DatabaseConfig = BigQueryConfig
-
-
-def parse_database_config(data: dict) -> DatabaseConfig:
-    """Parse a database config dict into the appropriate type."""
-    db_type = data.get("type")
-    if db_type == "bigquery":
-        return BigQueryConfig.model_validate(data)
-    else:
-        raise ValueError(f"Unknown database type: {db_type}")
-
-
-class LLMConfig(BaseModel):
-    """LLM configuration."""
-
-    provider: LLMProvider = Field(description="The LLM provider to use")
-    api_key: str = Field(description="The API key to use")
+from .databases import AnyDatabaseConfig, parse_database_config
+from .llm import LLMConfig


 class NaoConfig(BaseModel):
     """nao project configuration."""

     project_name: str = Field(description="The name of the nao project")
-    databases: list[
+    databases: list[AnyDatabaseConfig] = Field(default_factory=list, description="The databases to use")
     llm: LLMConfig | None = Field(default=None, description="The LLM configuration")

     @model_validator(mode="before")
@@ -121,7 +59,7 @@ class NaoConfig(BaseModel):
         """Try to load config from path, returns None if not found or invalid.

         Args:
-
+            path: Directory containing nao_config.yaml. Defaults to current directory.
         """
         if path is None:
             path = Path.cwd()
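With `NaoConfig` now composed from the submodule models, a config document validates straight through Pydantic. A hedged sketch of building one from YAML (field names are inferred from the models in this diff; the `model_validator(mode="before")` hook, whose body is not shown here, may normalize input further):

```python
import yaml

from nao_core.config import NaoConfig

raw = yaml.safe_load("""
project_name: demo
databases:
  - type: bigquery
    name: warehouse
    project_id: my-gcp-project      # placeholder project
    accessors: [columns, preview, profiling]
""")

config = NaoConfig.model_validate(raw)
print([a.value for a in config.databases[0].accessors])
# ['columns', 'preview', 'profiling']
```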
nao_core/config/databases/__init__.py
ADDED
@@ -0,0 +1,29 @@
+from .base import AccessorType, DatabaseConfig, DatabaseType
+from .bigquery import BigQueryConfig
+
+# =============================================================================
+# Database Config Registry
+# =============================================================================
+
+# When adding more backends, convert this to a discriminated union:
+# AnyDatabaseConfig = Annotated[
+#     Union[
+#         Annotated[BigQueryConfig, Tag("bigquery")],
+#         Annotated[PostgresConfig, Tag("postgres")],
+#     ],
+#     Discriminator(lambda x: x.get("type", "bigquery")),
+# ]
+
+AnyDatabaseConfig = BigQueryConfig
+
+
+def parse_database_config(data: dict) -> DatabaseConfig:
+    """Parse a database config dict into the appropriate type."""
+    db_type = data.get("type")
+    if db_type == "bigquery":
+        return BigQueryConfig.model_validate(data)
+    else:
+        raise ValueError(f"Unknown database type: {db_type}")
+
+
+__all__ = ["AccessorType", "DatabaseConfig", "DatabaseType", "BigQueryConfig", "AnyDatabaseConfig"]
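Until more backends exist, `AnyDatabaseConfig` is a plain alias and `parse_database_config` is the seam where the discriminated union sketched in the comment would slot in. A usage sketch, with dict keys taken from the BigQuery model:

```python
from nao_core.config.databases import parse_database_config

cfg = parse_database_config(
    {"type": "bigquery", "name": "warehouse", "project_id": "my-gcp-project"}
)
print(type(cfg).__name__)  # BigQueryConfig

try:
    parse_database_config({"type": "postgres", "name": "pg"})
except ValueError as err:
    print(err)  # Unknown database type: postgres
```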
nao_core/config/databases/base.py
ADDED
@@ -0,0 +1,72 @@
+import fnmatch
+from abc import ABC, abstractmethod
+from enum import Enum
+
+from ibis import BaseBackend
+from pydantic import BaseModel, Field
+
+
+class DatabaseType(str, Enum):
+    """Supported database types."""
+
+    BIGQUERY = "bigquery"
+
+
+class AccessorType(str, Enum):
+    """Available data accessors for sync."""
+
+    COLUMNS = "columns"
+    PREVIEW = "preview"
+    DESCRIPTION = "description"
+    PROFILING = "profiling"
+
+
+class DatabaseConfig(BaseModel, ABC):
+    """Base configuration for all database backends."""
+
+    name: str = Field(description="A friendly name for this connection")
+
+    # Sync settings
+    accessors: list[AccessorType] = Field(
+        default=[AccessorType.COLUMNS, AccessorType.PREVIEW, AccessorType.DESCRIPTION],
+        description="List of accessors to run during sync (columns, preview, description, profiling)",
+    )
+    include: list[str] = Field(
+        default_factory=list,
+        description="Glob patterns for schemas/tables to include (e.g., 'prod_*.*', 'analytics.dim_*'). Empty means include all.",
+    )
+    exclude: list[str] = Field(
+        default_factory=list,
+        description="Glob patterns for schemas/tables to exclude (e.g., 'temp_*.*', '*.backup_*')",
+    )
+
+    @abstractmethod
+    def connect(self) -> BaseBackend:
+        """Create an Ibis connection for this database."""
+        ...
+
+    def matches_pattern(self, schema: str, table: str) -> bool:
+        """Check if a schema.table matches the include/exclude patterns.
+
+        Args:
+            schema: The schema/dataset name
+            table: The table name
+
+        Returns:
+            True if the table should be included, False if excluded
+        """
+        full_name = f"{schema}.{table}"
+
+        # If include patterns exist, table must match at least one
+        if self.include:
+            included = any(fnmatch.fnmatch(full_name, pattern) for pattern in self.include)
+            if not included:
+                return False
+
+        # If exclude patterns exist, table must not match any
+        if self.exclude:
+            excluded = any(fnmatch.fnmatch(full_name, pattern) for pattern in self.exclude)
+            if excluded:
+                return False
+
+        return True
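The include/exclude filtering is plain `fnmatch` globbing against the combined `schema.table` string: include patterns (if any) gate entry, then exclude patterns veto. A sketch of the semantics using the concrete `BigQueryConfig` subclass (all values are placeholders):

```python
from nao_core.config import BigQueryConfig

cfg = BigQueryConfig(
    name="warehouse",
    project_id="my-gcp-project",
    include=["prod_*.*"],    # only prod_* datasets
    exclude=["*.backup_*"],  # never backup tables
)

assert cfg.matches_pattern("prod_sales", "orders")             # passes include
assert not cfg.matches_pattern("prod_sales", "backup_orders")  # vetoed by exclude
assert not cfg.matches_pattern("staging", "orders")            # fails include gate
```

One `fnmatch` quirk worth keeping in mind: `*` also matches dots, so a bare pattern like `prod_*` matches the entire `prod_sales.orders` string, not just the schema part.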
nao_core/config/databases/bigquery.py
ADDED
@@ -0,0 +1,42 @@
+from typing import Literal
+
+import ibis
+from ibis import BaseBackend
+from pydantic import Field
+
+from .base import DatabaseConfig
+
+
+class BigQueryConfig(DatabaseConfig):
+    """BigQuery-specific configuration."""
+
+    type: Literal["bigquery"] = "bigquery"
+    project_id: str = Field(description="GCP project ID")
+    dataset_id: str | None = Field(default=None, description="Default BigQuery dataset")
+    credentials_path: str | None = Field(
+        default=None,
+        description="Path to service account JSON file. If not provided, uses Application Default Credentials (ADC)",
+    )
+    sso: bool = Field(default=False, description="Use Single Sign-On (SSO) for authentication")
+    location: str | None = Field(default=None, description="BigQuery location")
+
+    def connect(self) -> BaseBackend:
+        """Create an Ibis BigQuery connection."""
+        kwargs: dict = {"project_id": self.project_id}
+
+        if self.dataset_id:
+            kwargs["dataset_id"] = self.dataset_id
+
+        if self.sso:
+            kwargs["auth_local_webserver"] = True
+
+        if self.credentials_path:
+            from google.oauth2 import service_account
+
+            credentials = service_account.Credentials.from_service_account_file(
+                self.credentials_path,
+                scopes=["https://www.googleapis.com/auth/bigquery"],
+            )
+            kwargs["credentials"] = credentials
+
+        return ibis.bigquery.connect(**kwargs)
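Authentication now has three paths: Application Default Credentials by default, a browser-based flow when `sso: true` (forwarded to the backend as `auth_local_webserver=True`), or an explicit service-account file. A usage sketch (requires a real GCP project and the `ibis-framework[bigquery]` extra):

```python
from nao_core.config import BigQueryConfig

cfg = BigQueryConfig(
    name="warehouse",
    project_id="my-gcp-project",  # placeholder project
    dataset_id="analytics",
    sso=True,                     # opens a local browser window to sign in
)

conn = cfg.connect()
print(conn.list_tables(database="analytics"))
```

Note that the new `location` field is declared on the model but not yet forwarded to `ibis.bigquery.connect` in this version.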
nao_core/config/llm/__init__.py
ADDED
@@ -0,0 +1,16 @@
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+
+class LLMProvider(str, Enum):
+    """Supported LLM providers."""
+
+    OPENAI = "openai"
+
+
+class LLMConfig(BaseModel):
+    """LLM configuration."""
+
+    provider: LLMProvider = Field(description="The LLM provider to use")
+    api_key: str = Field(description="The API key to use")
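The LLM settings move into their own submodule unchanged; constructing one stays a two-field affair (how the key is sourced is up to the caller):

```python
import os

from nao_core.config import LLMConfig, LLMProvider

llm = LLMConfig(
    provider=LLMProvider.OPENAI,
    api_key=os.environ["OPENAI_API_KEY"],  # example sourcing; any string is accepted
)
```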
{nao_core-0.0.12.dist-info → nao_core-0.0.15.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nao-core
-Version: 0.0.12
+Version: 0.0.15
 Summary: nao Core is your analytics context builder with the best chat interface.
 Project-URL: Homepage, https://getnao.io
 Project-URL: Repository, https://github.com/naolabs/chat
@@ -20,11 +20,13 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Requires-Dist: cyclopts>=4.4.4
+Requires-Dist: fastapi>=0.128.0
 Requires-Dist: ibis-framework[bigquery]>=9.0.0
 Requires-Dist: openai>=1.0.0
 Requires-Dist: pydantic>=2.10.0
 Requires-Dist: pyyaml>=6.0.0
 Requires-Dist: rich>=14.0.0
+Requires-Dist: uvicorn>=0.40.0
 Description-Content-Type: text/markdown

 # nao CLI