signalpilot-ai-internal 0.10.22__py3-none-any.whl → 0.11.24__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- signalpilot_ai_internal/_version.py +1 -1
- signalpilot_ai_internal/cache_service.py +22 -21
- signalpilot_ai_internal/composio_handlers.py +224 -0
- signalpilot_ai_internal/composio_service.py +511 -0
- signalpilot_ai_internal/database_config_handlers.py +182 -0
- signalpilot_ai_internal/database_config_service.py +166 -0
- signalpilot_ai_internal/databricks_schema_service.py +19 -14
- signalpilot_ai_internal/file_scanner_service.py +5 -146
- signalpilot_ai_internal/handlers.py +317 -8
- signalpilot_ai_internal/integrations_config.py +256 -0
- signalpilot_ai_internal/log_utils.py +31 -0
- signalpilot_ai_internal/mcp_handlers.py +33 -9
- signalpilot_ai_internal/mcp_service.py +94 -142
- signalpilot_ai_internal/oauth_token_store.py +141 -0
- signalpilot_ai_internal/schema_search_config.yml +17 -11
- signalpilot_ai_internal/schema_search_service.py +30 -10
- signalpilot_ai_internal/signalpilot_home.py +961 -0
- signalpilot_ai_internal/snowflake_schema_service.py +2 -0
- signalpilot_ai_internal/unified_database_schema_service.py +2 -0
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/package.json.orig → signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json +15 -48
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/package.json → signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/package.json.orig +9 -52
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/122.bab318d6caadb055e29c.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/129.868ca665e6fc225c20a0.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/179.fd45a2e75d471d0aa3b9.js +7 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/220.81105a94aa873fc51a94.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/262.a002dd4630d3b6404a90.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/353.cc6f6ecacd703bcdb468.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/364.817a883549d55a0e0576.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/384.a4daecd44f1e9364e44a.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/439.667225aab294fb5ed161.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/447.8138af2522716e5a926f.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/476.925c73e32f3c07448da0.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/477.aaa4cc9e87801fb45f5b.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/481.370056149a59022b700c.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/510.868ca665e6fc225c20a0.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/512.835f97f7ccfc70ff5c93.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/57.6c13335f73de089d6b1e.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/574.ad2709e91ebcac5bbe68.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/635.bddbab8e464fe31f0393.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/713.fda1bcdb10497b0a6ade.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/741.d046701f475fcbf6697d.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/785.c306dffd4cfe8a613d13.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/801.e39898b6f336539f228c.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/880.77cc0ca10a1860df1b52.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/936.4e2850b2af985ed0d378.js +1 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/956.eeffe67d7781fd63ef4b.js +2 -0
- signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.055f50d20a31f3068c72.js +1 -0
- {signalpilot_ai_internal-0.10.22.data → signalpilot_ai_internal-0.11.24.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/third-party-licenses.json +29 -29
- {signalpilot_ai_internal-0.10.22.dist-info → signalpilot_ai_internal-0.11.24.dist-info}/METADATA +13 -31
- signalpilot_ai_internal-0.11.24.dist-info/RECORD +66 -0
- signalpilot_ai_internal-0.11.24.dist-info/licenses/LICENSE +7 -0
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/110.224e83db03814fd03955.js +0 -7
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/122.e2dadf63dc64d7b5f1ee.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/220.328403b5545f268b95c6.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/262.726e1da31a50868cb297.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/353.972abe1d2d66f083f9cc.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/364.dbec4c2dc12e7b050dcc.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/384.fa432bdb7fb6b1c95ad6.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/439.37e271d7a80336daabe2.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/476.ad22ccddd74ee306fb56.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/481.73c7a9290b7d35a8b9c1.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/512.b58fc0093d080b8ee61c.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js +0 -2
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/57.c4232851631fb2e7e59a.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/635.9720593ee20b768da3ca.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/713.8e6edc9a965bdd578ca7.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/726.318e4e791edb63cc788f.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/741.dc49867fafb03ea2ba4d.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/742.91e7b516c8699eea3373.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/785.2d75de1a8d2c3131a8db.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/801.ca9e114a30896b669a3c.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/880.d9914229e4f120e7e9e4.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/888.34054db17bcf6e87ec95.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/936.d80de1e4da5b520d2f3b.js +0 -1
- signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/remoteEntry.b63c429ca81e743b403c.js +0 -1
- signalpilot_ai_internal-0.10.22.dist-info/RECORD +0 -56
- signalpilot_ai_internal-0.10.22.dist-info/licenses/LICENSE +0 -29
- {signalpilot_ai_internal-0.10.22.data → signalpilot_ai_internal-0.11.24.data}/data/etc/jupyter/jupyter_server_config.d/signalpilot_ai.json +0 -0
- {signalpilot_ai_internal-0.10.22.data → signalpilot_ai_internal-0.11.24.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/install.json +0 -0
- {signalpilot_ai_internal-0.10.22.data → signalpilot_ai_internal-0.11.24.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/schemas/signalpilot-ai-internal/plugin.json +0 -0
- /signalpilot_ai_internal-0.10.22.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/553.b4042a795c91d9ff71ef.js.LICENSE.txt → /signalpilot_ai_internal-0.11.24.data/data/share/jupyter/labextensions/signalpilot-ai-internal/static/956.eeffe67d7781fd63ef4b.js.LICENSE.txt +0 -0
- {signalpilot_ai_internal-0.10.22.data → signalpilot_ai_internal-0.11.24.data}/data/share/jupyter/labextensions/signalpilot-ai-internal/static/style.js +0 -0
- {signalpilot_ai_internal-0.10.22.dist-info → signalpilot_ai_internal-0.11.24.dist-info}/WHEEL +0 -0
signalpilot_ai_internal/database_config_service.py

@@ -0,0 +1,166 @@
+"""
+Database Configuration Service
+Manages database configurations stored in db.toml in the connect/ cache directory
+"""
+
+import logging
+from typing import Any, Dict, List, Optional
+
+from .signalpilot_home import get_signalpilot_home
+
+logger = logging.getLogger(__name__)
+
+
+class DatabaseConfigService:
+    """
+    Service for managing database configurations in TOML format.
+    Configurations stored at <cache_dir>/connect/db.toml
+    (e.g., ~/Library/Caches/SignalPilotAI/connect/db.toml on macOS)
+    """
+
+    _instance = None
+
+    # Supported database types
+    SUPPORTED_TYPES = ["snowflake", "postgres", "mysql", "databricks"]
+
+    def __init__(self):
+        self._home_manager = get_signalpilot_home()
+
+    @classmethod
+    def get_instance(cls) -> 'DatabaseConfigService':
+        """Get singleton instance."""
+        if cls._instance is None:
+            cls._instance = DatabaseConfigService()
+        return cls._instance
+
+    def get_all_configs(self) -> List[Dict[str, Any]]:
+        """Get all database configurations."""
+        return self._home_manager.get_database_configs()
+
+    def get_config(self, db_type: str, name: str) -> Optional[Dict[str, Any]]:
+        """Get a specific database configuration."""
+        return self._home_manager.get_database_config(db_type, name)
+
+    def get_configs_by_type(self, db_type: str) -> List[Dict[str, Any]]:
+        """Get all configurations for a specific database type."""
+        configs = self.get_all_configs()
+        return [c for c in configs if c.get("type") == db_type]
+
+    def add_config(self, db_type: str, config: Dict[str, Any]) -> bool:
+        """Add a new database configuration."""
+        if db_type not in self.SUPPORTED_TYPES:
+            logger.error(f"Unsupported database type: {db_type}")
+            return False
+
+        if "name" not in config:
+            logger.error("Database config must have a 'name' field")
+            return False
+
+        return self._home_manager.add_database_config(db_type, config)
+
+    def update_config(self, db_type: str, name: str,
+                      updates: Dict[str, Any]) -> bool:
+        """Update an existing database configuration."""
+        return self._home_manager.update_database_config(db_type, name, updates)
+
+    def remove_config(self, db_type: str, name: str) -> bool:
+        """Remove a database configuration."""
+        return self._home_manager.remove_database_config(db_type, name)
+
+    def set_defaults(self, defaults: Dict[str, Any]) -> bool:
+        """Set global defaults for database configurations."""
+        return self._home_manager.set_database_defaults(defaults)
+
+    def get_defaults(self) -> Dict[str, Any]:
+        """Get global defaults."""
+        return self._home_manager.get_database_defaults()
+
+    # ==================== Type-specific helpers ====================
+
+    def add_snowflake_config(self, name: str, account: str,
+                             database: str = None,
+                             warehouse: str = None,
+                             role: str = None,
+                             username: str = None,
+                             password: str = None,
+                             **extra) -> bool:
+        """Add a Snowflake database configuration."""
+        config = {
+            "name": name,
+            "account": account,
+        }
+        if database:
+            config["database"] = database
+        if warehouse:
+            config["warehouse"] = warehouse
+        if role:
+            config["role"] = role
+        if username:
+            config["username"] = username
+        if password:
+            config["password"] = password
+        config.update(extra)
+
+        return self.add_config("snowflake", config)
+
+    def add_postgres_config(self, name: str, host: str, port: int,
+                            database: str, username: str, password: str,
+                            **extra) -> bool:
+        """Add a PostgreSQL database configuration."""
+        config = {
+            "name": name,
+            "host": host,
+            "port": port,
+            "database": database,
+            "username": username,
+            "password": password,
+        }
+        config.update(extra)
+
+        return self.add_config("postgres", config)
+
+    def add_mysql_config(self, name: str, host: str, port: int,
+                         database: str, username: str, password: str,
+                         **extra) -> bool:
+        """Add a MySQL database configuration."""
+        config = {
+            "name": name,
+            "host": host,
+            "port": port,
+            "database": database,
+            "username": username,
+            "password": password,
+        }
+        config.update(extra)
+
+        return self.add_config("mysql", config)
+
+    def add_databricks_config(self, name: str, host: str,
+                              http_path: str, catalog: str,
+                              auth_type: str = "pat",
+                              access_token: str = None,
+                              client_id: str = None,
+                              client_secret: str = None,
+                              **extra) -> bool:
+        """Add a Databricks database configuration."""
+        config = {
+            "name": name,
+            "host": host,
+            "http_path": http_path,
+            "catalog": catalog,
+            "auth_type": auth_type,
+        }
+        if access_token:
+            config["access_token"] = access_token
+        if client_id:
+            config["client_id"] = client_id
+        if client_secret:
+            config["client_secret"] = client_secret
+        config.update(extra)
+
+        return self.add_config("databricks", config)
+
+
+def get_database_config_service() -> DatabaseConfigService:
+    """Get the singleton instance."""
+    return DatabaseConfigService.get_instance()
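The new module is a thin singleton facade over the SignalPilotHome store. A minimal usage sketch, using only the APIs visible in the hunk above (the connection values and the extra `sslmode` keyword are illustrative; the actual persistence behavior lives in the also-new signalpilot_home.py, not shown here):

    from signalpilot_ai_internal.database_config_service import get_database_config_service

    svc = get_database_config_service()  # Module-level accessor; always returns the singleton

    # Register a PostgreSQL connection. add_config() rejects unknown types and
    # configs without a 'name' field, returning False instead of raising.
    svc.add_postgres_config(
        name="analytics",
        host="db.example.com",
        port=5432,
        database="warehouse",
        username="reader",
        password="s3cret",
        sslmode="require",  # Arbitrary extra keys flow through **extra into the stored config
    )

    # Retrieve configs, filtered by type
    for cfg in svc.get_configs_by_type("postgres"):
        print(cfg["name"], cfg["host"])

Note that `get_configs_by_type` filters on a `type` key, which implies the home manager stamps each stored config with its database type when persisting it.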
signalpilot_ai_internal/databricks_schema_service.py

@@ -22,6 +22,7 @@ import threading
 from jupyter_server.base.handlers import APIHandler
 import tornado
 
+from .log_utils import print
 
 # In-memory token cache for Service Principal OAuth tokens
 # Key: connection_id or hash of client credentials
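The `from .log_utils import print` import (also added to file_scanner_service.py below) shadows the builtin so that the module's existing print calls are routed through the new log_utils module (+31 lines, contents not shown in this diff). Purely as an assumption about what such a shim looks like, a hypothetical sketch:

    # Hypothetical sketch only; the real log_utils.py is not included in this diff view.
    import builtins
    import logging

    _logger = logging.getLogger("signalpilot_ai_internal")

    def print(*args, **kwargs):
        """Drop-in replacement for builtins.print that mirrors output to a logger."""
        message = kwargs.get("sep", " ").join(str(a) for a in args)
        _logger.info(message)
        builtins.print(*args, **kwargs)  # Preserve normal console output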
@@ -193,17 +194,18 @@ class DatabricksSchemaHandler(APIHandler):
         """Build Databricks connection parameters from configuration"""
         import re
 
-        # Extract host
-        connection_url = config.get('connectionUrl', '')
+        # Extract host - check 'host' first, then fall back to 'connectionUrl' for backwards compatibility
+        connection_url = config.get('host') or config.get('connectionUrl', '')
         if not connection_url:
-            raise ValueError("
+            raise ValueError("host (workspace URL) is required for Databricks")
 
-        # Extract host from URL
+        # Extract host from URL - support both with and without protocol prefix
         url_match = re.match(r'https?://([^/]+)', connection_url)
-        if
-
-
-
+        if url_match:
+            server_hostname = url_match.group(1)
+        else:
+            # Assume it's just the hostname without protocol
+            server_hostname = connection_url.split('/')[0].strip()
 
         # Get HTTP path for SQL warehouse
         http_path = config.get('warehouseHttpPath') or config.get('httpPath')
@@ -666,15 +668,18 @@ class DatabricksQueryHandler(APIHandler):
         """Build Databricks connection parameters from configuration"""
         import re
 
-
+        # Extract host - check 'host' first, then fall back to 'connectionUrl' for backwards compatibility
+        connection_url = config.get('host') or config.get('connectionUrl', '')
         if not connection_url:
-            raise ValueError("
+            raise ValueError("host (workspace URL) is required for Databricks")
 
+        # Extract host from URL - support both with and without protocol prefix
 
        url_match = re.match(r'https?://([^/]+)', connection_url)
-        if
-
-
-
+        if url_match:
+            server_hostname = url_match.group(1)
+        else:
+            # Assume it's just the hostname without protocol
+            server_hostname = connection_url.split('/')[0].strip()
 
         http_path = config.get('warehouseHttpPath') or config.get('httpPath')
         if not http_path:
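The host-normalization change is identical in both handlers, so it can be read in isolation. A standalone sketch of the added logic (the function name and example hostnames are illustrative):

    import re

    def extract_server_hostname(connection_url: str) -> str:
        """Return the bare hostname, accepting values with or without an https:// prefix."""
        if not connection_url:
            raise ValueError("host (workspace URL) is required for Databricks")
        url_match = re.match(r'https?://([^/]+)', connection_url)
        if url_match:
            return url_match.group(1)
        # No protocol prefix: treat everything before the first '/' as the hostname
        return connection_url.split('/')[0].strip()

    # Both input forms normalize to the same hostname:
    assert extract_server_hostname("https://adb-123.azuredatabricks.net/sql/1.0") == "adb-123.azuredatabricks.net"
    assert extract_server_hostname("adb-123.azuredatabricks.net") == "adb-123.azuredatabricks.net"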
signalpilot_ai_internal/file_scanner_service.py

@@ -19,6 +19,7 @@ import pyarrow.dataset as ds
 from openpyxl import load_workbook
 
 from .cache_service import get_cache_service, get_file_scan_cache_manager
+from .log_utils import print
 
 
 class FileScannerService:
@@ -30,7 +31,7 @@ class FileScannerService:
         self._lock = threading.RLock()
 
         # Data file extensions
-        self.DATA_EXTENSIONS = {'.csv', '.json', '.xlsx', '.xls', '.parquet',
+        self.DATA_EXTENSIONS = {'.csv', '.json', '.xlsx', '.xls', '.parquet',
                                 '.feather', '.hdf5', '.h5', '.sql', '.db', '.sqlite', '.tsv', '.txt', '.ipynb'}
 
         # Directories to exclude from search
@@ -76,100 +77,6 @@ class FileScannerService:
             return (printable / len(chunk)) < 0.7
         except (IOError, OSError):
             return True
-
-    def _generate_pickle_data_preview(self, data: Any, max_items: int = 3, max_chars: int = 1000) -> Tuple[str, bool]:
-        """
-        Generate a content preview for non-DataFrame pickle data.
-        Returns (preview_content, is_truncated)
-        """
-        try:
-            data_type = type(data).__name__
-
-            if isinstance(data, (list, tuple)):
-                if len(data) == 0:
-                    return f"Empty {data_type}", False
-
-                preview_items = []
-                for i, item in enumerate(data[:max_items]):
-                    item_str = str(item)
-                    if len(item_str) > 200:
-                        item_str = item_str[:200] + "..."
-                    preview_items.append(f"[{i}]: {item_str}")
-
-                preview = f"{data_type} with {len(data)} items:\n" + "\n".join(preview_items)
-                is_truncated = len(data) > max_items
-
-                if len(preview) > max_chars:
-                    preview = preview[:max_chars] + "..."
-                    is_truncated = True
-
-                return preview, is_truncated
-
-            elif isinstance(data, dict):
-                if len(data) == 0:
-                    return f"Empty {data_type}", False
-
-                preview_items = []
-                for i, (key, value) in enumerate(list(data.items())[:max_items]):
-                    key_str = str(key)
-                    value_str = str(value)
-                    if len(value_str) > 150:
-                        value_str = value_str[:150] + "..."
-                    preview_items.append(f"'{key_str}': {value_str}")
-
-                preview = f"{data_type} with {len(data)} keys:\n" + "\n".join(preview_items)
-                is_truncated = len(data) > max_items
-
-                if len(preview) > max_chars:
-                    preview = preview[:max_chars] + "..."
-                    is_truncated = True
-
-                return preview, is_truncated
-
-            elif isinstance(data, np.ndarray):
-                shape_str = str(data.shape)
-                dtype_str = str(data.dtype)
-
-                if data.size == 0:
-                    return f"Empty numpy array: shape={shape_str}, dtype={dtype_str}", False
-
-                # Show first few elements
-                flat_data = data.flatten()[:max_items]
-                elements_str = ", ".join([str(x) for x in flat_data])
-
-                preview = f"numpy.ndarray: shape={shape_str}, dtype={dtype_str}\nFirst elements: [{elements_str}]"
-                is_truncated = data.size > max_items
-
-                if len(preview) > max_chars:
-                    preview = preview[:max_chars] + "..."
-                    is_truncated = True
-
-                return preview, is_truncated
-
-            elif isinstance(data, str):
-                if len(data) == 0:
-                    return "Empty string", False
-
-                preview = f"String ({len(data)} chars): {data[:max_chars]}"
-                is_truncated = len(data) > max_chars
-                return preview, is_truncated
-
-            elif isinstance(data, (int, float, bool)):
-                return f"{data_type}: {data}", False
-
-            else:
-                # For other types, try to convert to string
-                data_str = str(data)
-                if len(data_str) > max_chars:
-                    data_str = data_str[:max_chars] + "..."
-                    is_truncated = True
-                else:
-                    is_truncated = False
-
-                return f"{data_type}: {data_str}", is_truncated
-
-        except Exception as e:
-            return f"Error generating preview for {type(data).__name__}: {str(e)}", False
 
     def _parse_json_array_simple(self, filepath: str, max_items: int = 5) -> Tuple[List[Any], bool]:
         """
@@ -613,12 +520,11 @@ class FileScannerService:
             'is_tsv': extension == '.tsv',
             'is_json': extension == '.json',
             'is_parquet': extension == '.parquet',
-            'is_pkl': extension in ['.pkl', '.pickle'],
             'is_xlsx': extension == '.xlsx',
             'is_ipynb': extension == '.ipynb',
             'is_text': extension in ['.txt', '.md', '.py', '.js', '.ts', '.html', '.xml', '.ipynb'],
-            'is_data': extension in ['.csv', '.tsv', '.json', '.jsonl', '.parquet', '.
-            'is_binary': extension in ['.parquet', '.
+            'is_data': extension in ['.csv', '.tsv', '.json', '.jsonl', '.parquet', '.xlsx'],
+            'is_binary': extension in ['.parquet', '.xlsx'] # Will be set later based on actual binary detection
         }
 
         try:
@@ -1121,8 +1027,6 @@ class FileScannerService:
             file_type = 'tsv'
         elif extension == '.parquet':
             file_type = 'parquet'
-        elif extension in ['.pkl', '.pickle']:
-            file_type = 'pkl'
         elif extension == '.xlsx':
             file_type = 'xlsx'
         elif extension == '.json' or extension == '.jsonl':
@@ -1174,51 +1078,6 @@ class FileScannerService:
             except Exception:
                 sheet_names = ['Sheet1']  # Default sheet name
                 total_sheets = 1  # Default to 1 if we can't determine
-        elif file_type == 'pkl':
-            print(f"Reading pickle file: {abs_path}")
-            data = pd.read_pickle(abs_path)
-            print(f"Data: {data}")
-            if isinstance(data, pd.DataFrame):
-                print(f"Data is a DataFrame: {data.head(5)}")
-                df = data.head(5)  # Limit to first 5 rows
-            else:
-                # Handle non-DataFrame pickle data
-                print(f"Data is not a DataFrame: {type(data).__name__}")
-
-                # Get file info
-                file_info = self._get_file_type_info(str(item), extension)
-                entry['file_info'] = file_info
-
-                # Check if file is binary (pickle files are always binary)
-                is_binary = True
-                file_info['is_binary'] = True
-
-                # Generate content preview for the pickle data
-                content_preview, is_truncated = self._generate_pickle_data_preview(data)
-                entry['content_preview'] = content_preview
-                entry['is_truncated'] = is_truncated
-
-                # Create schema for non-DataFrame pickle data
-                schema = {
-                    'success': True,
-                    'fileId': abs_path,
-                    'fileName': item.name,
-                    'filePath': abs_path,
-                    'fileType': file_type,
-                    'extractedAt': datetime.now().isoformat(),
-                    'summary': f'Pickle file containing {type(data).__name__}',
-                    'columns': [],
-                    'totalRows': 1 if not hasattr(data, '__len__') else len(data) if hasattr(data, '__len__') else 1,
-                    'totalColumns': 0,
-                    'fileMtime': current_mtime
-                }
-
-                # Cache the entry
-                if entry:
-                    entry['schema'] = schema
-                    self.file_scan_cache.set_file_entry(abs_path, entry)
-
-                return schema
         elif file_type == 'json':
             # Read and analyze JSON file
             json_data, file_format, is_truncated = self._read_json_file(abs_path)
@@ -1273,7 +1132,7 @@ class FileScannerService:
 
             return schema
 
-        # Get file info for
+        # Get file info for other file types
         file_info = self._get_file_type_info(str(item), extension)
         entry['file_info'] = file_info
 