sibi-dst 0.3.56__py3-none-any.whl → 0.3.57__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +165 -166
- sibi_dst/df_helper/_df_helper.py +55 -23
- sibi_dst/df_helper/_parquet_artifact.py +29 -11
- sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +182 -89
- sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +6 -2
- sibi_dst/utils/__init__.py +2 -0
- sibi_dst/utils/data_wrapper.py +34 -93
- sibi_dst/utils/parquet_saver.py +15 -12
- sibi_dst/utils/update_planner.py +237 -0
- {sibi_dst-0.3.56.dist-info → sibi_dst-0.3.57.dist-info}/METADATA +1 -1
- {sibi_dst-0.3.56.dist-info → sibi_dst-0.3.57.dist-info}/RECORD +12 -11
- {sibi_dst-0.3.56.dist-info → sibi_dst-0.3.57.dist-info}/WHEEL +0 -0
sibi_dst/df_helper/_parquet_artifact.py
CHANGED
@@ -1,5 +1,6 @@
 import datetime
 import logging
+import threading
 from typing import Optional, Any, Dict
 
 import dask.dataframe as dd
@@ -78,6 +79,7 @@ class ParquetArtifact(DfHelper):
             `parquet_filename`, `parquet_start_date`,
             or `parquet_end_date`) are missing or not set properly.
         """
+        self._lock = threading.Lock()
         self.config = {
             **self.DEFAULT_CONFIG,
             **kwargs,
@@ -119,21 +121,36 @@ class ParquetArtifact(DfHelper):
         super().__init__(**self.config)
 
     def load(self, **kwargs):
-        self.
+        with self._lock:
+            self.df = super().load(**kwargs)
         return self.df
 
     def generate_parquet(self, **kwargs) -> None:
         """
         Generate a Parquet file using the configured DataWrapper class.
         """
-
-
-
+        with self._lock:
+            params = self._prepare_params(kwargs)
+            dw = DataWrapper(self.data_wrapper_class, **params)
+            dw.process()
+
+    def __enter__(self):
+        if getattr(self, "_entered", False):
+            return self
+        self._entered = True
+        self.ensure_directory_exists(self.parquet_storage_path)
+        return self
 
     def __exit__(self, exc_type, exc_value, traceback):
-
-
-
+        try:
+            if getattr(self, "_entered", False) and self.fs:
+                self.fs.close()
+        except Exception as e:
+            self.logger.warning(f"Error closing filesystem: {e}")
+        finally:
+            self._entered = False
+            # return False so exceptions aren’t suppressed
+            return False
 
     def update_parquet(self, period: str = 'today', **kwargs) -> None:
         """Update the Parquet file with data from a specific period."""
@@ -223,7 +240,8 @@ class ParquetArtifact(DfHelper):
 
     def ensure_directory_exists(self, path: str) -> None:
         """Ensure the directory exists in the specified filesystem."""
-
-
-
-
+        with self._lock:
+            try:
+                self.fs.makedirs(path, exist_ok=True)
+            except Exception as e:
+                raise ValueError(f"Error creating directory {path} in filesystem {self.filesystem_type}: {e}")
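For orientation, here is a minimal usage sketch of the locking and context-manager behavior introduced above. It is not taken from the package; the subclass name and constructor keys are illustrative assumptions based only on the attributes visible in this diff.

    # Hedged sketch: exercising the new __enter__/__exit__ protocol of ParquetArtifact.
    # "OrdersArtifact" and the config keys below are hypothetical placeholders.
    with OrdersArtifact(
        parquet_storage_path="file:///data/orders/",
        parquet_start_date="2025-01-01",
        parquet_end_date="2025-01-31",
    ) as artifact:            # __enter__ guards against re-entry and ensures the directory exists
        df = artifact.load()  # load() and generate_parquet() now serialize on self._lock
    # __exit__ closes self.fs if it was opened and returns False, so exceptions still propagate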
sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py
CHANGED
@@ -1,109 +1,202 @@
-from 
+from __future__ import annotations
+from typing import Any, Optional, ClassVar, Generator, Type
 import threading
-
-from pydantic import BaseModel, model_validator
-from sqlalchemy import create_engine
+from contextlib import contextmanager
+from pydantic import BaseModel, field_validator, ValidationError, model_validator
+from sqlalchemy import create_engine, event, text
+from sqlalchemy.engine import url as sqlalchemy_url
+from sqlalchemy.engine import Engine
 from sqlalchemy.exc import OperationalError
-from sqlalchemy.
+from sqlalchemy.pool import QueuePool, NullPool, StaticPool
+from sqlalchemy.orm import sessionmaker, Session
 from sibi_dst.utils import Logger
-
 from ._sql_model_builder import SqlAlchemyModelBuilder
 
+
 class SqlAlchemyConnectionConfig(BaseModel):
     """
-
-
-
-
-
-
-
-    :ivar connection_url: The URL used to connect to the database.
-    :type connection_url: str
-    :ivar table: The name of the database table for which a model will be constructed.
-    :type table: Optional[str]
-    :ivar model: The dynamically built SQLAlchemy model for the specified table.
-    :type model: Any
-    :ivar engine: The SQLAlchemy engine instance reused for database connections.
-    :type engine: Optional[Any]
+    Thread-safe, registry-backed SQLAlchemy connection manager with:
+      - Shared engine reuse
+      - Active connection tracking
+      - Idle-pool and database-level cleanup
+      - Dynamic ORM model building via SqlAlchemyModelBuilder
+      - Optional session factory
     """
     connection_url: str
     table: Optional[str] = None
-    model: Any = None
-    engine: Optional[
-    logger:
-
-
+    model: Optional[Any] = None
+    engine: Optional[Engine] = None
+    logger: Logger = None
+    debug: bool = False
+
+    pool_size: int = 5
+    max_overflow: int = 10
     pool_timeout: int = 30
-    pool_recycle:int = 300
+    pool_recycle: int = 300
+    pool_pre_ping: bool = True
+    poolclass: Type = QueuePool
 
-
-
+    session_factory: Optional[sessionmaker] = None
+    _owns_engine: bool = False
+
+    _engine_registry: ClassVar[dict[tuple, Engine]] = {}
     _registry_lock: ClassVar[threading.Lock] = threading.Lock()
+    _active_connections: ClassVar[int] = 0
 
-
-
-
-
-
-
-
-
-
-
-
-        # Validate `connection_url`
-        if self.engine is not None:
-            engine_url = str(self.engine.url)
-            if engine_url != self.connection_url:
-                raise ValueError(f"Engine URL '{engine_url}' does not match the provided connection URL '{self.connection_url}'.")
-        else:
-            # Generate a unique key for the engine registry based on the connection URL
-            engine_key = (
-                self.connection_url,
-                self.pool_size,
-                self.max_overflow,
-                self.pool_timeout,
-                self.pool_recycle
-            )
-            with self.__class__._registry_lock:
-                if engine_key in self.__class__._engine_registry:
-                    # Reuse the existing engine
-                    self.logger.info(f"Reusing existing engine for connection URL: {self.connection_url}")
-                    self.engine = self.__class__._engine_registry[engine_key]
-                else:
-                    # Initialize the engine
-                    self.logger.info(f"Creating new engine for connection URL: {self.connection_url}")
-                    self.engine = create_engine(self.connection_url,
-                                                pool_size=self.pool_size,
-                                                max_overflow=self.max_overflow,
-                                                pool_timeout=self.pool_timeout,
-                                                pool_recycle=self.pool_recycle)
-                    self.__class__._engine_registry[engine_key] = self.engine
-
-        # Validate the connection
-        self.validate_connection()
-        if not self.table:
-            raise ValueError("`table_name` must be provided to build the model.")
-        try:
-            self.model = SqlAlchemyModelBuilder(self.engine, self.table).build_model()
-        except Exception as e:
-            raise ValueError(f"Failed to build model for table '{self.table}': {e}")
+    class Config:
+        arbitrary_types_allowed = True
+        underscore_attrs_are_private = True
+
+    @field_validator("pool_size", "max_overflow", "pool_timeout", "pool_recycle")
+    @classmethod
+    def _validate_pool_params(cls, v: int) -> int:
+        if v < 0:
+            raise ValueError("Pool parameters must be non-negative")
+        return v
 
+    @model_validator(mode="after")
+    def _init_all(self) -> SqlAlchemyConnectionConfig:
+        self._init_logger()
+        self._init_engine()
+        self._validate_conn()
+        self._build_model()
+        self.session_factory = sessionmaker(bind=self.engine, expire_on_commit=False)
         return self
 
-    def 
-
-
+    def _init_logger(self) -> None:
+        self.logger = self.logger or Logger.default_logger(logger_name=self.__class__.__name__)
+        self.logger.set_level(Logger.DEBUG if self.debug else Logger.INFO)
+
+    def _engine_key(self) -> tuple:
+        parsed = sqlalchemy_url.make_url(self.connection_url)
+        query = {k: v for k, v in parsed.query.items() if not k.startswith("pool_")}
+        normalized = parsed.set(query=query)
+        key = [str(normalized)]
+        if self.poolclass not in (NullPool, StaticPool):
+            key += [self.pool_size, self.max_overflow, self.pool_timeout, self.pool_recycle, self.pool_pre_ping, self.table]
+        return tuple(key)
+
+    def _init_engine(self) -> None:
+        key = self._engine_key()
+        with self._registry_lock:
+            existing = self._engine_registry.get(key)
+            if existing:
+                self.engine = existing
+                self._owns_engine = False
+                self.logger.debug(f"Reusing engine {key}")
+            else:
+                self.logger.debug(f"Creating engine {key}")
+                self.engine = create_engine(
+                    self.connection_url,
+                    pool_size=self.pool_size,
+                    max_overflow=self.max_overflow,
+                    pool_timeout=self.pool_timeout,
+                    pool_recycle=self.pool_recycle,
+                    pool_pre_ping=self.pool_pre_ping,
+                    poolclass=self.poolclass,
+                )
+                self._attach_events()
+                self._engine_registry[key] = self.engine
+                self._owns_engine = True
+
+    def _attach_events(self) -> None:
+        event.listen(self.engine, "checkout", self._on_checkout)
+        event.listen(self.engine, "checkin", self._on_checkin)
+
+    def _on_checkout(self, *args) -> None:
+        with self._registry_lock:
+            type(self)._active_connections += 1
+        self.logger.debug(f"Checked out, active: {self.active_connections}")
+
+    def _on_checkin(self, *args) -> None:
+        with self._registry_lock:
+            type(self)._active_connections = max(type(self)._active_connections - 1, 0)
+        self.logger.debug(f"Checked in, active: {self.active_connections}")
+
+    @property
+    def active_connections(self) -> int:
+        return type(self)._active_connections
+
+    def _validate_conn(self) -> None:
         try:
-            with self.
-
+            with self.managed_connection() as conn:
+                conn.execute(text("SELECT 1"))
+            self.logger.debug("Connection OK")
         except OperationalError as e:
-
+            self.logger.error(f"Connection failed: {e}")
+            raise ValidationError(f"DB connection failed: {e}")
 
-    @
-    def 
-
-
-
+    @contextmanager
+    def managed_connection(self) -> Generator[Any, None, Any]:
+        conn = self.engine.connect()
+        try:
+            yield conn
+        finally:
+            conn.close()
+
+    def get_session(self) -> Session:
+        if not self.session_factory:
+            raise RuntimeError("Session factory not initialized")
+        return self.session_factory()
+
+    def _build_model(self) -> None:
+        """Dynamically build and assign the ORM model if table is set"""
+        if not self.table or not self.engine:
+            return
+        try:
+            builder = SqlAlchemyModelBuilder(self.engine, self.table)
+            self.model = builder.build_model()
+            self.logger.debug(f"Model built for table: {self.table}")
+        except Exception as e:
+            self.logger.error(f"Model build failed: {e}")
+            raise ValidationError(f"Model construction error: {e}") from e
+
+    def dispose_idle_connections(self) -> int:
+        key = self._engine_key()
+        with self._registry_lock:
+            if self._engine_registry.get(key) is not self.engine:
+                self.logger.warning("Engine changed")
+                return 0
+            pool = self.engine.pool
+            if isinstance(pool, QueuePool):
+                count = pool.checkedin()
+                pool.dispose()
+                self.logger.debug(f"Disposed {count}")
+                return count
+            self.logger.warning(f"No idle dispose for {type(pool).__name__}")
+            return 0
+
+    def terminate_idle_connections(self, idle_seconds: int = 300) -> int:
+        terminated = 0
+        dialect = self.engine.dialect.name
+        with self.managed_connection() as conn:
+            if dialect == 'postgresql':
+                res = conn.execute(text(
+                    f"SELECT pg_terminate_backend(pid) FROM pg_stat_activity "
+                    f"WHERE state='idle' AND (now() - query_start) > interval '{idle_seconds} seconds' "
+                    f"AND pid<>pg_backend_pid()"
+                ))
+                terminated = res.rowcount
+            elif dialect == 'mysql':
+                for row in conn.execute(text("SHOW PROCESSLIST")):
+                    if row.Command == 'Sleep' and row.Time > idle_seconds:
+                        conn.execute(text(f"KILL {row.Id}"))
+                        terminated += 1
+            else:
+                self.logger.warning(f"Idle termination not supported: {dialect}")
+        self.logger.debug(f"Terminated {terminated}")
+        return terminated
+
+    def close(self) -> None:
+        with self._registry_lock:
+            key = self._engine_key()
+            if not self._owns_engine:
+                self.logger.warning("Not owner, skipping close")
+                return
+            if self._engine_registry.get(key) != self.engine:
+                self.logger.warning("Engine not in registry")
+                return
+            self.engine.dispose()
+            del self._engine_registry[key]
+            type(self)._active_connections = 0
+            self.logger.debug(f"Engine closed {key}")
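To make the new surface easier to scan, a hedged usage sketch follows. It only touches methods that appear in the diff above; the import path follows the wheel layout shown in the file list, and the connection URL and table name are placeholders, not values from the package.

    # Hedged sketch of the rewritten SqlAlchemyConnectionConfig; URL and table are placeholders.
    from sqlalchemy import text
    from sibi_dst.df_helper.backends.sqlalchemy._db_connection import SqlAlchemyConnectionConfig

    cfg = SqlAlchemyConnectionConfig(
        connection_url="postgresql+psycopg2://user:pass@localhost:5432/appdb",
        table="orders",
        debug=True,
    )

    with cfg.managed_connection() as conn:       # connection is always closed on exit
        conn.execute(text("SELECT 1"))

    session = cfg.get_session()                  # sessionmaker built in _init_all()
    try:
        rows = session.query(cfg.model).limit(10).all()   # cfg.model is built from `table`
    finally:
        session.close()

    print(cfg.active_connections)                # pool checkout/checkin counter
    cfg.dispose_idle_connections()               # QueuePool-only idle cleanup
    cfg.close()                                  # disposes the engine only if this instance owns it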
sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py
CHANGED
@@ -133,9 +133,13 @@ class SqlAlchemyLoadFromDb:
                 dask_df = dd.from_pandas(pd.DataFrame(), npartitions=1)
 
                 return dask_df
+
             return self.df
+        except RuntimeError as e:
+            self.logger.info(f"Runtime Error {e}:Failed to load data into Dask DataFrame.")
+            dask_df = dd.from_pandas(pd.DataFrame(), npartitions=1)
+            return dask_df
         except Exception as e:
-            self.logger.
+            self.logger.info(f"Exception {e}:Failed to load data into Dask DataFrame.")
             dask_df = dd.from_pandas(pd.DataFrame(), npartitions=1)
-
             return dask_df
sibi_dst/utils/__init__.py
CHANGED
@@ -12,11 +12,13 @@ from .parquet_saver import ParquetSaver
 from .clickhouse_writer import ClickHouseWriter
 from .airflow_manager import AirflowDAGManager
 from .credentials import *
+from .update_planner import UpdatePlanner
 from .data_wrapper import DataWrapper
 from .storage_config import StorageConfig
 from .data_from_http_source import DataFromHttpSource
 from .webdav_client import WebDAVClient
 
+
 __all__ = [
     "Logger",
     "ConfigManager",
sibi_dst/utils/data_wrapper.py
CHANGED
@@ -2,7 +2,7 @@ import datetime
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Type, Any, Dict, Optional, Union, List, Tuple
-
+import threading
 import fsspec
 import pandas as pd
 from IPython.display import display
@@ -11,6 +11,7 @@ from tqdm import tqdm
 from .log_utils import Logger
 from .date_utils import FileAgeChecker
 from .parquet_saver import ParquetSaver
+from .update_planner import UpdatePlanner
 
 
 class DataWrapper:
@@ -48,7 +49,7 @@ class DataWrapper:
                  timeout: float = 60,
                  reference_date: datetime.date = None,
                  custom_priority_map: Dict[str, int] = None,
-                 max_threads: int = 
+                 max_threads: int = 3):
         self.dataclass = dataclass
         self.date_field = date_field
         self.data_path = self._ensure_forward_slash(data_path)
@@ -75,10 +76,26 @@ class DataWrapper:
 
         self.start_date = self._convert_to_date(start_date)
         self.end_date = self._convert_to_date(end_date)
-        self._lock = Lock()
+        self._lock = threading.Lock()
         self.processed_dates = []
         self.age_checker = FileAgeChecker(logger=self.logger)
-
+
+        self.update_planner_params = {
+            "data_path": self.data_path,
+            "filename": self.parquet_filename,
+            "fs": self.fs,
+            "debug": self.debug,
+            "logger": self.logger,
+            "reverse_order": self.reverse_order,
+            "overwrite": self.overwrite,
+            "ignore_missing": self.ignore_missing,
+            "history_days_threshold": history_days_threshold,
+            "max_age_minutes": max_age_minutes,
+            "show_progress": self.show_progress,
+            "description": f"{self.dataclass.__name__}"
+        }
+        self.update_plan = UpdatePlanner(**self.update_planner_params).generate_plan(self.start_date, self.end_date)
+
 
     def _init_filesystem(self) -> fsspec.AbstractFileSystem:
         with self._lock:
@@ -115,13 +132,13 @@ class DataWrapper:
 
     def process(self, max_retries: int = 3):
         """Process updates with priority-based execution and retries"""
-        update_plan = self.generate_update_plan()
-
+        #update_plan = self.generate_update_plan()
+        update_plan = self.update_plan
         if update_plan.empty:
             self.logger.info("No updates required")
             return
         # Filter for required updates first
-        update_plan = update_plan[update_plan["update_required"] == True]
+        #update_plan = update_plan[update_plan["update_required"] == True]
 
         if self.show_progress:
             #display(self._enhanced_display_table(update_plan))
@@ -140,9 +157,9 @@ class DataWrapper:
             return
 
         desc = f"Processing {self.dataclass.__name__}, task: {self._priority_label(priority)}"
-        self.logger.
+        self.logger.debug(f"Starting {desc.lower()}")
         max_threads = min(len(dates), self.max_threads)
-        self.logger.
+        self.logger.debug(f"DataWrapper Max threads set at: {max_threads}")
         with ThreadPoolExecutor(max_workers=max_threads) as executor:
             futures = {
                 executor.submit(self._process_date_with_retry, date, max_retries): date
@@ -166,83 +183,6 @@ class DataWrapper:
                 f"Unknown Priority {priority}"
             )
 
-    def _enhanced_display_table(self, df: pd.DataFrame) -> pd.DataFrame.style:
-        """Format the update plan table for better readability"""
-        return df.style \
-            .bar(subset=["file_age_minutes"], color="#5fba7d") \
-            .background_gradient(subset=["update_priority"], cmap="YlOrBr") \
-            .set_caption(f"Update Plan: {self.dataclass.__name__}")
-
-    def generate_update_plan(self) -> pd.DataFrame:
-        """Generate update plan with parallel file status checks"""
-        dates = self.generate_date_range()
-        history_start = self.reference_date - datetime.timedelta(days=self.history_days_threshold)
-        rows = []
-
-        with ThreadPoolExecutor() as executor:
-            future_to_date = {
-                executor.submit(self._get_file_status, date): date
-                for date in dates
-            }
-
-            for future in tqdm(as_completed(future_to_date),
-                               total=len(future_to_date),
-                               desc=f"Analyzing files for {self.dataclass.__name__} ",
-                               disable=not self.show_progress):
-                current_date = future_to_date[future]
-                file_exists, file_age = future.result()
-                rows.append(self._create_plan_row(
-                    current_date,
-                    history_start,
-                    file_exists,
-                    file_age
-                ))
-
-        return pd.DataFrame(rows).sort_values("update_priority")
-
-    def _get_file_status(self, date: datetime.date) -> Tuple[bool, float]:
-        """Get file existence and age with error handling"""
-        path = f"{self.data_path}{date.year}/{date.month:02d}/{date.day:02d}/{self.parquet_filename}"
-        try:
-            exists = self.fs.exists(path)
-            age = self.age_checker.get_file_or_dir_age_minutes(path, self.fs) if exists else None
-            return exists, age
-        except Exception as e:
-            self.logger.warning(f"Error checking {path}: {str(e)}")
-            return False, None
-
-    def _create_plan_row(self,
-                         date: datetime.date,
-                         history_start: datetime.date,
-                         file_exists: bool,
-                         file_age: float) -> dict:
-        """Create a row for the update plan DataFrame"""
-        within_history = history_start <= date <= self.reference_date
-        category, update_required = "file_is_recent", False
-
-        if self.overwrite:
-            category, update_required = "overwrite", True
-        elif within_history:
-            if not file_exists:
-                category, update_required = "missing_in_history", True
-            elif file_age > self.max_age_minutes:
-                category, update_required = "existing_but_stale", True
-        elif not file_exists and not self.ignore_missing:
-            category, update_required = "missing_outside_history", True
-
-        return {
-            "date": date,
-            "file_exists": file_exists,
-            "file_age_minutes": file_age,
-            "age_threshold": self.max_age_minutes,
-            "within_history": within_history,
-            "ignore_missing": self.ignore_missing,
-            "update_category": category,
-            "update_priority": self.priority_map[category],
-            "update_required": update_required,
-            "class": self.dataclass.__name__
-        }
-
     def _process_date_with_retry(self, date: datetime.date, max_retries: int):
         """Process a date with retry logic"""
         for attempt in range(1, max_retries + 1):
@@ -267,13 +207,14 @@ class DataWrapper:
         self.logger.debug(f"Class Params: {self.class_params}")
         self.logger.debug(f"Load Params: {self.load_params}")
 
-
-
-
-
-
-
-
+        df = pd.DataFrame()
+        with self.dataclass(**self.class_params) as data:
+            df = data.load_period(
+                dt_field=self.date_field,
+                start=date,
+                end=date,
+                **self.load_params
+            )
 
         if len(df.index)==0:
             self.logger.warning(f"No data found for {date}")
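The net effect of the changes above is that plan generation moves out of DataWrapper and into the new UpdatePlanner. A hedged sketch of the resulting flow follows; the constructor arguments mirror attribute names visible in this diff, but the full signature is not shown here, so treat them as assumptions.

    # Hedged sketch: DataWrapper now builds its plan through UpdatePlanner in __init__
    # and process() consumes self.update_plan. "MyDataset" is a hypothetical dataclass.
    from sibi_dst.utils import DataWrapper, UpdatePlanner  # UpdatePlanner is newly exported

    wrapper = DataWrapper(
        dataclass=MyDataset,
        date_field="created_at",
        data_path="file:///data/my_dataset/",
        parquet_filename="data.parquet",
        start_date="2025-01-01",
        end_date="2025-01-31",
        max_threads=3,          # new default per the diff
    )
    wrapper.process()           # iterates self.update_plan instead of generate_update_plan()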
sibi_dst/utils/parquet_saver.py
CHANGED
@@ -1,6 +1,7 @@
 import base64
 import hashlib
 import logging
+import threading
 import warnings
 from typing import Optional
 
@@ -27,6 +28,7 @@ class ParquetSaver:
         self.logger.set_level(logging.DEBUG if self.debug else logging.INFO)
         self.fs = fs
         self.protocol = self.parquet_storage_path.split(":")[0]
+        self._lock = threading.Lock()
 
     def save_to_parquet(self, parquet_filename: Optional[str] = None, clear_existing=True):
         """
@@ -34,18 +36,19 @@ class ParquetSaver:
         :param parquet_filename: Filename for the Parquet file.
         :param clear_existing: Whether to clear existing files in the target directory.
         """
-
-
-
-
-
-
-
-
-
-
-
-        self.fs.close
+        with self._lock:
+            full_path = self._construct_full_path(parquet_filename)
+            self.logger.info(f"Save method for :{full_path}")
+            # Ensure directory exists and clear if necessary
+            self._ensure_directory_exists(full_path, clear_existing=clear_existing)
+
+            # Define schema and save DataFrame to Parquet
+            schema = self._define_schema()
+            self._convert_dtypes(schema)
+            self._save_dataframe_to_parquet(full_path, schema)
+            # Close the filesystem if the close method exists
+            if hasattr(self.fs, 'close') and callable(getattr(self.fs, 'close', None)):
+                self.fs.close()
 
     def _define_schema(self) -> pa.Schema:
         """Define a PyArrow schema dynamically based on df_result column types."""