sibi-dst 2025.9.13__tar.gz → 2025.9.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/PKG-INFO +2 -1
  2. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/pyproject.toml +2 -1
  3. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/_df_helper.py +1 -1
  4. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/base.py +40 -18
  5. sibi_dst-2025.9.15/sibi_dst/utils/dask_utils.py +436 -0
  6. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst.egg-info/PKG-INFO +2 -1
  7. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst.egg-info/requires.txt +1 -0
  8. sibi_dst-2025.9.13/sibi_dst/utils/dask_utils.py +0 -200
  9. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/README.md +0 -0
  10. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/setup.cfg +0 -0
  11. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/__init__.py +0 -0
  12. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/__init__.py +0 -0
  13. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/_artifact_updater_async.py +0 -0
  14. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
  15. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
  16. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/_parquet_reader.py +0 -0
  17. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/__init__.py +0 -0
  18. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  19. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  20. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  21. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
  22. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  23. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  24. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
  25. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  26. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  27. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
  28. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  29. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/core/__init__.py +0 -0
  30. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/core/_defaults.py +0 -0
  31. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
  32. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/core/_params_config.py +0 -0
  33. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/core/_query_config.py +0 -0
  34. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/geopy_helper/__init__.py +0 -0
  35. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
  36. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/geopy_helper/utils.py +0 -0
  37. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/__init__.py +0 -0
  38. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
  39. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
  40. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
  41. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
  42. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
  43. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
  44. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/osmnx_helper/utils.py +0 -0
  45. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/tests/__init__.py +0 -0
  46. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/tests/test_baseclass.py +0 -0
  47. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
  48. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/__init__.py +0 -0
  49. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/async_utils.py +0 -0
  50. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/__init__.py +0 -0
  51. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/base_attacher.py +0 -0
  52. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/base_data_cube.py +0 -0
  53. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/base_parquet_artifact.py +0 -0
  54. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/base_parquet_reader.py +0 -0
  55. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/base_pipeline.py +0 -0
  56. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/base_pipeline_template.py +0 -0
  57. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/boilerplate/hybrid_data_loader.py +0 -0
  58. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/business_days.py +0 -0
  59. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/clickhouse_writer.py +0 -0
  60. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/credentials.py +0 -0
  61. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/data_from_http_source.py +0 -0
  62. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/data_utils.py +0 -0
  63. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/data_wrapper.py +0 -0
  64. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/date_utils.py +0 -0
  65. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/df_utils.py +0 -0
  66. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/file_age_checker.py +0 -0
  67. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/file_utils.py +0 -0
  68. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/filepath_generator.py +0 -0
  69. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/iceberg_saver.py +0 -0
  70. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/log_utils.py +0 -0
  71. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/manifest_manager.py +0 -0
  72. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/parquet_saver.py +0 -0
  73. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/periods.py +0 -0
  74. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/phone_formatter.py +0 -0
  75. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/progress/__init__.py +0 -0
  76. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/progress/jobs.py +0 -0
  77. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/progress/sse_runner.py +0 -0
  78. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/storage_config.py +0 -0
  79. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/storage_hive.py +0 -0
  80. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/storage_manager.py +0 -0
  81. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/update_planner.py +0 -0
  82. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/webdav_client.py +0 -0
  83. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/write_gatekeeper.py +0 -0
  84. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/__init__.py +0 -0
  85. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/__init__.py +0 -0
  86. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
  87. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
  88. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
  89. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  90. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  91. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  92. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
  93. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
  94. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
  95. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
  96. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
  97. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
  98. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
  99. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
  100. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
  101. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
  102. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/utils/__init__.py +0 -0
  103. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/v2/utils/log_utils.py +0 -0
  104. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst.egg-info/SOURCES.txt +0 -0
  105. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst.egg-info/dependency_links.txt +0 -0
  106. {sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst.egg-info/top_level.txt +0 -0
{sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sibi-dst
- Version: 2025.9.13
+ Version: 2025.9.15
  Summary: A data science toolkit for scalable data processing and analysis.
  Requires-Python: >=3.11
  Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: clickhouse-driver>=0.2.9
  Requires-Dist: dask>=2025.9.1
  Requires-Dist: distributed>=2025.9.1
  Requires-Dist: fastapi>=0.118.0
+ Requires-Dist: filelock>=3.20.0
  Requires-Dist: folium>=0.20.0
  Requires-Dist: mysqlclient>=2.2.7
  Requires-Dist: opentelemetry-api>=1.37.0
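filelock>=3.20.0 is the only new runtime dependency in this release; it backs the cross-process registry lock in the rewritten sibi_dst/utils/dask_utils.py further down. A minimal sketch of that locking pattern, reusing the registry path the new module declares (the read logic here is illustrative, not part of the diff):

import json
import os
import tempfile

from filelock import FileLock

registry_path = os.path.join(tempfile.gettempdir(), "shared_dask_cluster.json")
lock = FileLock(registry_path + ".lock")

with lock:  # serializes registry reads/writes across processes
    if os.path.exists(registry_path):
        with open(registry_path) as f:
            print(json.load(f))  # e.g. {"address": "tcp://...", "refcount": 2}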
{sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "sibi-dst"
- version = "2025.9.13"
+ version = "2025.9.15"
  description = "A data science toolkit for scalable data processing and analysis."
  readme = "README.md"
  requires-python = ">=3.11"
@@ -10,6 +10,7 @@ dependencies = [
      "dask>=2025.9.1",
      "distributed>=2025.9.1",
      "fastapi>=0.118.0",
+     "filelock>=3.20.0",
      "folium>=0.20.0",
      "mysqlclient>=2.2.7",
      "opentelemetry-api>=1.37.0",
{sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/df_helper/_df_helper.py
@@ -137,7 +137,7 @@ class DfHelper(ManagedResource):
      def __init__(self, backend="sqlalchemy", **kwargs):
          self.default_config = self.default_config or {}
          kwargs = {**self.default_config.copy(), **kwargs}
-         kwargs.setdefault("auto_sse", True)
+         kwargs.setdefault("auto_sse", False)
          super().__init__(**kwargs)
          self.backend = backend

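The only functional change to DfHelper flips the default of auto_sse from True to False, so ManagedResource no longer creates its queue-backed SSE sink implicitly. A minimal sketch of restoring the previous behavior by opting in explicitly (the import path is assumed from the file list above; only the keyword names come from the diff):

from sibi_dst.df_helper import DfHelper  # assumed public import path

helper = DfHelper(backend="sqlalchemy", auto_sse=True)  # re-enable the eager SSE sink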
{sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst/utils/base.py
@@ -14,7 +14,18 @@ from sibi_dst.utils import Logger

  # --------- Minimal built-in SSE sink (used when auto_sse=True) ----------
  class _QueueSSE:
-     """Async queue–backed SSE sink: async send/put, async iterator, graceful close."""
+     """
+     Handles asynchronous streaming of events with structured data.
+
+     This class provides the ability to manage an asynchronous queue for handling
+     streamed Server-Sent Events (SSE). It supports operations like sending events
+     with associated data, manually enqueuing items, and iterating over items in an
+     asynchronous loop. The class also includes mechanisms for clean closure of the
+     stream.
+
+     :ivar q: An asynchronous queue used to store events and data.
+     :type q: asyncio.Queue
+     """
      __slots__ = ("q", "_closed")

      def __init__(self) -> None:
@@ -46,11 +57,31 @@
  # ------------------------------ Base class ------------------------------
  class ManagedResource(abc.ABC):
      """
-     Owns a logger and optional fsspec filesystem. Can emit SSE events via:
-       - an async emitter callable: await emitter(event, data)
-       - a sink exposing async send(event, data) or async put(item)
-     If neither is provided and auto_sse=True, a queue-backed sink is created eagerly.
-     Thread-safe lifecycle, CM support, GC finalizer.
+     Management of shared resources with configurable verbosity, logging,
+     and support for external file systems and server-sent events (SSE).
+
+     This class is designed to assist in managing resources such as logging,
+     file systems, and SSE within an asynchronous or synchronous environment.
+     It provides facilities for handling resource lifecycle, introspection,
+     and cleanup while ensuring resources are appropriately managed. The class
+     also supports lazy initialization of external dependencies via factories.
+
+     :ivar verbose: Controls verbosity of logging or operations. If set to True,
+         more detailed logging/output will be generated.
+     :type verbose: bool
+     :ivar debug: Enables debug-level logging and internal diagnostics when True.
+         Typically used for troubleshooting purposes.
+     :type debug: bool
+     :ivar logger: The logger instance used for this resource. If left unset,
+         a default logger will be created.
+     :type logger: Optional[Logger]
+     :ivar fs: The file system interface being used. Typically an instance of
+         `fsspec.AbstractFileSystem`. If not provided, it may be created lazily
+         using a supplied factory function.
+     :type fs: Optional[fsspec.AbstractFileSystem]
+     :ivar emitter: A callable, potentially asynchronous, function for emitting
+         events. Events are sent as a combination of event names and payload data.
+     :type emitter: Optional[Callable[[str, Dict[str, Any]], Awaitable[None]]]
      """

      __slots__ = (
@@ -74,28 +105,23 @@ class ManagedResource(abc.ABC):
          debug: bool = False,
          log_cleanup_errors: bool = True,
          logger: Optional[Logger] = None,
-         # filesystem
          fs: Optional[fsspec.AbstractFileSystem] = None,
          fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None,
-         # SSE
          emitter: Optional[Callable[[str, Dict[str, Any]], Awaitable[None]]] = None,
          emitter_factory: Optional[Callable[[], Callable[[str, Dict[str, Any]], Awaitable[None]]]] = None,
          sse: Optional[object] = None,
          sse_factory: Optional[Callable[[], object]] = None,
-         auto_sse: bool = False,  # eager auto-create if no emitter/sink is supplied
+         auto_sse: bool = False,
          **_: object,
      ) -> None:
-         # flags
          self.verbose = verbose
          self.debug = debug
          self._log_cleanup_errors = log_cleanup_errors

-         # lifecycle
          self._is_closed = False
          self._closing = False
          self._close_lock = threading.RLock()

-         # logger
          if logger is None:
              self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
              self._owns_logger = True
@@ -105,7 +131,6 @@
              self.logger = logger
              self._owns_logger = False

-         # fs
          self.fs: Optional[fsspec.AbstractFileSystem] = None
          self._fs_factory = None
          self._owns_fs = False
@@ -119,7 +144,6 @@
              self._fs_factory = fs_factory
              self._owns_fs = True

-         # sse / emitter
          self._sse: Optional[object] = None
          self._sse_factory: Optional[Callable[[], object]] = None
          self._owns_sse = False
@@ -140,16 +164,15 @@
              self._sse_factory = sse_factory
              self._owns_sse = True

-         # EAGER auto-SSE: create sink+emitter now if none supplied
          if self._auto_sse and self._sse is None and self._emitter is None and self._sse_factory is None:
              self._create_auto_sse()

-         # GC finalizer
+         # Garbage Collector finaliser
          self._finalizer = weakref.finalize(self, self._finalize_static, weakref.ref(self))

          if self.debug:
              with contextlib.suppress(Exception):
-                 self.logger.debug("Initialized %s %s", self.__class__.__name__, repr(self))
+                 self.logger.debug("Initialised %s %s", self.__class__.__name__, repr(self))

      # ---------- Introspection ----------
      @property
@@ -222,7 +245,6 @@

      # ---------- SSE ----------
      def _create_auto_sse(self) -> None:
-         # internal helper: create queue sink + emitter, mark as owned
          sink = _QueueSSE()
          self._sse = sink
          self._owns_sse = True
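The base.py hunks are otherwise comment and docstring churn: the inline section comments were dropped in favor of the expanded class docstrings, and two spellings switched to British English. As a reference for the constructor contract those docstrings describe, a hedged sketch of passing an explicit async emitter so the eager auto-SSE branch never fires (keyword names come from the __init__ signature above; the Worker subclass is invented and assumes ManagedResource declares no abstract methods beyond what this diff shows):

from sibi_dst.utils.base import ManagedResource  # path per the file list

class Worker(ManagedResource):  # hypothetical concrete resource
    pass

async def emit(event: str, data: dict) -> None:
    print(event, data)  # stand-in for a real SSE transport

worker = Worker(emitter=emit, debug=True)  # emitter supplied, auto_sse stays False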
sibi_dst-2025.9.15/sibi_dst/utils/dask_utils.py
@@ -0,0 +1,436 @@
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ from typing import List, Any, Dict
+
+ import dask
+ import dask.dataframe as dd
+
+ def _to_int_safe(x) -> int:
+     """
+     Convert scalar-like to int safely.
+     Handles numpy scalars, pandas Series/DataFrame outputs.
+     """
+     if hasattr(x, "item"):  # numpy scalar, pandas scalar
+         return int(x.item())
+     if hasattr(x, "iloc"):  # Series-like
+         return int(x.iloc[0])
+     return int(x)
+
+ def dask_is_probably_empty(ddf: dd.DataFrame) -> bool:
+     return getattr(ddf, "npartitions", 0) == 0 or len(ddf._meta.columns) == 0
+
+
+ def dask_is_empty_truthful(ddf: dd.DataFrame) -> bool:
+     n = ddf.map_partitions(len).sum().compute()
+     return int(n) == 0
+
+
+ def dask_is_empty(ddf: dd.DataFrame, *, sample: int = 4) -> bool:
+     if dask_is_probably_empty(ddf):
+         return True
+
+     k = min(max(sample, 1), ddf.npartitions)
+     probes = dask.compute(*[
+         ddf.get_partition(i).map_partitions(len) for i in range(k)
+     ], scheduler="threads")
+
+     if any(_to_int_safe(n) > 0 for n in probes):
+         return False
+     if k == ddf.npartitions and all(_to_int_safe(n) == 0 for n in probes):
+         return True
+
+     return dask_is_empty_truthful(ddf)
+
+ class UniqueValuesExtractor:
+     @staticmethod
+     def _compute_to_list_sync(series) -> List[Any]:
+         """Run in a worker thread when Dask-backed."""
+         if hasattr(series, "compute"):
+             return series.compute().tolist()
+         return series.tolist()
+
+     async def compute_to_list(self, series) -> List[Any]:
+         # Offload potential Dask .compute() to a thread to avoid blocking the event loop
+         return await asyncio.to_thread(self._compute_to_list_sync, series)
+
+     async def extract_unique_values(self, df, *columns: str) -> Dict[str, List[Any]]:
+         async def one(col: str):
+             ser = df[col].dropna().unique()
+             return col, await self.compute_to_list(ser)
+
+         pairs = await asyncio.gather(*(one(c) for c in columns))
+         return dict(pairs)
+
+ import asyncio
+ import json
+ import logging
+ import os
+ import tempfile
+ from contextlib import suppress, asynccontextmanager, contextmanager
+ from typing import Optional
+ from dask.distributed import Client, LocalCluster, get_client
+ from filelock import FileLock
+
+
+ class DaskClientMixin:
+     """
+     Provides shared Dask client lifecycle management with:
+       - Shared registry (JSON + file lock)
+       - Automatic refcounting across processes
+       - Auto-cleanup of stale clusters
+       - Optional background watchdog to monitor cluster health
+     """
+
+     REGISTRY_PATH = os.path.join(tempfile.gettempdir(), "shared_dask_cluster.json")
+     REGISTRY_LOCK = FileLock(REGISTRY_PATH + ".lock")
+     WATCHDOG_INTERVAL = 60  # seconds between health checks
+
+     def __init__(self, **kwargs):
+         self.dask_client: Optional[Client] = None
+         self.own_dask_client: bool = False
+         self.logger = kwargs.get("logger") or logging.getLogger(__name__)
+         self._watchdog_task: Optional[asyncio.Task] = None
+         self._watchdog_stop = asyncio.Event()
+
+     # ----------------------------------------------------------------------
+     # Registry management
+     # ----------------------------------------------------------------------
+     @classmethod
+     def _read_registry(cls) -> Optional[dict]:
+         """Read registry JSON if it exists and is valid."""
+         if not os.path.exists(cls.REGISTRY_PATH):
+             return None
+         try:
+             with open(cls.REGISTRY_PATH, "r") as f:
+                 data = json.load(f)
+             if "address" not in data or not isinstance(data["address"], str):
+                 return None
+             return data
+         except (json.JSONDecodeError, OSError):
+             return None
+
+     @classmethod
+     def _write_registry(cls, data: dict) -> None:
+         """Write updated registry JSON atomically."""
+         tmp_path = cls.REGISTRY_PATH + ".tmp"
+         with open(tmp_path, "w") as f:
+             json.dump(data, f)
+         os.replace(tmp_path, cls.REGISTRY_PATH)
+
+     @classmethod
+     def _remove_registry(cls) -> None:
+         """Delete the registry file if present."""
+         with suppress(FileNotFoundError):
+             os.remove(cls.REGISTRY_PATH)
+
+     @classmethod
+     def _cleanup_stale_registry(cls, logger=None):
+         """Detect and remove stale registry entries if cluster is unreachable."""
+         registry = cls._read_registry()
+         if not registry:
+             return
+         try:
+             client = Client(address=registry["address"], timeout=5)
+             client.close()
+         except Exception:
+             if logger:
+                 logger.warning(
+                     f"Detected stale Dask cluster registry at {registry.get('address')}. Cleaning up."
+                 )
+             cls._remove_registry()
+
+     # ----------------------------------------------------------------------
+     # Dask client initialization
+     # ----------------------------------------------------------------------
+     def _init_dask_client(
+         self,
+         dask_client: Optional[Client] = None,
+         *,
+         logger=None,
+         scheduler_address: Optional[str] = None,
+         use_remote_cluster: bool = False,
+         n_workers: int = 2,
+         threads_per_worker: int = 1,
+         processes: bool = False,
+         asynchronous: bool = False,
+         memory_limit: str = "auto",
+         local_directory: Optional[str] = None,
+         silence_logs: str = "info",
+         resources: Optional[dict] = None,
+         timeout: int = 30,
+         watchdog: bool = True,
+     ):
+         """Initialize or attach to a shared Dask client."""
+         self.logger = logger or self.logger
+         self.dask_client = dask_client
+         self.own_dask_client = False
+
+         # Silence excessive logging
+         logging.getLogger("distributed.scheduler").setLevel(logging.WARNING)
+         logging.getLogger("distributed.worker").setLevel(logging.WARNING)
+         logging.getLogger("distributed.shuffle._scheduler_plugin").setLevel(logging.ERROR)
+
+         # 1️⃣ Try reusing existing client
+         if self.dask_client is None:
+             with suppress(ValueError, RuntimeError):
+                 self.dask_client = get_client()
+
+         # 2️⃣ Try remote cluster connection
+         if self.dask_client is None and use_remote_cluster and scheduler_address:
+             try:
+                 self.dask_client = Client(address=scheduler_address, timeout=timeout)
+                 self.own_dask_client = True
+                 self.logger.info(
+                     f"Connected to external Dask scheduler at {scheduler_address}. "
+                     f"Dashboard: {self.dask_client.dashboard_link}"
+                 )
+                 if watchdog:
+                     self._start_watchdog()
+                 return
+             except Exception as e:
+                 self.logger.warning(
+                     f"Failed to connect to remote Dask scheduler: {e}. Falling back to local cluster."
+                 )
+
+         # 3️⃣ Shared local cluster via registry
+         with self.REGISTRY_LOCK:
+             self._cleanup_stale_registry(self.logger)
+             registry = self._read_registry()
+
+             if registry:
+                 try:
+                     self.dask_client = Client(address=registry["address"], timeout=timeout)
+                     registry["refcount"] = registry.get("refcount", 0) + 1
+                     self._write_registry(registry)
+                     self.logger.info(
+                         f"Reusing existing LocalCluster at {registry['address']} (refcount={registry['refcount']})."
+                     )
+                     if watchdog:
+                         self._start_watchdog()
+                     return
+                 except Exception:
+                     self.logger.warning("Existing cluster unreachable. Recreating.")
+                     self._remove_registry()
+
+             # Create a new local cluster
+             cluster = LocalCluster(
+                 n_workers=n_workers,
+                 threads_per_worker=threads_per_worker,
+                 processes=processes,
+                 asynchronous=asynchronous,
+                 memory_limit=memory_limit,
+                 local_directory=local_directory,
+                 silence_logs=silence_logs,
+                 resources=resources,
+                 timeout=timeout,
+             )
+
+             self.dask_client = Client(cluster)
+             self.own_dask_client = True
+             registry = {"address": cluster.scheduler_address, "refcount": 1}
+             self._write_registry(registry)
+             self.logger.info(
+                 f"Started new LocalCluster ({n_workers} workers × {threads_per_worker} threads). "
+                 f"Dashboard: {self.dask_client.dashboard_link}"
+             )
+
+             if watchdog:
+                 self._start_watchdog()
+
+     # ----------------------------------------------------------------------
+     # Watchdog logic
+     # ----------------------------------------------------------------------
+     def _start_watchdog(self):
+         """Spawn a background watchdog that monitors registry health."""
+         async def watchdog_loop():
+             while not self._watchdog_stop.is_set():
+                 await asyncio.sleep(self.WATCHDOG_INTERVAL)
+                 try:
+                     self._cleanup_stale_registry(self.logger)
+                 except Exception as e:
+                     self.logger.warning(f"Dask watchdog encountered an error: {e}")
+
+         try:
+             loop = asyncio.get_event_loop()
+             if loop.is_running():
+                 self._watchdog_task = loop.create_task(watchdog_loop())
+                 self.logger.debug("Started Dask registry watchdog (async).")
+         except RuntimeError:
+             # Fallback for synchronous usage
+             self.logger.debug("Watchdog skipped (no active event loop).")
+
+     async def _stop_watchdog(self):
+         """Stop the watchdog loop gracefully."""
+         self._watchdog_stop.set()
+         if self._watchdog_task:
+             await asyncio.wait([self._watchdog_task], timeout=5)
+             self._watchdog_task = None
+
+     # ----------------------------------------------------------------------
+     # Client cleanup
+     # ----------------------------------------------------------------------
+     def _close_dask_client(self):
+         """Safely close client and update registry reference count."""
+         if not self.dask_client:
+             return
+
+         with self.REGISTRY_LOCK:
+             registry = self._read_registry()
+
+             if registry and "refcount" in registry:
+                 registry["refcount"] = max(0, registry["refcount"] - 1)
+                 if registry["refcount"] == 0:
+                     self.logger.info("Reference count 0 — closing LocalCluster.")
+                     try:
+                         cluster = getattr(self.dask_client, "cluster", None)
+                         self.dask_client.close()
+                         if cluster:
+                             cluster.close()
+                     except Exception as e:
+                         self.logger.warning(f"Error closing Dask cluster: {e}")
+                     self._remove_registry()
+                 else:
+                     self._write_registry(registry)
+                     self.logger.debug(
+                         f"Decremented LocalCluster refcount to {registry['refcount']}."
+                     )
+             else:
+                 with suppress(Exception):
+                     self.dask_client.close()
+                 self.logger.debug("Closed Dask client without registry tracking.")
+
+         # Stop watchdog if active
+         if self._watchdog_task:
+             asyncio.create_task(self._stop_watchdog())
+
+
+ # ----------------------------------------------------------------------
+ # Shared Dask session (sync + async)
+ # ----------------------------------------------------------------------
+ def shared_dask_session(*, async_mode: bool = True, **kwargs):
+     """
+     Context manager for a shared Dask session (supports async + sync).
+
+     Example:
+         async with shared_dask_session(logger=logger) as client:
+             ...
+
+         with shared_dask_session(async_mode=False) as client:
+             ...
+     """
+     mixin = DaskClientMixin()
+     mixin._init_dask_client(**kwargs)
+
+     if async_mode:
+         @asynccontextmanager
+         async def _async_manager():
+             try:
+                 yield mixin.dask_client
+             finally:
+                 mixin._close_dask_client()
+         return _async_manager()
+     else:
+         @contextmanager
+         def _sync_manager():
+             try:
+                 yield mixin.dask_client
+             finally:
+                 mixin._close_dask_client()
+         return _sync_manager()
+
+ # from contextlib import suppress, asynccontextmanager
+ # from dask.distributed import Client, LocalCluster, get_client
+ # import os
+ #
+ # class DaskClientMixin:
+ #     """
+ #     Provides shared Dask client lifecycle management.
+ #     Ensures reuse of an existing client if available,
+ #     or creates a local in-process Dask cluster for fallback.
+ #     """
+ #
+ #     def _init_dask_client(
+ #         self,
+ #         dask_client=None,
+ #         logger=None,
+ #         *,
+ #         n_workers: int = 1,
+ #         threads_per_worker: int = 1,
+ #         processes: bool = False,
+ #         asynchronous: bool = False,
+ #         memory_limit: str = "auto",
+ #         #dashboard_address: str | None = None,
+ #         local_directory: str | None = None,
+ #         silence_logs: str = "info",
+ #         resources: dict | None = None,
+ #         timeout: int = 30,
+ #     ):
+ #         self.dask_client = dask_client
+ #         self.own_dask_client = False
+ #         self.logger = logger
+ #         # Apply log filters globally
+ #         logging.getLogger("distributed.shuffle._scheduler_plugin").setLevel(
+ #             logging.ERROR
+ #         )
+ #         logging.getLogger("distributed.scheduler").setLevel(logging.WARNING)
+ #         logging.getLogger("distributed.worker").setLevel(logging.WARNING)
+ #
+ #         if self.dask_client is None:
+ #             with suppress(ValueError, RuntimeError):
+ #                 # Try to attach to an existing client (common in shared Dask setups)
+ #                 self.dask_client = get_client()
+ #
+ #         if self.dask_client is None:
+ #             # Default to half of logical cores if not specified
+ #             n_workers = n_workers or max(2, os.cpu_count() // 2)
+ #
+ #             cluster = LocalCluster(
+ #                 n_workers=n_workers,
+ #                 threads_per_worker=threads_per_worker,
+ #                 processes=processes,
+ #                 asynchronous=asynchronous,
+ #                 memory_limit=memory_limit,
+ #                 local_directory=local_directory,
+ #                 silence_logs=silence_logs,
+ #                 resources=resources,
+ #                 timeout=timeout,
+ #             )
+ #
+ #             self.dask_client = Client(cluster)
+ #             self.own_dask_client = True
+ #
+ #             if self.logger:
+ #                 self.logger.info(
+ #                     f"Started local Dask cluster with {n_workers} workers × {threads_per_worker} threads "
+ #                     f"({memory_limit} memory per worker). Dashboard: {self.dask_client.dashboard_link}"
+ #                 )
+ #         else:
+ #             if self.logger:
+ #                 self.logger.debug(
+ #                     f"Using existing Dask client: {self.dask_client.dashboard_link}"
+ #                 )
+ #
+ #     def _close_dask_client(self):
+ #         """Close the Dask client if this instance created it."""
+ #         if getattr(self, "own_dask_client", False) and self.dask_client is not None:
+ #             try:
+ #                 cluster = getattr(self.dask_client, "cluster", None)
+ #                 self.dask_client.close()
+ #                 if cluster is not None:
+ #                     cluster.close()
+ #                 if self.logger:
+ #                     self.logger.info("Closed local Dask client and cluster.")
+ #             except Exception as e:
+ #                 if self.logger:
+ #                     self.logger.warning(f"Error while closing Dask client: {e}")
+ #
+ # @asynccontextmanager
+ # async def shared_dask_session(**kwargs):
+ #     mixin = DaskClientMixin()
+ #     mixin._init_dask_client(**kwargs)
+ #     try:
+ #         yield mixin.dask_client
+ #     finally:
+ #         mixin._close_dask_client()
{sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: sibi-dst
- Version: 2025.9.13
+ Version: 2025.9.15
  Summary: A data science toolkit for scalable data processing and analysis.
  Requires-Python: >=3.11
  Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: clickhouse-driver>=0.2.9
  Requires-Dist: dask>=2025.9.1
  Requires-Dist: distributed>=2025.9.1
  Requires-Dist: fastapi>=0.118.0
+ Requires-Dist: filelock>=3.20.0
  Requires-Dist: folium>=0.20.0
  Requires-Dist: mysqlclient>=2.2.7
  Requires-Dist: opentelemetry-api>=1.37.0
{sibi_dst-2025.9.13 → sibi_dst-2025.9.15}/sibi_dst.egg-info/requires.txt
@@ -3,6 +3,7 @@ clickhouse-driver>=0.2.9
  dask>=2025.9.1
  distributed>=2025.9.1
  fastapi>=0.118.0
+ filelock>=3.20.0
  folium>=0.20.0
  mysqlclient>=2.2.7
  opentelemetry-api>=1.37.0
sibi_dst-2025.9.13/sibi_dst/utils/dask_utils.py
@@ -1,200 +0,0 @@
- from __future__ import annotations
-
- import asyncio
- import logging
- from typing import List, Any, Dict
-
- import dask
- # dask.config.set({"distributed.worker.daemon": False})
- import dask.dataframe as dd
-
- def _to_int_safe(x) -> int:
-     """
-     Convert scalar-like to int safely.
-     Handles numpy scalars, pandas Series/DataFrame outputs.
-     """
-     if hasattr(x, "item"):  # numpy scalar, pandas scalar
-         return int(x.item())
-     if hasattr(x, "iloc"):  # Series-like
-         return int(x.iloc[0])
-     return int(x)
-
- def dask_is_probably_empty(ddf: dd.DataFrame) -> bool:
-     return getattr(ddf, "npartitions", 0) == 0 or len(ddf._meta.columns) == 0
-
-
- def dask_is_empty_truthful(ddf: dd.DataFrame) -> bool:
-     n = ddf.map_partitions(len).sum().compute()
-     return int(n) == 0
-
-
- def dask_is_empty(ddf: dd.DataFrame, *, sample: int = 4) -> bool:
-     if dask_is_probably_empty(ddf):
-         return True
-
-     k = min(max(sample, 1), ddf.npartitions)
-     probes = dask.compute(*[
-         ddf.get_partition(i).map_partitions(len) for i in range(k)
-     ], scheduler="threads")
-
-     if any(_to_int_safe(n) > 0 for n in probes):
-         return False
-     if k == ddf.npartitions and all(_to_int_safe(n) == 0 for n in probes):
-         return True
-
-     return dask_is_empty_truthful(ddf)
-
- class UniqueValuesExtractor:
-     @staticmethod
-     def _compute_to_list_sync(series) -> List[Any]:
-         """Run in a worker thread when Dask-backed."""
-         if hasattr(series, "compute"):
-             return series.compute().tolist()
-         return series.tolist()
-
-     async def compute_to_list(self, series) -> List[Any]:
-         # Offload potential Dask .compute() to a thread to avoid blocking the event loop
-         return await asyncio.to_thread(self._compute_to_list_sync, series)
-
-     async def extract_unique_values(self, df, *columns: str) -> Dict[str, List[Any]]:
-         async def one(col: str):
-             ser = df[col].dropna().unique()
-             return col, await self.compute_to_list(ser)
-
-         pairs = await asyncio.gather(*(one(c) for c in columns))
-         return dict(pairs)
-
- from contextlib import suppress, asynccontextmanager
- from dask.distributed import Client, LocalCluster, get_client
- import os
-
- class DaskClientMixin:
-     """
-     Provides shared Dask client lifecycle management.
-     Ensures reuse of an existing client if available,
-     or creates a local in-process Dask cluster for fallback.
-     """
-
-     def _init_dask_client(
-         self,
-         dask_client=None,
-         logger=None,
-         *,
-         n_workers: int = 1,
-         threads_per_worker: int = 1,
-         processes: bool = False,
-         asynchronous: bool = False,
-         memory_limit: str = "auto",
-         #dashboard_address: str | None = None,
-         local_directory: str | None = None,
-         silence_logs: str = "info",
-         resources: dict | None = None,
-         timeout: int = 30,
-     ):
-         self.dask_client = dask_client
-         self.own_dask_client = False
-         self.logger = logger
-         # Apply log filters globally
-         logging.getLogger("distributed.shuffle._scheduler_plugin").setLevel(
-             logging.ERROR
-         )
-         logging.getLogger("distributed.scheduler").setLevel(logging.WARNING)
-         logging.getLogger("distributed.worker").setLevel(logging.WARNING)
-
-         if self.dask_client is None:
-             with suppress(ValueError, RuntimeError):
-                 # Try to attach to an existing client (common in shared Dask setups)
-                 self.dask_client = get_client()
-
-         if self.dask_client is None:
-             # Default to half of logical cores if not specified
-             n_workers = n_workers or max(2, os.cpu_count() // 2)
-
-             cluster = LocalCluster(
-                 n_workers=n_workers,
-                 threads_per_worker=threads_per_worker,
-                 processes=processes,
-                 asynchronous=asynchronous,
-                 memory_limit=memory_limit,
-                 local_directory=local_directory,
-                 silence_logs=silence_logs,
-                 resources=resources,
-                 timeout=timeout,
-             )
-
-             self.dask_client = Client(cluster)
-             self.own_dask_client = True
-
-             if self.logger:
-                 self.logger.info(
-                     f"Started local Dask cluster with {n_workers} workers × {threads_per_worker} threads "
-                     f"({memory_limit} memory per worker). Dashboard: {self.dask_client.dashboard_link}"
-                 )
-         else:
-             if self.logger:
-                 self.logger.debug(
-                     f"Using existing Dask client: {self.dask_client.dashboard_link}"
-                 )
-
-     def _close_dask_client(self):
-         """Close the Dask client if this instance created it."""
-         if getattr(self, "own_dask_client", False) and self.dask_client is not None:
-             try:
-                 cluster = getattr(self.dask_client, "cluster", None)
-                 self.dask_client.close()
-                 if cluster is not None:
-                     cluster.close()
-                 if self.logger:
-                     self.logger.info("Closed local Dask client and cluster.")
-             except Exception as e:
-                 if self.logger:
-                     self.logger.warning(f"Error while closing Dask client: {e}")
-
- @asynccontextmanager
- async def shared_dask_session(**kwargs):
-     mixin = DaskClientMixin()
-     mixin._init_dask_client(**kwargs)
-     try:
-         yield mixin.dask_client
-     finally:
-         mixin._close_dask_client()
-
- # from contextlib import suppress
- # from dask.distributed import Client, get_client
- #
- # class DaskClientMixin:
- #     """
- #     Provides shared Dask client lifecycle management.
- #     Ensures reuse of existing client when available, otherwise creates a lightweight local one.
- #     """
- #
- #     def _init_dask_client(self, dask_client=None, logger=None):
- #         self.dask_client = dask_client
- #         self.own_dask_client = False
- #         self.logger = logger
- #
- #         if self.dask_client is None:
- #             with suppress(ValueError, RuntimeError):
- #                 # Try to attach to an existing active client if running inside a Dask context
- #                 self.dask_client = get_client()
- #
- #         if self.dask_client is None:
- #             # Start a local in-process scheduler for fallback
- #             self.dask_client = Client(processes=False)
- #             self.own_dask_client = True
- #             if self.logger:
- #                 self.logger.info(f"Started local Dask client: {self.dask_client.dashboard_link}")
- #         else:
- #             if self.logger:
- #                 self.logger.debug(f"Using existing Dask client: {self.dask_client.dashboard_link}")
- #
- #     def _close_dask_client(self):
- #         """Close client only if this instance created it."""
- #         if getattr(self, "own_dask_client", False) and self.dask_client is not None:
- #             try:
- #                 self.dask_client.close()
- #                 if self.logger:
- #                     self.logger.info("Closed local Dask client.")
- #             except Exception as e:
- #                 if self.logger:
- #                     self.logger.warning(f"Error while closing Dask client: {e}")