sibi-flux 2025.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sibi_dst/__init__.py +44 -0
- sibi_flux/__init__.py +49 -0
- sibi_flux/artifacts/__init__.py +7 -0
- sibi_flux/artifacts/base.py +166 -0
- sibi_flux/artifacts/parquet.py +360 -0
- sibi_flux/artifacts/parquet_engine/__init__.py +5 -0
- sibi_flux/artifacts/parquet_engine/executor.py +204 -0
- sibi_flux/artifacts/parquet_engine/manifest.py +101 -0
- sibi_flux/artifacts/parquet_engine/planner.py +544 -0
- sibi_flux/conf/settings.py +131 -0
- sibi_flux/core/__init__.py +5 -0
- sibi_flux/core/managed_resource/__init__.py +3 -0
- sibi_flux/core/managed_resource/_managed_resource.py +733 -0
- sibi_flux/core/type_maps/__init__.py +100 -0
- sibi_flux/dask_cluster/__init__.py +47 -0
- sibi_flux/dask_cluster/async_core.py +27 -0
- sibi_flux/dask_cluster/client_manager.py +549 -0
- sibi_flux/dask_cluster/core.py +322 -0
- sibi_flux/dask_cluster/exceptions.py +34 -0
- sibi_flux/dask_cluster/utils.py +49 -0
- sibi_flux/datacube/__init__.py +3 -0
- sibi_flux/datacube/_data_cube.py +332 -0
- sibi_flux/datacube/config_engine.py +152 -0
- sibi_flux/datacube/field_factory.py +48 -0
- sibi_flux/datacube/field_registry.py +122 -0
- sibi_flux/datacube/generator.py +677 -0
- sibi_flux/datacube/orchestrator.py +171 -0
- sibi_flux/dataset/__init__.py +3 -0
- sibi_flux/dataset/_dataset.py +162 -0
- sibi_flux/df_enricher/__init__.py +56 -0
- sibi_flux/df_enricher/async_enricher.py +201 -0
- sibi_flux/df_enricher/merger.py +253 -0
- sibi_flux/df_enricher/specs.py +45 -0
- sibi_flux/df_enricher/types.py +12 -0
- sibi_flux/df_helper/__init__.py +5 -0
- sibi_flux/df_helper/_df_helper.py +450 -0
- sibi_flux/df_helper/backends/__init__.py +34 -0
- sibi_flux/df_helper/backends/_params.py +173 -0
- sibi_flux/df_helper/backends/_strategies.py +295 -0
- sibi_flux/df_helper/backends/http/__init__.py +5 -0
- sibi_flux/df_helper/backends/http/_http_config.py +122 -0
- sibi_flux/df_helper/backends/parquet/__init__.py +7 -0
- sibi_flux/df_helper/backends/parquet/_parquet_options.py +268 -0
- sibi_flux/df_helper/backends/sqlalchemy/__init__.py +9 -0
- sibi_flux/df_helper/backends/sqlalchemy/_db_connection.py +256 -0
- sibi_flux/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
- sibi_flux/df_helper/backends/sqlalchemy/_io_dask.py +386 -0
- sibi_flux/df_helper/backends/sqlalchemy/_load_from_db.py +134 -0
- sibi_flux/df_helper/backends/sqlalchemy/_model_registry.py +239 -0
- sibi_flux/df_helper/backends/sqlalchemy/_sql_model_builder.py +42 -0
- sibi_flux/df_helper/backends/utils.py +32 -0
- sibi_flux/df_helper/core/__init__.py +15 -0
- sibi_flux/df_helper/core/_defaults.py +104 -0
- sibi_flux/df_helper/core/_filter_handler.py +617 -0
- sibi_flux/df_helper/core/_params_config.py +185 -0
- sibi_flux/df_helper/core/_query_config.py +17 -0
- sibi_flux/df_validator/__init__.py +3 -0
- sibi_flux/df_validator/_df_validator.py +222 -0
- sibi_flux/logger/__init__.py +1 -0
- sibi_flux/logger/_logger.py +480 -0
- sibi_flux/mcp/__init__.py +26 -0
- sibi_flux/mcp/client.py +150 -0
- sibi_flux/mcp/router.py +126 -0
- sibi_flux/orchestration/__init__.py +9 -0
- sibi_flux/orchestration/_artifact_orchestrator.py +346 -0
- sibi_flux/orchestration/_pipeline_executor.py +212 -0
- sibi_flux/osmnx_helper/__init__.py +22 -0
- sibi_flux/osmnx_helper/_pbf_handler.py +384 -0
- sibi_flux/osmnx_helper/graph_loader.py +225 -0
- sibi_flux/osmnx_helper/utils.py +100 -0
- sibi_flux/pipelines/__init__.py +3 -0
- sibi_flux/pipelines/base.py +218 -0
- sibi_flux/py.typed +0 -0
- sibi_flux/readers/__init__.py +3 -0
- sibi_flux/readers/base.py +82 -0
- sibi_flux/readers/parquet.py +106 -0
- sibi_flux/utils/__init__.py +53 -0
- sibi_flux/utils/boilerplate/__init__.py +19 -0
- sibi_flux/utils/boilerplate/base_attacher.py +45 -0
- sibi_flux/utils/boilerplate/base_cube_router.py +283 -0
- sibi_flux/utils/boilerplate/base_data_cube.py +132 -0
- sibi_flux/utils/boilerplate/base_pipeline_template.py +54 -0
- sibi_flux/utils/boilerplate/hybrid_data_loader.py +193 -0
- sibi_flux/utils/clickhouse_writer/__init__.py +6 -0
- sibi_flux/utils/clickhouse_writer/_clickhouse_writer.py +225 -0
- sibi_flux/utils/common.py +7 -0
- sibi_flux/utils/credentials/__init__.py +3 -0
- sibi_flux/utils/credentials/_config_manager.py +155 -0
- sibi_flux/utils/dask_utils.py +14 -0
- sibi_flux/utils/data_utils/__init__.py +3 -0
- sibi_flux/utils/data_utils/_data_utils.py +389 -0
- sibi_flux/utils/dataframe_utils.py +52 -0
- sibi_flux/utils/date_utils/__init__.py +10 -0
- sibi_flux/utils/date_utils/_business_days.py +220 -0
- sibi_flux/utils/date_utils/_date_utils.py +311 -0
- sibi_flux/utils/date_utils/_file_age_checker.py +319 -0
- sibi_flux/utils/file_utils.py +48 -0
- sibi_flux/utils/filepath_generator/__init__.py +5 -0
- sibi_flux/utils/filepath_generator/_filepath_generator.py +185 -0
- sibi_flux/utils/parquet_saver/__init__.py +6 -0
- sibi_flux/utils/parquet_saver/_parquet_saver.py +436 -0
- sibi_flux/utils/parquet_saver/_write_gatekeeper.py +33 -0
- sibi_flux/utils/retry.py +46 -0
- sibi_flux/utils/storage/__init__.py +7 -0
- sibi_flux/utils/storage/_fs_registry.py +112 -0
- sibi_flux/utils/storage/_storage_manager.py +257 -0
- sibi_flux/utils/storage/factory.py +33 -0
- sibi_flux-2025.12.0.dist-info/METADATA +283 -0
- sibi_flux-2025.12.0.dist-info/RECORD +110 -0
- sibi_flux-2025.12.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
# src/sibi_flux/core/managed_resource/_managed_resource.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import abc
|
|
5
|
+
import asyncio
|
|
6
|
+
import contextlib
|
|
7
|
+
import json
|
|
8
|
+
import threading
|
|
9
|
+
from typing import (
|
|
10
|
+
Any,
|
|
11
|
+
Awaitable,
|
|
12
|
+
Callable,
|
|
13
|
+
Dict,
|
|
14
|
+
Optional,
|
|
15
|
+
Protocol,
|
|
16
|
+
Self,
|
|
17
|
+
final,
|
|
18
|
+
runtime_checkable,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
import fsspec
|
|
22
|
+
|
|
23
|
+
from sibi_flux.logger import Logger
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ----------------------- SSE sink protocol -----------------------
|
|
27
|
+
@runtime_checkable
class SSESinkProtocol(Protocol):
    """
    Structural (duck-typed) description of an SSE sink.

    Sink shapes the rest of this module knows how to drive:
    - async send(event: str, data: dict)
    - async put({"event": str, "data": str|dict})
    - optional (a)close for lifecycle

    Note: because the protocol is ``runtime_checkable`` and declares all four
    members, an ``isinstance(obj, SSESinkProtocol)`` check only succeeds for
    objects exposing *every* one of ``send``, ``put``, ``aclose`` and
    ``close``. Runtime checks look at member presence only, not signatures.
    """

    def send(self, event: str, data: Dict[str, Any]) -> Any:
        """Deliver one named event with a dict payload."""
        ...

    def put(self, item: Dict[str, Any]) -> Any:
        """Deliver one pre-built ``{"event": ..., "data": ...}`` item."""
        ...

    def aclose(self) -> Any:
        """Asynchronous shutdown hook."""
        ...

    def close(self) -> Any:
        """Synchronous shutdown hook."""
        ...
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# --------- Minimal built-in SSE sink (used when auto_sse=True) ----------
|
|
45
|
+
class _QueueSSE:
    """
    Internal queue-backed sink used when auto_sse=True.

    Behaviour:
    - Iteration yields dict items {"event": str, "data": str}
    - send(event, data_dict) stores a JSON string in `data`
    - close/aclose enqueue a sentinel item so consumers stop iterating
    - send/put after close are silently dropped

    Additions:
    - maxsize for optional backpressure
    - close() is thread-safe *when the event loop is known*
    - loop is bound lazily on first async usage/iteration
    - aiter_sse() yields proper `text/event-stream` bytes
    """

    # `_close_task` keeps a strong reference to the deferred "deliver the
    # close sentinel" task so it cannot be garbage-collected mid-flight.
    __slots__ = ("q", "_closed", "_loop", "_close_lock", "_close_task")

    # Sentinel template; a copy is enqueued on close and recognised by
    # `_iter_items` through its "__close__" event name.
    _CLOSE_ITEM = {"event": "__close__", "data": ""}

    def __init__(self, *, maxsize: int = 0) -> None:
        """Create the sink. ``maxsize=0`` means unbounded (asyncio.Queue default)."""
        self.q: asyncio.Queue[Dict[str, Any]] = asyncio.Queue(maxsize=maxsize)
        self._closed = False
        self._close_lock = threading.Lock()
        self._loop: Optional[asyncio.AbstractEventLoop] = None  # bound lazily
        self._close_task: Optional[asyncio.Task[None]] = None

    def _bind_loop_if_needed(self) -> None:
        """Remember the running event loop the first time one is available."""
        if self._loop is not None:
            return
        try:
            self._loop = asyncio.get_running_loop()
        except RuntimeError:
            # Not in a running loop; we'll try again later.
            return

    def _is_closed(self) -> bool:
        """Return the closed flag, read under the close lock."""
        with self._close_lock:
            return self._closed

    async def send(self, event: str, data: Dict[str, Any]) -> None:
        """Enqueue ``event`` with ``data`` serialised to a JSON string."""
        self._bind_loop_if_needed()
        if self._is_closed():
            return
        await self.q.put({"event": event, "data": json.dumps(data)})

    async def put(self, item: Dict[str, Any]) -> None:
        """Enqueue an already-built item unchanged."""
        self._bind_loop_if_needed()
        if self._is_closed():
            return
        await self.q.put(item)

    async def aclose(self) -> None:
        """Mark the sink closed (idempotent) and enqueue the close sentinel."""
        self._bind_loop_if_needed()
        with self._close_lock:
            if self._closed:
                return
            self._closed = True

        with contextlib.suppress(Exception):
            await self.q.put(dict(self._CLOSE_ITEM))

    def close(self) -> None:
        """
        Synchronous close (idempotent).

        If a running loop was bound earlier, the sentinel insertion is
        scheduled on that loop; otherwise a best-effort direct insertion
        is attempted.
        """
        with self._close_lock:
            if self._closed:
                return
            self._closed = True

        close_item = dict(self._CLOSE_ITEM)

        # asyncio.Queue is not thread-safe; if we have a loop, schedule insertion there.
        loop = self._loop
        if loop is not None and loop.is_running():

            def _signal() -> None:
                try:
                    self.q.put_nowait(close_item)
                except asyncio.QueueFull:

                    async def _aput() -> None:
                        with contextlib.suppress(Exception):
                            await self.q.put(close_item)

                    # The event loop only keeps weak references to tasks, so
                    # hold a strong one until the sentinel has been delivered.
                    self._close_task = asyncio.create_task(_aput())

            loop.call_soon_threadsafe(_signal)
            return

        # Best-effort fallback when no loop is known.
        with contextlib.suppress(Exception):
            self.q.put_nowait(close_item)

    def __aiter__(self):
        """Iterate queued items until the close sentinel is seen."""
        return self._iter_items()

    async def _iter_items(self):
        """Yield items from the queue, stopping at the close sentinel."""
        self._bind_loop_if_needed()
        while True:
            item = await self.q.get()
            try:
                if item.get("event") == "__close__":
                    break
                yield item
            finally:
                with contextlib.suppress(Exception):
                    self.q.task_done()

    async def aiter_sse(self):
        """
        SSE wire format iterator for ASGI StreamingResponse:
            event: <name>\\n
            data: <payload>\\n
            \\n

        Yields one UTF-8 encoded frame per queued item. Multi-line payloads
        are emitted as one ``data:`` field per line.
        """
        async for item in self:
            event = item.get("event", "")
            data = item.get("data", "")

            # Preserve existing convention: data stored as JSON string.
            if not isinstance(data, str):
                data = json.dumps(data)

            # Split only on CR, LF and CRLF, the line terminators SSE knows.
            # str.splitlines() would also split on U+2028, U+0085, form feed
            # etc., which a client would then rejoin with "\n", silently
            # changing the payload.
            lines = data.replace("\r\n", "\n").replace("\r", "\n").split("\n")
            if len(lines) > 1 and lines[-1] == "":
                # A trailing terminator does not start an extra data line.
                lines.pop()

            out: list[str] = []
            if event:
                out.append(f"event: {event}\n")
            out.extend(f"data: {line}\n" for line in lines)
            out.append("\n")
            yield "".join(out).encode("utf-8")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ------------------------------ ManagedResource ------------------------------
|
|
178
|
+
class ManagedResource(abc.ABC):
|
|
179
|
+
"""
|
|
180
|
+
ManagedResource
|
|
181
|
+
|
|
182
|
+
A lifecycle base class that standardizes:
|
|
183
|
+
- sync close() and async aclose()
|
|
184
|
+
- optional filesystem attachment (fsspec)
|
|
185
|
+
- optional SSE emission for progress/log events
|
|
186
|
+
- safe idempotent cleanup
|
|
187
|
+
- conservative GC finalizer (optional)
|
|
188
|
+
|
|
189
|
+
Subclass hooks:
|
|
190
|
+
- _cleanup(): sync cleanup hook
|
|
191
|
+
- _acleanup(): async cleanup hook
|
|
192
|
+
|
|
193
|
+
Notes:
|
|
194
|
+
- close() is thread-safe.
|
|
195
|
+
- aclose() is intended for use within a single event loop.
|
|
196
|
+
- Do not rely on the finalizer for important cleanup; treat it as best-effort.
|
|
197
|
+
"""
|
|
198
|
+
|
|
199
|
+
__slots__ = (
|
|
200
|
+
"verbose",
|
|
201
|
+
"debug",
|
|
202
|
+
"_log_cleanup_errors",
|
|
203
|
+
"logger",
|
|
204
|
+
"_owns_logger",
|
|
205
|
+
"fs",
|
|
206
|
+
"_fs_factory",
|
|
207
|
+
"_owns_fs",
|
|
208
|
+
"_sse",
|
|
209
|
+
"_sse_factory",
|
|
210
|
+
"_owns_sse",
|
|
211
|
+
"_emitter",
|
|
212
|
+
"_auto_sse",
|
|
213
|
+
"_auto_sse_maxsize",
|
|
214
|
+
"_emit_timeout_s",
|
|
215
|
+
"_emit_drop_on_timeout",
|
|
216
|
+
"_validate_emitter",
|
|
217
|
+
"_is_closed",
|
|
218
|
+
"_closing",
|
|
219
|
+
"_close_lock",
|
|
220
|
+
"_aclose_lock",
|
|
221
|
+
"_finalizer_enabled",
|
|
222
|
+
"__weakref__",
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
def __init__(
|
|
226
|
+
self,
|
|
227
|
+
*,
|
|
228
|
+
verbose: bool = False,
|
|
229
|
+
debug: bool = False,
|
|
230
|
+
log_cleanup_errors: bool = True,
|
|
231
|
+
logger: Optional[Logger] = None,
|
|
232
|
+
fs: Optional[fsspec.AbstractFileSystem] = None,
|
|
233
|
+
fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None,
|
|
234
|
+
emitter: Optional[Callable[[str, Dict[str, Any]], Awaitable[None]]] = None,
|
|
235
|
+
emitter_factory: Optional[
|
|
236
|
+
Callable[[], Callable[[str, Dict[str, Any]], Awaitable[None]]]
|
|
237
|
+
] = None,
|
|
238
|
+
sse: Optional[SSESinkProtocol] = None,
|
|
239
|
+
sse_factory: Optional[Callable[[], SSESinkProtocol]] = None,
|
|
240
|
+
auto_sse: bool = False,
|
|
241
|
+
auto_sse_maxsize: int = 0,
|
|
242
|
+
emit_timeout_s: Optional[float] = None,
|
|
243
|
+
emit_drop_on_timeout: bool = True,
|
|
244
|
+
validate_emitter: bool = False,
|
|
245
|
+
finalizer_enabled: bool = True,
|
|
246
|
+
**_: object,
|
|
247
|
+
) -> None:
|
|
248
|
+
self.verbose = verbose
|
|
249
|
+
self.debug = debug
|
|
250
|
+
self._log_cleanup_errors = log_cleanup_errors
|
|
251
|
+
|
|
252
|
+
self._emit_timeout_s = emit_timeout_s
|
|
253
|
+
self._emit_drop_on_timeout = emit_drop_on_timeout
|
|
254
|
+
self._validate_emitter = validate_emitter
|
|
255
|
+
|
|
256
|
+
self._is_closed = False
|
|
257
|
+
self._closing = False
|
|
258
|
+
self._close_lock = threading.RLock()
|
|
259
|
+
self._aclose_lock = asyncio.Lock()
|
|
260
|
+
|
|
261
|
+
# ---------- Logger ----------
|
|
262
|
+
if logger is None:
|
|
263
|
+
self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
|
|
264
|
+
self._owns_logger = False # default logger is shared (stdlib getLogger)
|
|
265
|
+
level = (
|
|
266
|
+
Logger.DEBUG
|
|
267
|
+
if self.debug
|
|
268
|
+
else (Logger.INFO if self.verbose else Logger.WARNING)
|
|
269
|
+
)
|
|
270
|
+
self.logger.set_level(level)
|
|
271
|
+
else:
|
|
272
|
+
self.logger = logger
|
|
273
|
+
self._owns_logger = False
|
|
274
|
+
|
|
275
|
+
# ---------- FS ----------
|
|
276
|
+
self.fs: Optional[fsspec.AbstractFileSystem] = None
|
|
277
|
+
self._fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None
|
|
278
|
+
self._owns_fs = False
|
|
279
|
+
|
|
280
|
+
if fs is not None:
|
|
281
|
+
if not isinstance(fs, fsspec.AbstractFileSystem):
|
|
282
|
+
raise TypeError(
|
|
283
|
+
f"fs must be an fsspec.AbstractFileSystem, got {type(fs)!r}"
|
|
284
|
+
)
|
|
285
|
+
self.fs = fs
|
|
286
|
+
elif fs_factory is not None:
|
|
287
|
+
if not callable(fs_factory):
|
|
288
|
+
raise TypeError("fs_factory must be callable")
|
|
289
|
+
self._fs_factory = fs_factory
|
|
290
|
+
self._owns_fs = True
|
|
291
|
+
|
|
292
|
+
# ---------- SSE / emitter ----------
|
|
293
|
+
self._sse: Optional[SSESinkProtocol] = None
|
|
294
|
+
self._sse_factory: Optional[Callable[[], SSESinkProtocol]] = None
|
|
295
|
+
self._owns_sse = False
|
|
296
|
+
self._auto_sse = auto_sse
|
|
297
|
+
self._auto_sse_maxsize = auto_sse_maxsize
|
|
298
|
+
|
|
299
|
+
self._emitter: Optional[Callable[[str, Dict[str, Any]], Awaitable[None]]] = None
|
|
300
|
+
if emitter is not None:
|
|
301
|
+
self._emitter = emitter
|
|
302
|
+
elif emitter_factory is not None:
|
|
303
|
+
if not callable(emitter_factory):
|
|
304
|
+
raise TypeError("emitter_factory must be callable")
|
|
305
|
+
self._emitter = emitter_factory()
|
|
306
|
+
|
|
307
|
+
if sse is not None:
|
|
308
|
+
self._sse = sse
|
|
309
|
+
self._emitter = self._emitter or self._build_emitter(sse)
|
|
310
|
+
elif sse_factory is not None:
|
|
311
|
+
if not callable(sse_factory):
|
|
312
|
+
raise TypeError("sse_factory must be callable")
|
|
313
|
+
self._sse_factory = sse_factory
|
|
314
|
+
self._owns_sse = True
|
|
315
|
+
|
|
316
|
+
if (
|
|
317
|
+
self._auto_sse
|
|
318
|
+
and self._sse is None
|
|
319
|
+
and self._emitter is None
|
|
320
|
+
and self._sse_factory is None
|
|
321
|
+
):
|
|
322
|
+
self._create_auto_sse()
|
|
323
|
+
|
|
324
|
+
# ---------- Finalizer ----------
|
|
325
|
+
# Replaced broken weakref.finalize with __del__
|
|
326
|
+
self._finalizer_enabled = finalizer_enabled
|
|
327
|
+
|
|
328
|
+
if self.debug:
|
|
329
|
+
with contextlib.suppress(Exception):
|
|
330
|
+
self.logger.debug(
|
|
331
|
+
"Initialised %s %s", self.__class__.__name__, repr(self)
|
|
332
|
+
)
|
|
333
|
+
|
|
334
|
+
# ----------------- Properties -----------------
|
|
335
|
+
@property
|
|
336
|
+
def closed(self) -> bool:
|
|
337
|
+
return self._is_closed
|
|
338
|
+
|
|
339
|
+
@property
|
|
340
|
+
def has_fs(self) -> bool:
|
|
341
|
+
return self.fs is not None or self._fs_factory is not None
|
|
342
|
+
|
|
343
|
+
@property
|
|
344
|
+
def has_sse(self) -> bool:
|
|
345
|
+
return (
|
|
346
|
+
(self._emitter is not None)
|
|
347
|
+
or (self._sse is not None)
|
|
348
|
+
or self._auto_sse
|
|
349
|
+
or (self._sse_factory is not None)
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
def __repr__(self) -> str:
|
|
353
|
+
def _status(current: bool, factory: bool, owned: bool) -> str:
|
|
354
|
+
if current:
|
|
355
|
+
return "own" if owned else "external"
|
|
356
|
+
if factory:
|
|
357
|
+
return "own(lazy)"
|
|
358
|
+
return "none"
|
|
359
|
+
|
|
360
|
+
fs_status = _status(
|
|
361
|
+
self.fs is not None, self._fs_factory is not None, self._owns_fs
|
|
362
|
+
)
|
|
363
|
+
sse_current = (self._sse is not None) or (self._emitter is not None)
|
|
364
|
+
sse_factory = (self._sse_factory is not None) or self._auto_sse
|
|
365
|
+
sse_status = _status(sse_current, sse_factory, self._owns_sse or self._auto_sse)
|
|
366
|
+
return (
|
|
367
|
+
f"<{self.__class__.__name__} debug={self.debug} verbose={self.verbose} "
|
|
368
|
+
f"log_cleanup_errors={self._log_cleanup_errors} fs={fs_status} sse={sse_status}>"
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
# ----------------- Hooks -----------------
|
|
372
|
+
def _cleanup(self) -> None:
|
|
373
|
+
return
|
|
374
|
+
|
|
375
|
+
async def _acleanup(self) -> None:
|
|
376
|
+
return
|
|
377
|
+
|
|
378
|
+
def _assert_open(self) -> None:
|
|
379
|
+
if self._is_closed or self._closing:
|
|
380
|
+
raise RuntimeError(f"{self.__class__.__name__} is closed")
|
|
381
|
+
|
|
382
|
+
# ----------------- FS -----------------
|
|
383
|
+
def set_fs_factory(
|
|
384
|
+
self, factory: Optional[Callable[[], fsspec.AbstractFileSystem]]
|
|
385
|
+
) -> None:
|
|
386
|
+
with self._close_lock:
|
|
387
|
+
self._assert_open()
|
|
388
|
+
if self.fs is not None:
|
|
389
|
+
return
|
|
390
|
+
if factory is not None and not callable(factory):
|
|
391
|
+
raise TypeError("fs_factory must be callable")
|
|
392
|
+
self._fs_factory = factory
|
|
393
|
+
self._owns_fs = factory is not None
|
|
394
|
+
|
|
395
|
+
def _ensure_fs(self) -> Optional[fsspec.AbstractFileSystem]:
|
|
396
|
+
with self._close_lock:
|
|
397
|
+
self._assert_open()
|
|
398
|
+
if self.fs is not None:
|
|
399
|
+
return self.fs
|
|
400
|
+
if self._fs_factory is None:
|
|
401
|
+
return None
|
|
402
|
+
fs_new = self._fs_factory()
|
|
403
|
+
if not isinstance(fs_new, fsspec.AbstractFileSystem):
|
|
404
|
+
raise TypeError(
|
|
405
|
+
f"fs_factory() must return fsspec.AbstractFileSystem, got {type(fs_new)!r}"
|
|
406
|
+
)
|
|
407
|
+
self.fs = fs_new
|
|
408
|
+
return self.fs
|
|
409
|
+
|
|
410
|
+
def require_fs(self) -> fsspec.AbstractFileSystem:
|
|
411
|
+
fs = self._ensure_fs()
|
|
412
|
+
if fs is None:
|
|
413
|
+
raise RuntimeError(
|
|
414
|
+
f"{self.__class__.__name__}: filesystem is required but not configured"
|
|
415
|
+
)
|
|
416
|
+
return fs
|
|
417
|
+
|
|
418
|
+
# ----------------- SSE -----------------
|
|
419
|
+
def _create_auto_sse(self) -> None:
|
|
420
|
+
sink = _QueueSSE(maxsize=self._auto_sse_maxsize)
|
|
421
|
+
self._sse = sink
|
|
422
|
+
self._owns_sse = True
|
|
423
|
+
self._emitter = self._build_emitter(sink)
|
|
424
|
+
|
|
425
|
+
def set_sse_factory(self, factory: Optional[Callable[[], SSESinkProtocol]]) -> None:
|
|
426
|
+
with self._close_lock:
|
|
427
|
+
self._assert_open()
|
|
428
|
+
if self._sse is not None or self._emitter is not None:
|
|
429
|
+
return
|
|
430
|
+
if factory is not None and not callable(factory):
|
|
431
|
+
raise TypeError("sse_factory must be callable")
|
|
432
|
+
self._sse_factory = factory
|
|
433
|
+
self._owns_sse = factory is not None
|
|
434
|
+
|
|
435
|
+
def _ensure_sse(self) -> Optional[SSESinkProtocol]:
|
|
436
|
+
with self._close_lock:
|
|
437
|
+
# Allow retrieving already-created SSE even after close (useful for draining).
|
|
438
|
+
if self._sse is not None:
|
|
439
|
+
return self._sse
|
|
440
|
+
|
|
441
|
+
self._assert_open()
|
|
442
|
+
|
|
443
|
+
if self._sse_factory is not None:
|
|
444
|
+
sink = self._sse_factory()
|
|
445
|
+
self._sse = sink
|
|
446
|
+
self._owns_sse = True
|
|
447
|
+
if self._emitter is None:
|
|
448
|
+
self._emitter = self._build_emitter(sink)
|
|
449
|
+
return self._sse
|
|
450
|
+
|
|
451
|
+
if self._auto_sse and self._emitter is None:
|
|
452
|
+
self._create_auto_sse()
|
|
453
|
+
return self._sse
|
|
454
|
+
|
|
455
|
+
return None
|
|
456
|
+
|
|
457
|
+
def get_sse(self) -> Optional[SSESinkProtocol]:
|
|
458
|
+
return self._ensure_sse()
|
|
459
|
+
|
|
460
|
+
# ----------------- Emitter building/validation -----------------
|
|
461
|
+
def _signature_accepts(
|
|
462
|
+
self, fn: Callable[..., Any], /, *args: Any, **kwargs: Any
|
|
463
|
+
) -> bool:
|
|
464
|
+
import inspect
|
|
465
|
+
|
|
466
|
+
try:
|
|
467
|
+
sig = inspect.signature(fn)
|
|
468
|
+
sig.bind(*args, **kwargs)
|
|
469
|
+
return True
|
|
470
|
+
except (ValueError, TypeError):
|
|
471
|
+
return False
|
|
472
|
+
|
|
473
|
+
def _validate_send(self, send: Callable[..., Any]) -> bool:
|
|
474
|
+
return self._signature_accepts(
|
|
475
|
+
send, "evt", {"k": "v"}
|
|
476
|
+
) or self._signature_accepts(send, event="evt", data={"k": "v"})
|
|
477
|
+
|
|
478
|
+
def _validate_put(self, put: Callable[..., Any]) -> bool:
|
|
479
|
+
return self._signature_accepts(put, {"event": "evt", "data": "{}"})
|
|
480
|
+
|
|
481
|
+
def _build_emitter(
|
|
482
|
+
self, sink: object
|
|
483
|
+
) -> Callable[[str, Dict[str, Any]], Awaitable[None]]:
|
|
484
|
+
send = getattr(sink, "send", None)
|
|
485
|
+
put = getattr(sink, "put", None)
|
|
486
|
+
|
|
487
|
+
if callable(send) and (not self._validate_emitter or self._validate_send(send)):
|
|
488
|
+
if asyncio.iscoroutinefunction(send):
|
|
489
|
+
|
|
490
|
+
async def _emit(event: str, payload: Dict[str, Any]) -> None:
|
|
491
|
+
await send(event, payload)
|
|
492
|
+
|
|
493
|
+
return _emit
|
|
494
|
+
|
|
495
|
+
async def _emit(event: str, payload: Dict[str, Any]) -> None:
|
|
496
|
+
await asyncio.to_thread(send, event, payload)
|
|
497
|
+
|
|
498
|
+
return _emit
|
|
499
|
+
|
|
500
|
+
if callable(put) and (not self._validate_emitter or self._validate_put(put)):
|
|
501
|
+
if asyncio.iscoroutinefunction(put):
|
|
502
|
+
|
|
503
|
+
async def _emit(event: str, payload: Dict[str, Any]) -> None:
|
|
504
|
+
await put({"event": event, "data": json.dumps(payload)})
|
|
505
|
+
|
|
506
|
+
return _emit
|
|
507
|
+
|
|
508
|
+
async def _emit(event: str, payload: Dict[str, Any]) -> None:
|
|
509
|
+
await asyncio.to_thread(
|
|
510
|
+
put, {"event": event, "data": json.dumps(payload)}
|
|
511
|
+
)
|
|
512
|
+
|
|
513
|
+
return _emit
|
|
514
|
+
|
|
515
|
+
raise TypeError(
|
|
516
|
+
f"{self.__class__.__name__}: SSE sink must expose compatible send(event, data) or put(item)"
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
async def emit(self, event: str, **data: Any) -> None:
|
|
520
|
+
if self._is_closed or self._closing:
|
|
521
|
+
return
|
|
522
|
+
|
|
523
|
+
if self._emitter is None:
|
|
524
|
+
self._ensure_sse()
|
|
525
|
+
|
|
526
|
+
emitter = self._emitter
|
|
527
|
+
if emitter is None:
|
|
528
|
+
return
|
|
529
|
+
|
|
530
|
+
try:
|
|
531
|
+
if self._emit_timeout_s is None:
|
|
532
|
+
await emitter(event, data)
|
|
533
|
+
else:
|
|
534
|
+
await asyncio.wait_for(
|
|
535
|
+
emitter(event, data), timeout=self._emit_timeout_s
|
|
536
|
+
)
|
|
537
|
+
except asyncio.TimeoutError:
|
|
538
|
+
if not self._emit_drop_on_timeout:
|
|
539
|
+
raise
|
|
540
|
+
if self._log_cleanup_errors:
|
|
541
|
+
with contextlib.suppress(Exception):
|
|
542
|
+
self.logger.warning("emit(%r) timed out; event dropped", event)
|
|
543
|
+
except Exception:
|
|
544
|
+
if self._log_cleanup_errors:
|
|
545
|
+
with contextlib.suppress(Exception):
|
|
546
|
+
self.logger.error(
|
|
547
|
+
"Error emitting SSE event %r", event, exc_info=self.debug
|
|
548
|
+
)
|
|
549
|
+
|
|
550
|
+
# ----------------- Shutdown helpers -----------------
|
|
551
|
+
def _release_owned_fs(self) -> None:
|
|
552
|
+
if self._owns_fs and self.fs is not None:
|
|
553
|
+
close = getattr(self.fs, "close", None)
|
|
554
|
+
with contextlib.suppress(Exception):
|
|
555
|
+
if callable(close):
|
|
556
|
+
close()
|
|
557
|
+
self.fs = None
|
|
558
|
+
|
|
559
|
+
async def _aclose_obj(self, obj: object, timeout: float = 1.0) -> None:
|
|
560
|
+
# Prefer aclose(); only fall back to close() if aclose isn't available or fails.
|
|
561
|
+
aclose = getattr(obj, "aclose", None)
|
|
562
|
+
if callable(aclose):
|
|
563
|
+
try:
|
|
564
|
+
out = aclose()
|
|
565
|
+
if asyncio.iscoroutine(out):
|
|
566
|
+
await asyncio.wait_for(out, timeout=timeout)
|
|
567
|
+
return
|
|
568
|
+
except Exception:
|
|
569
|
+
if self.debug:
|
|
570
|
+
with contextlib.suppress(Exception):
|
|
571
|
+
self.logger.debug("Error in _aclose_obj", exc_info=True)
|
|
572
|
+
|
|
573
|
+
close = getattr(obj, "close", None)
|
|
574
|
+
if callable(close):
|
|
575
|
+
with contextlib.suppress(Exception):
|
|
576
|
+
await asyncio.to_thread(close)
|
|
577
|
+
|
|
578
|
+
def _shutdown_logger(self) -> None:
|
|
579
|
+
if not self._owns_logger:
|
|
580
|
+
return
|
|
581
|
+
with contextlib.suppress(Exception):
|
|
582
|
+
shutdown = getattr(self.logger, "shutdown", None)
|
|
583
|
+
if callable(shutdown):
|
|
584
|
+
shutdown()
|
|
585
|
+
return
|
|
586
|
+
with contextlib.suppress(Exception):
|
|
587
|
+
close_handlers = getattr(self.logger, "close_handlers", None)
|
|
588
|
+
if callable(close_handlers):
|
|
589
|
+
close_handlers()
|
|
590
|
+
|
|
591
|
+
def _shutdown_owned_resources_sync(self) -> None:
|
|
592
|
+
self._release_owned_fs()
|
|
593
|
+
if self._owns_sse and self._sse is not None:
|
|
594
|
+
with contextlib.suppress(Exception):
|
|
595
|
+
close = getattr(self._sse, "close", None)
|
|
596
|
+
if callable(close):
|
|
597
|
+
close()
|
|
598
|
+
self._sse = None
|
|
599
|
+
self._emitter = None
|
|
600
|
+
self._shutdown_logger()
|
|
601
|
+
|
|
602
|
+
async def _shutdown_owned_resources_async(self) -> None:
|
|
603
|
+
self._release_owned_fs()
|
|
604
|
+
if self._owns_sse and self._sse is not None:
|
|
605
|
+
await self._aclose_obj(self._sse)
|
|
606
|
+
self._sse = None
|
|
607
|
+
self._emitter = None
|
|
608
|
+
self._shutdown_logger()
|
|
609
|
+
|
|
610
|
+
# ----------------- Public lifecycle -----------------
|
|
611
|
+
@final
|
|
612
|
+
def close(self, *, suppress_errors: bool = False) -> None:
|
|
613
|
+
# Guard against partial initialization failure
|
|
614
|
+
lock = getattr(self, "_close_lock", None)
|
|
615
|
+
if lock is None:
|
|
616
|
+
return
|
|
617
|
+
|
|
618
|
+
with lock:
|
|
619
|
+
if self._is_closed or self._closing:
|
|
620
|
+
return
|
|
621
|
+
self._closing = True
|
|
622
|
+
|
|
623
|
+
try:
|
|
624
|
+
self._cleanup()
|
|
625
|
+
except Exception:
|
|
626
|
+
if self._log_cleanup_errors:
|
|
627
|
+
with contextlib.suppress(Exception):
|
|
628
|
+
self.logger.error(
|
|
629
|
+
"Error during %s._cleanup()",
|
|
630
|
+
self.__class__.__name__,
|
|
631
|
+
exc_info=self.debug,
|
|
632
|
+
)
|
|
633
|
+
if not suppress_errors:
|
|
634
|
+
raise
|
|
635
|
+
finally:
|
|
636
|
+
# Log before flipping debug (if you keep this policy)
|
|
637
|
+
if self.debug:
|
|
638
|
+
with contextlib.suppress(Exception):
|
|
639
|
+
self.logger.debug("Component %s closed.", self.__class__.__name__)
|
|
640
|
+
|
|
641
|
+
with lock:
|
|
642
|
+
self._is_closed = True
|
|
643
|
+
self._closing = False
|
|
644
|
+
self.debug = False # optional policy
|
|
645
|
+
|
|
646
|
+
self._shutdown_owned_resources_sync()
|
|
647
|
+
|
|
648
|
+
if getattr(self, "_finalizer_enabled", False):
|
|
649
|
+
pass
|
|
650
|
+
|
|
651
|
+
async def aclose(
|
|
652
|
+
self,
|
|
653
|
+
*,
|
|
654
|
+
suppress_errors: bool = True,
|
|
655
|
+
run_sync_cleanup_if_missing: bool = True,
|
|
656
|
+
) -> None:
|
|
657
|
+
async with self._aclose_lock:
|
|
658
|
+
with self._close_lock:
|
|
659
|
+
if self._is_closed or self._closing:
|
|
660
|
+
return
|
|
661
|
+
self._closing = True
|
|
662
|
+
|
|
663
|
+
try:
|
|
664
|
+
uses_default_acleanup = (
|
|
665
|
+
type(self)._acleanup is ManagedResource._acleanup
|
|
666
|
+
)
|
|
667
|
+
if run_sync_cleanup_if_missing and uses_default_acleanup:
|
|
668
|
+
await asyncio.to_thread(self._cleanup)
|
|
669
|
+
else:
|
|
670
|
+
await self._acleanup()
|
|
671
|
+
except Exception:
|
|
672
|
+
if self._log_cleanup_errors:
|
|
673
|
+
with contextlib.suppress(Exception):
|
|
674
|
+
self.logger.error(
|
|
675
|
+
"Error during %s._acleanup()",
|
|
676
|
+
self.__class__.__name__,
|
|
677
|
+
exc_info=self.debug,
|
|
678
|
+
)
|
|
679
|
+
if not suppress_errors:
|
|
680
|
+
raise
|
|
681
|
+
finally:
|
|
682
|
+
# Log before flipping debug (if you keep this policy)
|
|
683
|
+
if self.debug:
|
|
684
|
+
with contextlib.suppress(Exception):
|
|
685
|
+
self.logger.debug(
|
|
686
|
+
"Async component %s closed.", self.__class__.__name__
|
|
687
|
+
)
|
|
688
|
+
|
|
689
|
+
with self._close_lock:
|
|
690
|
+
self._is_closed = True
|
|
691
|
+
self._closing = False
|
|
692
|
+
self.debug = False # optional policy
|
|
693
|
+
|
|
694
|
+
await self._shutdown_owned_resources_async()
|
|
695
|
+
|
|
696
|
+
if self._finalizer_enabled:
|
|
697
|
+
# No-op: __del__ handles safety, close() handles explicit
|
|
698
|
+
pass
|
|
699
|
+
|
|
700
|
+
# ----------------- Context managers -----------------
|
|
701
|
+
@final
|
|
702
|
+
def __enter__(self) -> Self:
|
|
703
|
+
return self
|
|
704
|
+
|
|
705
|
+
@final
|
|
706
|
+
def __exit__(
|
|
707
|
+
self,
|
|
708
|
+
exc_type: type[BaseException] | None,
|
|
709
|
+
exc_val: BaseException | None,
|
|
710
|
+
exc_tb: Any | None,
|
|
711
|
+
) -> None:
|
|
712
|
+
self.close()
|
|
713
|
+
return False
|
|
714
|
+
|
|
715
|
+
async def __aenter__(self) -> Self:
|
|
716
|
+
return self
|
|
717
|
+
|
|
718
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
|
|
719
|
+
await self.aclose()
|
|
720
|
+
return False
|
|
721
|
+
|
|
722
|
+
# ----------------- Finalizer -----------------
|
|
723
|
+
def __del__(self) -> None:
|
|
724
|
+
"""Safe finalizer for resource cleanup."""
|
|
725
|
+
if getattr(self, "_finalizer_enabled", False):
|
|
726
|
+
# We must be very careful in __del__
|
|
727
|
+
# Only call close() if the object is still fundamentally intact
|
|
728
|
+
try:
|
|
729
|
+
if not getattr(self, "_is_closed", True):
|
|
730
|
+
self.close()
|
|
731
|
+
except Exception:
|
|
732
|
+
# Never raise in __del__
|
|
733
|
+
pass
|