sibi-dst 2025.9.4__tar.gz → 2025.9.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/PKG-INFO +1 -1
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/pyproject.toml +1 -1
- sibi_dst-2025.9.6/sibi_dst/tests/test_baseclass.py +403 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/base.py +0 -254
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/__init__.py +4 -1
- sibi_dst-2025.9.6/sibi_dst/utils/boilerplate/hybrid_data_loader.py +144 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/clickhouse_writer.py +138 -13
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/dask_utils.py +1 -1
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/README.md +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_artifact_updater_async.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_parquet_reader.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_defaults.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/data_cleaner.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/geopy_helper/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/geopy_helper/utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/tests/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/async_utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_attacher.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_data_cube.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_parquet_artifact.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_parquet_reader.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/business_days.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/credentials.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/data_from_http_source.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/data_utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/data_wrapper.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/date_utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/df_utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/file_age_checker.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/file_utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/filepath_generator.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/iceberg_saver.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/log_utils.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/manifest_manager.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/parquet_saver.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/periods.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/phone_formatter.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/progress/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/progress/jobs.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/progress/sse_runner.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/storage_config.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/storage_hive.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/storage_manager.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/update_planner.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/webdav_client.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/write_gatekeeper.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/utils/__init__.py +0 -0
- {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -0,0 +1,403 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
3
|
+
import threading
|
4
|
+
from typing import Any, Dict
|
5
|
+
from unittest.mock import MagicMock
|
6
|
+
|
7
|
+
import fsspec
|
8
|
+
|
9
|
+
from sibi_dst.utils import Logger
|
10
|
+
from sibi_dst.utils import ManagedResource
|
11
|
+
from sibi_dst.utils.base import _QueueSSE # Replace 'your_module' with actual module name
|
12
|
+
|
13
|
+
|
14
|
+
# ------------------------------ Test Fixtures ------------------------------
|
15
|
+
|
16
|
+
class TestResource(ManagedResource):
|
17
|
+
def __init__(self, *args, **kwargs):
|
18
|
+
super().__init__(*args, **kwargs)
|
19
|
+
self.cleanup_called = False
|
20
|
+
self.acleanup_called = False
|
21
|
+
|
22
|
+
def _cleanup(self) -> None:
|
23
|
+
self.cleanup_called = True
|
24
|
+
super()._cleanup()
|
25
|
+
|
26
|
+
async def _acleanup(self) -> None:
|
27
|
+
self.acleanup_called = True
|
28
|
+
await super()._acleanup()
|
29
|
+
|
30
|
+
|
31
|
+
class MockSSESink:
|
32
|
+
def __init__(self):
|
33
|
+
self.events = []
|
34
|
+
self.closed = False
|
35
|
+
|
36
|
+
async def send(self, event: str, data: Dict[str, Any]) -> None:
|
37
|
+
self.events.append({"event": event, "data": data})
|
38
|
+
|
39
|
+
async def aclose(self) -> None:
|
40
|
+
self.closed = True
|
41
|
+
|
42
|
+
|
43
|
+
class MockSyncSSESink:
|
44
|
+
def __init__(self):
|
45
|
+
self.events = []
|
46
|
+
self.closed = False
|
47
|
+
|
48
|
+
def send(self, event: str, data: Dict[str, Any]) -> None:
|
49
|
+
self.events.append({"event": event, "data": data})
|
50
|
+
|
51
|
+
def close(self) -> None:
|
52
|
+
self.closed = True
|
53
|
+
|
54
|
+
|
55
|
+
# ------------------------------ Mock fsspec filesystem ------------------------------
|
56
|
+
|
57
|
+
class MockFileSystem(fsspec.AbstractFileSystem):
|
58
|
+
def __init__(self, **kwargs):
|
59
|
+
super().__init__(**kwargs)
|
60
|
+
self.closed = False
|
61
|
+
|
62
|
+
def close(self):
|
63
|
+
self.closed = True
|
64
|
+
|
65
|
+
|
66
|
+
# ------------------------------ Utility for Event Loop ------------------------------
|
67
|
+
|
68
|
+
def run_async_test(coro):
|
69
|
+
"""Run async test safely in different environments."""
|
70
|
+
try:
|
71
|
+
# Try to get existing event loop (for Jupyter/IPython)
|
72
|
+
loop = asyncio.get_event_loop()
|
73
|
+
if loop.is_running():
|
74
|
+
# In Jupyter, create a new task
|
75
|
+
task = loop.create_task(coro)
|
76
|
+
return task
|
77
|
+
else:
|
78
|
+
return loop.run_until_complete(coro)
|
79
|
+
except RuntimeError:
|
80
|
+
# No event loop running, use asyncio.run()
|
81
|
+
return asyncio.run(coro)
|
82
|
+
|
83
|
+
|
84
|
+
# ------------------------------ Lifecycle Tests ------------------------------
|
85
|
+
|
86
|
+
def test_double_close_no_error():
|
87
|
+
"""Test that calling close() multiple times doesn't raise errors."""
|
88
|
+
resource = TestResource()
|
89
|
+
resource.close()
|
90
|
+
resource.close() # Should not raise
|
91
|
+
assert resource.closed
|
92
|
+
|
93
|
+
|
94
|
+
def test_double_aclose_no_error():
|
95
|
+
"""Test that calling aclose() multiple times doesn't raise errors."""
|
96
|
+
async def test():
|
97
|
+
resource = TestResource()
|
98
|
+
await resource.aclose()
|
99
|
+
await resource.aclose() # Should not raise
|
100
|
+
assert resource.closed
|
101
|
+
|
102
|
+
run_async_test(test())
|
103
|
+
|
104
|
+
|
105
|
+
def test_context_manager_sync():
|
106
|
+
"""Test sync context manager behavior."""
|
107
|
+
with TestResource() as resource:
|
108
|
+
assert not resource.closed
|
109
|
+
assert resource.closed
|
110
|
+
assert resource.cleanup_called
|
111
|
+
|
112
|
+
|
113
|
+
def test_context_manager_async():
|
114
|
+
"""Test async context manager behavior."""
|
115
|
+
async def test():
|
116
|
+
async with TestResource() as resource:
|
117
|
+
assert not resource.closed
|
118
|
+
assert resource.closed
|
119
|
+
assert resource.acleanup_called
|
120
|
+
|
121
|
+
run_async_test(test())
|
122
|
+
|
123
|
+
|
124
|
+
# ------------------------------ SSE Emission Tests ------------------------------
|
125
|
+
|
126
|
+
def test_auto_sse_creation():
|
127
|
+
"""Test automatic SSE creation when auto_sse=True."""
|
128
|
+
resource = TestResource(auto_sse=True)
|
129
|
+
sse = resource.get_sse()
|
130
|
+
assert sse is not None
|
131
|
+
assert isinstance(sse, _QueueSSE)
|
132
|
+
assert resource._owns_sse
|
133
|
+
|
134
|
+
|
135
|
+
def test_sse_emission_with_async_sink():
|
136
|
+
"""Test SSE emission with async send method."""
|
137
|
+
async def test():
|
138
|
+
sink = MockSSESink()
|
139
|
+
resource = TestResource(sse=sink)
|
140
|
+
|
141
|
+
await resource.emit("test_event", key="value")
|
142
|
+
|
143
|
+
assert len(sink.events) == 1
|
144
|
+
assert sink.events[0]["event"] == "test_event"
|
145
|
+
assert sink.events[0]["data"] == {"key": "value"}
|
146
|
+
|
147
|
+
run_async_test(test())
|
148
|
+
|
149
|
+
|
150
|
+
def test_sse_emission_with_sync_sink():
|
151
|
+
"""Test SSE emission with sync send method wrapped in async."""
|
152
|
+
sink = MockSyncSSESink()
|
153
|
+
resource = TestResource(sse=sink)
|
154
|
+
|
155
|
+
async def test():
|
156
|
+
await resource.emit("test_event", key="value")
|
157
|
+
|
158
|
+
assert len(sink.events) == 1
|
159
|
+
assert sink.events[0]["event"] == "test_event"
|
160
|
+
assert sink.events[0]["data"] == {"key": "value"}
|
161
|
+
|
162
|
+
run_async_test(test())
|
163
|
+
|
164
|
+
|
165
|
+
def test_sse_put_method_support():
|
166
|
+
"""Test SSE emission with put method."""
|
167
|
+
class PutSink:
|
168
|
+
def __init__(self):
|
169
|
+
self.items = []
|
170
|
+
|
171
|
+
async def put(self, item: Dict[str, Any]) -> None:
|
172
|
+
self.items.append(item)
|
173
|
+
|
174
|
+
async def test():
|
175
|
+
sink = PutSink()
|
176
|
+
resource = TestResource(sse=sink)
|
177
|
+
|
178
|
+
await resource.emit("test_event", key="value")
|
179
|
+
|
180
|
+
assert len(sink.items) == 1
|
181
|
+
item = sink.items[0]
|
182
|
+
assert item["event"] == "test_event"
|
183
|
+
assert json.loads(item["data"]) == {"key": "value"}
|
184
|
+
|
185
|
+
run_async_test(test())
|
186
|
+
|
187
|
+
|
188
|
+
def test_sse_no_emitter_no_error():
|
189
|
+
"""Test that emit on resource without emitter doesn't raise."""
|
190
|
+
resource = TestResource()
|
191
|
+
# Should not raise error
|
192
|
+
async def test():
|
193
|
+
await resource.emit("test_event", key="value")
|
194
|
+
|
195
|
+
run_async_test(test())
|
196
|
+
|
197
|
+
|
198
|
+
def test_sse_emission_after_close():
|
199
|
+
"""Test that emit after close is no-op."""
|
200
|
+
async def test():
|
201
|
+
sink = MockSSESink()
|
202
|
+
resource = TestResource(sse=sink)
|
203
|
+
|
204
|
+
await resource.aclose()
|
205
|
+
await resource.emit("test_event", key="value") # Should not raise
|
206
|
+
|
207
|
+
assert len(sink.events) == 0
|
208
|
+
|
209
|
+
run_async_test(test())
|
210
|
+
|
211
|
+
|
212
|
+
# ------------------------------ Cleanup Interplay Tests ------------------------------
|
213
|
+
|
214
|
+
def test_sync_cleanup_called_on_sync_close():
|
215
|
+
"""Test that sync cleanup is called during sync close."""
|
216
|
+
resource = TestResource()
|
217
|
+
resource.close()
|
218
|
+
assert resource.cleanup_called
|
219
|
+
assert not resource.acleanup_called
|
220
|
+
|
221
|
+
|
222
|
+
def test_async_cleanup_called_on_async_close():
|
223
|
+
"""Test that async cleanup is called during async close."""
|
224
|
+
async def test():
|
225
|
+
resource = TestResource()
|
226
|
+
await resource.aclose()
|
227
|
+
assert resource.acleanup_called
|
228
|
+
assert not resource.cleanup_called
|
229
|
+
|
230
|
+
run_async_test(test())
|
231
|
+
|
232
|
+
|
233
|
+
# ------------------------------ Logger Tests ------------------------------
|
234
|
+
|
235
|
+
def test_logger_ownership():
|
236
|
+
"""Test that logger is owned when not provided externally."""
|
237
|
+
resource = TestResource()
|
238
|
+
assert resource._owns_logger
|
239
|
+
assert resource.logger is not None
|
240
|
+
|
241
|
+
|
242
|
+
def test_external_logger_not_owned():
|
243
|
+
"""Test that external logger is not owned."""
|
244
|
+
external_logger = Logger.default_logger("test")
|
245
|
+
resource = TestResource(logger=external_logger)
|
246
|
+
assert not resource._owns_logger
|
247
|
+
assert resource.logger is external_logger
|
248
|
+
|
249
|
+
|
250
|
+
def test_logger_level_configuration():
|
251
|
+
"""Test logger level configuration based on verbose/debug flags."""
|
252
|
+
# Default (warning level)
|
253
|
+
resource = TestResource()
|
254
|
+
assert hasattr(resource.logger, 'level')
|
255
|
+
|
256
|
+
# Verbose (info level)
|
257
|
+
resource = TestResource(verbose=True)
|
258
|
+
assert hasattr(resource.logger, 'level')
|
259
|
+
|
260
|
+
# Debug (debug level)
|
261
|
+
resource = TestResource(debug=True)
|
262
|
+
assert hasattr(resource.logger, 'level')
|
263
|
+
|
264
|
+
|
265
|
+
# ------------------------------ Lazy Instantiation Tests ------------------------------
|
266
|
+
|
267
|
+
def test_lazy_fs_instantiation():
|
268
|
+
"""Test lazy filesystem instantiation via factory."""
|
269
|
+
fs_instance = MockFileSystem()
|
270
|
+
factory_called = [False]
|
271
|
+
|
272
|
+
def fs_factory():
|
273
|
+
factory_called[0] = True
|
274
|
+
return fs_instance
|
275
|
+
|
276
|
+
resource = TestResource(fs_factory=fs_factory)
|
277
|
+
assert not factory_called[0] # Not called yet
|
278
|
+
|
279
|
+
fs = resource._ensure_fs()
|
280
|
+
assert factory_called[0]
|
281
|
+
assert fs is fs_instance
|
282
|
+
assert resource.fs is fs_instance
|
283
|
+
|
284
|
+
|
285
|
+
def test_lazy_sse_instantiation():
|
286
|
+
"""Test lazy SSE instantiation via factory."""
|
287
|
+
sink_instance = MockSSESink()
|
288
|
+
factory_called = [False]
|
289
|
+
|
290
|
+
def sse_factory():
|
291
|
+
factory_called[0] = True
|
292
|
+
return sink_instance
|
293
|
+
|
294
|
+
resource = TestResource(sse_factory=sse_factory)
|
295
|
+
assert not factory_called[0] # Not called yet
|
296
|
+
|
297
|
+
sse = resource._ensure_sse()
|
298
|
+
assert factory_called[0]
|
299
|
+
assert sse is sink_instance
|
300
|
+
assert resource._sse is sink_instance
|
301
|
+
|
302
|
+
|
303
|
+
def test_lazy_fs_not_called_if_fs_provided():
|
304
|
+
"""Test that factory is not called if fs is provided directly."""
|
305
|
+
fs_instance = MockFileSystem()
|
306
|
+
factory = MagicMock()
|
307
|
+
|
308
|
+
resource = TestResource(fs=fs_instance, fs_factory=factory)
|
309
|
+
fs = resource._ensure_fs()
|
310
|
+
|
311
|
+
assert fs is fs_instance
|
312
|
+
factory.assert_not_called()
|
313
|
+
|
314
|
+
|
315
|
+
def test_lazy_sse_not_called_if_sse_provided():
|
316
|
+
"""Test that factory is not called if sse is provided directly."""
|
317
|
+
sink_instance = MockSSESink()
|
318
|
+
factory = MagicMock()
|
319
|
+
|
320
|
+
resource = TestResource(sse=sink_instance, sse_factory=factory)
|
321
|
+
sse = resource._ensure_sse()
|
322
|
+
|
323
|
+
assert sse is sink_instance
|
324
|
+
factory.assert_not_called()
|
325
|
+
|
326
|
+
|
327
|
+
# ------------------------------ Thread Safety Tests ------------------------------
|
328
|
+
|
329
|
+
def test_thread_safe_close():
|
330
|
+
"""Test that close operations are thread-safe."""
|
331
|
+
resource = TestResource()
|
332
|
+
|
333
|
+
results = []
|
334
|
+
errors = []
|
335
|
+
|
336
|
+
def close_resource():
|
337
|
+
try:
|
338
|
+
resource.close()
|
339
|
+
results.append("success")
|
340
|
+
except Exception as e:
|
341
|
+
errors.append(str(e))
|
342
|
+
results.append(f"error: {e}")
|
343
|
+
|
344
|
+
# Start multiple threads trying to close simultaneously
|
345
|
+
threads = [threading.Thread(target=close_resource) for _ in range(5)]
|
346
|
+
for t in threads:
|
347
|
+
t.start()
|
348
|
+
for t in threads:
|
349
|
+
t.join()
|
350
|
+
|
351
|
+
# Debug information
|
352
|
+
print(f"Results: {results}")
|
353
|
+
print(f"Errors: {errors}")
|
354
|
+
print(f"Resource closed: {resource.closed}")
|
355
|
+
|
356
|
+
# Should have at least one success (the first one) and no exceptions
|
357
|
+
success_count = results.count("success")
|
358
|
+
error_count = len([r for r in results if r.startswith("error")])
|
359
|
+
|
360
|
+
# At least one should succeed
|
361
|
+
assert success_count >= 1, f"Expected at least 1 success, got {success_count}"
|
362
|
+
# No errors should occur
|
363
|
+
assert error_count == 0, f"Expected 0 errors, got {error_count}"
|
364
|
+
# Resource should be closed
|
365
|
+
assert resource.closed, "Resource should be closed"
|
366
|
+
|
367
|
+
|
368
|
+
# ------------------------------ Individual Test Functions ------------------------------
|
369
|
+
|
370
|
+
# You can now run individual tests like this:
|
371
|
+
if __name__ == "__main__":
|
372
|
+
# Run individual tests
|
373
|
+
test_double_close_no_error()
|
374
|
+
print("✓ test_double_close_no_error passed")
|
375
|
+
|
376
|
+
test_sync_cleanup_called_on_sync_close()
|
377
|
+
print("✓ test_sync_cleanup_called_on_sync_close passed")
|
378
|
+
|
379
|
+
test_logger_ownership()
|
380
|
+
print("✓ test_logger_ownership passed")
|
381
|
+
|
382
|
+
test_external_logger_not_owned()
|
383
|
+
print("✓ test_external_logger_not_owned passed")
|
384
|
+
|
385
|
+
test_lazy_fs_instantiation()
|
386
|
+
print("✓ test_lazy_fs_instantiation passed")
|
387
|
+
|
388
|
+
test_lazy_sse_instantiation()
|
389
|
+
print("✓ test_lazy_sse_instantiation passed")
|
390
|
+
|
391
|
+
test_lazy_fs_not_called_if_fs_provided()
|
392
|
+
print("✓ test_lazy_fs_not_called_if_fs_provided passed")
|
393
|
+
|
394
|
+
test_lazy_sse_not_called_if_sse_provided()
|
395
|
+
print("✓ test_lazy_sse_not_called_if_sse_provided passed")
|
396
|
+
|
397
|
+
test_thread_safe_close()
|
398
|
+
print("✓ test_thread_safe_close passed")
|
399
|
+
|
400
|
+
test_auto_sse_creation()
|
401
|
+
print("✓ test_auto_sse_creation passed")
|
402
|
+
|
403
|
+
print("All tests completed!")
|
@@ -441,257 +441,3 @@ class ManagedResource(abc.ABC):
|
|
441
441
|
except Exception:
|
442
442
|
pass
|
443
443
|
|
444
|
-
## Before SSE handling
|
445
|
-
|
446
|
-
# import abc
|
447
|
-
# import threading
|
448
|
-
# import weakref
|
449
|
-
# from typing import Self, Optional, Callable
|
450
|
-
#
|
451
|
-
# import fsspec
|
452
|
-
#
|
453
|
-
# from sibi_dst.utils import Logger
|
454
|
-
#
|
455
|
-
#
|
456
|
-
# class ManagedResource(abc.ABC):
|
457
|
-
# """
|
458
|
-
# Boilerplate ABC for components that manage a logger and an optional fsspec filesystem,
|
459
|
-
# with sync/async lifecycle helpers, lazy FS creation via an optional factory, and
|
460
|
-
# configurable cleanup-error logging.
|
461
|
-
# """
|
462
|
-
#
|
463
|
-
# def __init__(
|
464
|
-
# self,
|
465
|
-
# *,
|
466
|
-
# verbose: bool = False,
|
467
|
-
# debug: bool = False,
|
468
|
-
# log_cleanup_errors: bool = True,
|
469
|
-
# logger: Optional[Logger] = None,
|
470
|
-
# fs: Optional[fsspec.AbstractFileSystem] = None,
|
471
|
-
# fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None,
|
472
|
-
# **_: object,
|
473
|
-
# ) -> None:
|
474
|
-
# # ---- Declared upfront for type checkers
|
475
|
-
# self.logger: Logger
|
476
|
-
# self.fs: Optional[fsspec.AbstractFileSystem] = None
|
477
|
-
# self._fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None
|
478
|
-
# self._owns_logger: bool = False
|
479
|
-
# self._owns_fs: bool = False
|
480
|
-
# self._is_closed: bool = False
|
481
|
-
# self._closing: bool = False
|
482
|
-
# self._close_lock = threading.RLock()
|
483
|
-
#
|
484
|
-
# self.verbose = verbose
|
485
|
-
# self.debug = debug
|
486
|
-
# self._log_cleanup_errors = log_cleanup_errors
|
487
|
-
#
|
488
|
-
# # ---- Logger ownership
|
489
|
-
# if logger is None:
|
490
|
-
# self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
|
491
|
-
# self._owns_logger = True
|
492
|
-
# level = Logger.DEBUG if self.debug else (Logger.INFO if self.verbose else Logger.WARNING)
|
493
|
-
# self.logger.set_level(level)
|
494
|
-
# else:
|
495
|
-
# self.logger = logger
|
496
|
-
# self._owns_logger = False # do not mutate external logger
|
497
|
-
#
|
498
|
-
# # ---- FS ownership & lazy creation
|
499
|
-
# if fs is not None:
|
500
|
-
# self.fs = fs
|
501
|
-
# self._owns_fs = False
|
502
|
-
# self._fs_factory = None
|
503
|
-
# elif fs_factory is not None:
|
504
|
-
# # Lazy: don't create until first use
|
505
|
-
# self._fs_factory = fs_factory
|
506
|
-
# self._owns_fs = True # we will own it *if* created
|
507
|
-
# self.fs = None
|
508
|
-
# else:
|
509
|
-
# self.fs = None
|
510
|
-
# self._owns_fs = False
|
511
|
-
# self._fs_factory = None
|
512
|
-
#
|
513
|
-
# # Register a GC-time finalizer that does not capture self
|
514
|
-
# self_ref = weakref.ref(self)
|
515
|
-
# self._finalizer = weakref.finalize(self, self._finalize_static, self_ref)
|
516
|
-
#
|
517
|
-
# if self.debug:
|
518
|
-
# try:
|
519
|
-
# self.logger.debug("Component %s initialized. %s", self.__class__.__name__, repr(self))
|
520
|
-
# except Exception:
|
521
|
-
# pass
|
522
|
-
#
|
523
|
-
# # ---------- Introspection ----------
|
524
|
-
# @property
|
525
|
-
# def is_closed(self) -> bool:
|
526
|
-
# return self._is_closed
|
527
|
-
#
|
528
|
-
# @property
|
529
|
-
# def closed(self) -> bool: # alias
|
530
|
-
# return self._is_closed
|
531
|
-
#
|
532
|
-
# def __repr__(self) -> str:
|
533
|
-
# class_name = self.__class__.__name__
|
534
|
-
# logger_status = "own" if self._owns_logger else "external"
|
535
|
-
# if self.fs is None and self._fs_factory is not None:
|
536
|
-
# fs_status = "own(lazy)"
|
537
|
-
# elif self.fs is None:
|
538
|
-
# fs_status = "none"
|
539
|
-
# else:
|
540
|
-
# fs_status = "own" if self._owns_fs else "external"
|
541
|
-
# return (f"<{class_name} debug={self.debug} verbose={self.verbose} "
|
542
|
-
# f"log_cleanup_errors={self._log_cleanup_errors} "
|
543
|
-
# f"logger={logger_status} fs={fs_status}>")
|
544
|
-
#
|
545
|
-
# # ---------- Subclass hooks ----------
|
546
|
-
# def _cleanup(self) -> None:
|
547
|
-
# """Sync cleanup for resources created BY THE SUBCLASS."""
|
548
|
-
# return
|
549
|
-
#
|
550
|
-
# async def _acleanup(self) -> None:
|
551
|
-
# """Async cleanup for resources created BY THE SUBCLASS."""
|
552
|
-
# return
|
553
|
-
#
|
554
|
-
# # ---------- FS helpers ----------
|
555
|
-
# def _ensure_fs(self) -> Optional[fsspec.AbstractFileSystem]:
|
556
|
-
# """Create the FS lazily if a factory was provided. Return fs (or None)."""
|
557
|
-
# if self.fs is None and self._fs_factory is not None:
|
558
|
-
# created = self._fs_factory()
|
559
|
-
# if not isinstance(created, fsspec.AbstractFileSystem):
|
560
|
-
# raise TypeError(f"fs_factory() must return fsspec.AbstractFileSystem, got {type(created)!r}")
|
561
|
-
# self.fs = created
|
562
|
-
# # _owns_fs already True when factory is present
|
563
|
-
# return self.fs
|
564
|
-
#
|
565
|
-
# def require_fs(self) -> fsspec.AbstractFileSystem:
|
566
|
-
# """Return a filesystem or raise if not configured/creatable."""
|
567
|
-
# fs = self._ensure_fs()
|
568
|
-
# if fs is None:
|
569
|
-
# raise RuntimeError(
|
570
|
-
# f"{self.__class__.__name__}: filesystem is required but not configured"
|
571
|
-
# )
|
572
|
-
# return fs
|
573
|
-
#
|
574
|
-
# # ---------- Shared shutdown helpers (no logging; safe for late shutdown) ----------
|
575
|
-
# def _release_owned_fs(self) -> None:
|
576
|
-
# if self._owns_fs:
|
577
|
-
# # ensure creation state is respected even if never used
|
578
|
-
# _ = self.fs or None # no-op; if never created, nothing to close
|
579
|
-
# if self.fs is not None:
|
580
|
-
# close = getattr(self.fs, "close", None)
|
581
|
-
# try:
|
582
|
-
# if callable(close):
|
583
|
-
# close()
|
584
|
-
# finally:
|
585
|
-
# self.fs = None
|
586
|
-
#
|
587
|
-
# def _shutdown_logger(self) -> None:
|
588
|
-
# if self._owns_logger:
|
589
|
-
# try:
|
590
|
-
# self.logger.shutdown()
|
591
|
-
# except Exception:
|
592
|
-
# pass
|
593
|
-
#
|
594
|
-
# def _shutdown_owned_resources(self) -> None:
|
595
|
-
# self._release_owned_fs()
|
596
|
-
# self._shutdown_logger()
|
597
|
-
#
|
598
|
-
# # ---------- Public lifecycle (sync) ----------
|
599
|
-
# def close(self) -> None:
|
600
|
-
# with self._close_lock:
|
601
|
-
# if self._is_closed or self._closing:
|
602
|
-
# return
|
603
|
-
# self._closing = True
|
604
|
-
#
|
605
|
-
# try:
|
606
|
-
# self._cleanup()
|
607
|
-
# except Exception:
|
608
|
-
# # Only include traceback when debug=True
|
609
|
-
# if self._log_cleanup_errors:
|
610
|
-
# try:
|
611
|
-
# self.logger.error(
|
612
|
-
# "Error during %s._cleanup()", self.__class__.__name__,
|
613
|
-
# exc_info=self.debug
|
614
|
-
# )
|
615
|
-
# except Exception:
|
616
|
-
# pass
|
617
|
-
# raise
|
618
|
-
# finally:
|
619
|
-
# with self._close_lock:
|
620
|
-
# self._is_closed = True
|
621
|
-
# self._closing = False
|
622
|
-
# self._shutdown_owned_resources()
|
623
|
-
# if self.debug:
|
624
|
-
# try:
|
625
|
-
# self.logger.debug("Component %s closed.", self.__class__.__name__)
|
626
|
-
# except Exception:
|
627
|
-
# pass
|
628
|
-
#
|
629
|
-
# # ---------- Public lifecycle (async) ----------
|
630
|
-
# async def aclose(self) -> None:
|
631
|
-
# with self._close_lock:
|
632
|
-
# if self._is_closed or self._closing:
|
633
|
-
# return
|
634
|
-
# self._closing = True
|
635
|
-
#
|
636
|
-
# try:
|
637
|
-
# await self._acleanup()
|
638
|
-
# except Exception:
|
639
|
-
# # Only include traceback when debug=True
|
640
|
-
# if self._log_cleanup_errors:
|
641
|
-
# try:
|
642
|
-
# self.logger.error(
|
643
|
-
# "Error during %s._acleanup()", self.__class__.__name__,
|
644
|
-
# exc_info=self.debug
|
645
|
-
# )
|
646
|
-
# except Exception:
|
647
|
-
# pass
|
648
|
-
# raise
|
649
|
-
# finally:
|
650
|
-
# with self._close_lock:
|
651
|
-
# self._is_closed = True
|
652
|
-
# self._closing = False
|
653
|
-
# self._shutdown_owned_resources()
|
654
|
-
# if self.debug:
|
655
|
-
# try:
|
656
|
-
# self.logger.debug("Async component %s closed.", self.__class__.__name__)
|
657
|
-
# except Exception:
|
658
|
-
# pass
|
659
|
-
#
|
660
|
-
# # ---------- Context managers ----------
|
661
|
-
# def __enter__(self) -> Self:
|
662
|
-
# return self
|
663
|
-
#
|
664
|
-
# def __exit__(self, exc_type, exc, tb) -> bool:
|
665
|
-
# self.close()
|
666
|
-
# return False # propagate exceptions
|
667
|
-
#
|
668
|
-
# async def __aenter__(self) -> Self:
|
669
|
-
# return self
|
670
|
-
#
|
671
|
-
# async def __aexit__(self, exc_type, exc, tb) -> bool:
|
672
|
-
# await self.aclose()
|
673
|
-
# return False
|
674
|
-
#
|
675
|
-
# # ---------- Finalizer ( at Garbage Collection-time absolutely silent) ----------
|
676
|
-
# @staticmethod
|
677
|
-
# def _finalize_static(ref: "weakref.ReferenceType[ManagedResource]") -> None:
|
678
|
-
# obj = ref()
|
679
|
-
# if obj is None:
|
680
|
-
# return
|
681
|
-
# # No logging here; interpreter may be tearing down.
|
682
|
-
# # Best-effort silent cleanup; avoid locks and context managers.
|
683
|
-
# try:
|
684
|
-
# if not obj._is_closed:
|
685
|
-
# try:
|
686
|
-
# obj._cleanup()
|
687
|
-
# except Exception:
|
688
|
-
# pass
|
689
|
-
# obj._is_closed = True
|
690
|
-
# try:
|
691
|
-
# obj._shutdown_owned_resources()
|
692
|
-
# except Exception:
|
693
|
-
# pass
|
694
|
-
# except Exception:
|
695
|
-
# # do not show anything at garbage collection time
|
696
|
-
# pass
|
697
|
-
#
|
@@ -2,10 +2,13 @@ from .base_parquet_artifact import BaseParquetArtifact
|
|
2
2
|
from .base_data_cube import BaseDataCube
|
3
3
|
from .base_attacher import make_attacher
|
4
4
|
from .base_parquet_reader import BaseParquetReader
|
5
|
+
from .hybrid_data_loader import HybridDataLoader
|
6
|
+
|
5
7
|
__all__ = [
|
6
8
|
"BaseDataCube",
|
7
9
|
"BaseParquetArtifact",
|
8
10
|
"make_attacher",
|
9
|
-
"BaseParquetReader"
|
11
|
+
"BaseParquetReader",
|
12
|
+
"HybridDataLoader",
|
10
13
|
]
|
11
14
|
|