sibi-dst 2025.9.4__tar.gz → 2025.9.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/PKG-INFO +1 -1
  2. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/pyproject.toml +1 -1
  3. sibi_dst-2025.9.6/sibi_dst/tests/test_baseclass.py +403 -0
  4. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/base.py +0 -254
  5. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/__init__.py +4 -1
  6. sibi_dst-2025.9.6/sibi_dst/utils/boilerplate/hybrid_data_loader.py +144 -0
  7. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/clickhouse_writer.py +138 -13
  8. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/dask_utils.py +1 -1
  9. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/README.md +0 -0
  10. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/__init__.py +0 -0
  11. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/__init__.py +0 -0
  12. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_artifact_updater_async.py +0 -0
  13. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_artifact_updater_threaded.py +0 -0
  14. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_df_helper.py +0 -0
  15. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_parquet_artifact.py +0 -0
  16. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/_parquet_reader.py +0 -0
  17. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/__init__.py +0 -0
  18. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/http/__init__.py +0 -0
  19. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/http/_http_config.py +0 -0
  20. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/parquet/__init__.py +0 -0
  21. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/parquet/_parquet_options.py +0 -0
  22. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/__init__.py +0 -0
  23. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  24. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
  25. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  26. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  27. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
  28. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  29. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/__init__.py +0 -0
  30. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_defaults.py +0 -0
  31. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_filter_handler.py +0 -0
  32. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_params_config.py +0 -0
  33. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/core/_query_config.py +0 -0
  34. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/df_helper/data_cleaner.py +0 -0
  35. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/geopy_helper/__init__.py +0 -0
  36. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/geopy_helper/geo_location_service.py +0 -0
  37. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/geopy_helper/utils.py +0 -0
  38. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/__init__.py +0 -0
  39. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/base_osm_map.py +0 -0
  40. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/__init__.py +0 -0
  41. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/calendar_html.py +0 -0
  42. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/route_map_plotter.py +0 -0
  43. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/basemaps/router_plotter.py +0 -0
  44. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/route_path_builder.py +0 -0
  45. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/osmnx_helper/utils.py +0 -0
  46. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/tests/__init__.py +0 -0
  47. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/tests/test_data_wrapper_class.py +0 -0
  48. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/__init__.py +0 -0
  49. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/async_utils.py +0 -0
  50. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_attacher.py +0 -0
  51. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_data_cube.py +0 -0
  52. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_parquet_artifact.py +0 -0
  53. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/boilerplate/base_parquet_reader.py +0 -0
  54. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/business_days.py +0 -0
  55. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/credentials.py +0 -0
  56. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/data_from_http_source.py +0 -0
  57. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/data_utils.py +0 -0
  58. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/data_wrapper.py +0 -0
  59. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/date_utils.py +0 -0
  60. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/df_utils.py +0 -0
  61. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/file_age_checker.py +0 -0
  62. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/file_utils.py +0 -0
  63. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/filepath_generator.py +0 -0
  64. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/iceberg_saver.py +0 -0
  65. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/log_utils.py +0 -0
  66. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/manifest_manager.py +0 -0
  67. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/parquet_saver.py +0 -0
  68. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/periods.py +0 -0
  69. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/phone_formatter.py +0 -0
  70. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/progress/__init__.py +0 -0
  71. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/progress/jobs.py +0 -0
  72. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/progress/sse_runner.py +0 -0
  73. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/storage_config.py +0 -0
  74. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/storage_hive.py +0 -0
  75. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/storage_manager.py +0 -0
  76. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/update_planner.py +0 -0
  77. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/webdav_client.py +0 -0
  78. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/utils/write_gatekeeper.py +0 -0
  79. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/__init__.py +0 -0
  80. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/__init__.py +0 -0
  81. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/_df_helper.py +0 -0
  82. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/__init__.py +0 -0
  83. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/__init__.py +0 -0
  84. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  85. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  86. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  87. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlalchemy/_model_builder.py +0 -0
  88. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/__init__.py +0 -0
  89. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_db_connection.py +0 -0
  90. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_io_dask.py +0 -0
  91. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_load_from_db.py +0 -0
  92. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/backends/sqlmodel/_model_builder.py +0 -0
  93. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/__init__.py +0 -0
  94. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/_filter_handler.py +0 -0
  95. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/_params_config.py +0 -0
  96. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/df_helper/core/_query_config.py +0 -0
  97. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/utils/__init__.py +0 -0
  98. {sibi_dst-2025.9.4 → sibi_dst-2025.9.6}/sibi_dst/v2/utils/log_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sibi-dst
3
- Version: 2025.9.4
3
+ Version: 2025.9.6
4
4
  Summary: Data Science Toolkit
5
5
  Author: Luis Valverde
6
6
  Author-email: lvalverdeb@gmail.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sibi-dst"
3
- version = "2025.9.4"
3
+ version = "2025.9.6"
4
4
  description = "Data Science Toolkit"
5
5
  authors = ["Luis Valverde <lvalverdeb@gmail.com>"]
6
6
  readme = "README.md"
@@ -0,0 +1,403 @@
1
+ import asyncio
2
+ import json
3
+ import threading
4
+ from typing import Any, Dict
5
+ from unittest.mock import MagicMock
6
+
7
+ import fsspec
8
+
9
+ from sibi_dst.utils import Logger
10
+ from sibi_dst.utils import ManagedResource
11
+ from sibi_dst.utils.base import _QueueSSE # Replace 'your_module' with actual module name
12
+
13
+
14
+ # ------------------------------ Test Fixtures ------------------------------
15
+
16
+ class TestResource(ManagedResource):
17
+ def __init__(self, *args, **kwargs):
18
+ super().__init__(*args, **kwargs)
19
+ self.cleanup_called = False
20
+ self.acleanup_called = False
21
+
22
+ def _cleanup(self) -> None:
23
+ self.cleanup_called = True
24
+ super()._cleanup()
25
+
26
+ async def _acleanup(self) -> None:
27
+ self.acleanup_called = True
28
+ await super()._acleanup()
29
+
30
+
31
+ class MockSSESink:
32
+ def __init__(self):
33
+ self.events = []
34
+ self.closed = False
35
+
36
+ async def send(self, event: str, data: Dict[str, Any]) -> None:
37
+ self.events.append({"event": event, "data": data})
38
+
39
+ async def aclose(self) -> None:
40
+ self.closed = True
41
+
42
+
43
+ class MockSyncSSESink:
44
+ def __init__(self):
45
+ self.events = []
46
+ self.closed = False
47
+
48
+ def send(self, event: str, data: Dict[str, Any]) -> None:
49
+ self.events.append({"event": event, "data": data})
50
+
51
+ def close(self) -> None:
52
+ self.closed = True
53
+
54
+
55
+ # ------------------------------ Mock fsspec filesystem ------------------------------
56
+
57
+ class MockFileSystem(fsspec.AbstractFileSystem):
58
+ def __init__(self, **kwargs):
59
+ super().__init__(**kwargs)
60
+ self.closed = False
61
+
62
+ def close(self):
63
+ self.closed = True
64
+
65
+
66
+ # ------------------------------ Utility for Event Loop ------------------------------
67
+
68
+ def run_async_test(coro):
69
+ """Run async test safely in different environments."""
70
+ try:
71
+ # Try to get existing event loop (for Jupyter/IPython)
72
+ loop = asyncio.get_event_loop()
73
+ if loop.is_running():
74
+ # In Jupyter, create a new task
75
+ task = loop.create_task(coro)
76
+ return task
77
+ else:
78
+ return loop.run_until_complete(coro)
79
+ except RuntimeError:
80
+ # No event loop running, use asyncio.run()
81
+ return asyncio.run(coro)
82
+
83
+
84
+ # ------------------------------ Lifecycle Tests ------------------------------
85
+
86
+ def test_double_close_no_error():
87
+ """Test that calling close() multiple times doesn't raise errors."""
88
+ resource = TestResource()
89
+ resource.close()
90
+ resource.close() # Should not raise
91
+ assert resource.closed
92
+
93
+
94
+ def test_double_aclose_no_error():
95
+ """Test that calling aclose() multiple times doesn't raise errors."""
96
+ async def test():
97
+ resource = TestResource()
98
+ await resource.aclose()
99
+ await resource.aclose() # Should not raise
100
+ assert resource.closed
101
+
102
+ run_async_test(test())
103
+
104
+
105
+ def test_context_manager_sync():
106
+ """Test sync context manager behavior."""
107
+ with TestResource() as resource:
108
+ assert not resource.closed
109
+ assert resource.closed
110
+ assert resource.cleanup_called
111
+
112
+
113
+ def test_context_manager_async():
114
+ """Test async context manager behavior."""
115
+ async def test():
116
+ async with TestResource() as resource:
117
+ assert not resource.closed
118
+ assert resource.closed
119
+ assert resource.acleanup_called
120
+
121
+ run_async_test(test())
122
+
123
+
124
+ # ------------------------------ SSE Emission Tests ------------------------------
125
+
126
+ def test_auto_sse_creation():
127
+ """Test automatic SSE creation when auto_sse=True."""
128
+ resource = TestResource(auto_sse=True)
129
+ sse = resource.get_sse()
130
+ assert sse is not None
131
+ assert isinstance(sse, _QueueSSE)
132
+ assert resource._owns_sse
133
+
134
+
135
+ def test_sse_emission_with_async_sink():
136
+ """Test SSE emission with async send method."""
137
+ async def test():
138
+ sink = MockSSESink()
139
+ resource = TestResource(sse=sink)
140
+
141
+ await resource.emit("test_event", key="value")
142
+
143
+ assert len(sink.events) == 1
144
+ assert sink.events[0]["event"] == "test_event"
145
+ assert sink.events[0]["data"] == {"key": "value"}
146
+
147
+ run_async_test(test())
148
+
149
+
150
+ def test_sse_emission_with_sync_sink():
151
+ """Test SSE emission with sync send method wrapped in async."""
152
+ sink = MockSyncSSESink()
153
+ resource = TestResource(sse=sink)
154
+
155
+ async def test():
156
+ await resource.emit("test_event", key="value")
157
+
158
+ assert len(sink.events) == 1
159
+ assert sink.events[0]["event"] == "test_event"
160
+ assert sink.events[0]["data"] == {"key": "value"}
161
+
162
+ run_async_test(test())
163
+
164
+
165
+ def test_sse_put_method_support():
166
+ """Test SSE emission with put method."""
167
+ class PutSink:
168
+ def __init__(self):
169
+ self.items = []
170
+
171
+ async def put(self, item: Dict[str, Any]) -> None:
172
+ self.items.append(item)
173
+
174
+ async def test():
175
+ sink = PutSink()
176
+ resource = TestResource(sse=sink)
177
+
178
+ await resource.emit("test_event", key="value")
179
+
180
+ assert len(sink.items) == 1
181
+ item = sink.items[0]
182
+ assert item["event"] == "test_event"
183
+ assert json.loads(item["data"]) == {"key": "value"}
184
+
185
+ run_async_test(test())
186
+
187
+
188
+ def test_sse_no_emitter_no_error():
189
+ """Test that emit on resource without emitter doesn't raise."""
190
+ resource = TestResource()
191
+ # Should not raise error
192
+ async def test():
193
+ await resource.emit("test_event", key="value")
194
+
195
+ run_async_test(test())
196
+
197
+
198
+ def test_sse_emission_after_close():
199
+ """Test that emit after close is no-op."""
200
+ async def test():
201
+ sink = MockSSESink()
202
+ resource = TestResource(sse=sink)
203
+
204
+ await resource.aclose()
205
+ await resource.emit("test_event", key="value") # Should not raise
206
+
207
+ assert len(sink.events) == 0
208
+
209
+ run_async_test(test())
210
+
211
+
212
+ # ------------------------------ Cleanup Interplay Tests ------------------------------
213
+
214
+ def test_sync_cleanup_called_on_sync_close():
215
+ """Test that sync cleanup is called during sync close."""
216
+ resource = TestResource()
217
+ resource.close()
218
+ assert resource.cleanup_called
219
+ assert not resource.acleanup_called
220
+
221
+
222
+ def test_async_cleanup_called_on_async_close():
223
+ """Test that async cleanup is called during async close."""
224
+ async def test():
225
+ resource = TestResource()
226
+ await resource.aclose()
227
+ assert resource.acleanup_called
228
+ assert not resource.cleanup_called
229
+
230
+ run_async_test(test())
231
+
232
+
233
+ # ------------------------------ Logger Tests ------------------------------
234
+
235
+ def test_logger_ownership():
236
+ """Test that logger is owned when not provided externally."""
237
+ resource = TestResource()
238
+ assert resource._owns_logger
239
+ assert resource.logger is not None
240
+
241
+
242
+ def test_external_logger_not_owned():
243
+ """Test that external logger is not owned."""
244
+ external_logger = Logger.default_logger("test")
245
+ resource = TestResource(logger=external_logger)
246
+ assert not resource._owns_logger
247
+ assert resource.logger is external_logger
248
+
249
+
250
+ def test_logger_level_configuration():
251
+ """Test logger level configuration based on verbose/debug flags."""
252
+ # Default (warning level)
253
+ resource = TestResource()
254
+ assert hasattr(resource.logger, 'level')
255
+
256
+ # Verbose (info level)
257
+ resource = TestResource(verbose=True)
258
+ assert hasattr(resource.logger, 'level')
259
+
260
+ # Debug (debug level)
261
+ resource = TestResource(debug=True)
262
+ assert hasattr(resource.logger, 'level')
263
+
264
+
265
+ # ------------------------------ Lazy Instantiation Tests ------------------------------
266
+
267
+ def test_lazy_fs_instantiation():
268
+ """Test lazy filesystem instantiation via factory."""
269
+ fs_instance = MockFileSystem()
270
+ factory_called = [False]
271
+
272
+ def fs_factory():
273
+ factory_called[0] = True
274
+ return fs_instance
275
+
276
+ resource = TestResource(fs_factory=fs_factory)
277
+ assert not factory_called[0] # Not called yet
278
+
279
+ fs = resource._ensure_fs()
280
+ assert factory_called[0]
281
+ assert fs is fs_instance
282
+ assert resource.fs is fs_instance
283
+
284
+
285
+ def test_lazy_sse_instantiation():
286
+ """Test lazy SSE instantiation via factory."""
287
+ sink_instance = MockSSESink()
288
+ factory_called = [False]
289
+
290
+ def sse_factory():
291
+ factory_called[0] = True
292
+ return sink_instance
293
+
294
+ resource = TestResource(sse_factory=sse_factory)
295
+ assert not factory_called[0] # Not called yet
296
+
297
+ sse = resource._ensure_sse()
298
+ assert factory_called[0]
299
+ assert sse is sink_instance
300
+ assert resource._sse is sink_instance
301
+
302
+
303
+ def test_lazy_fs_not_called_if_fs_provided():
304
+ """Test that factory is not called if fs is provided directly."""
305
+ fs_instance = MockFileSystem()
306
+ factory = MagicMock()
307
+
308
+ resource = TestResource(fs=fs_instance, fs_factory=factory)
309
+ fs = resource._ensure_fs()
310
+
311
+ assert fs is fs_instance
312
+ factory.assert_not_called()
313
+
314
+
315
+ def test_lazy_sse_not_called_if_sse_provided():
316
+ """Test that factory is not called if sse is provided directly."""
317
+ sink_instance = MockSSESink()
318
+ factory = MagicMock()
319
+
320
+ resource = TestResource(sse=sink_instance, sse_factory=factory)
321
+ sse = resource._ensure_sse()
322
+
323
+ assert sse is sink_instance
324
+ factory.assert_not_called()
325
+
326
+
327
+ # ------------------------------ Thread Safety Tests ------------------------------
328
+
329
+ def test_thread_safe_close():
330
+ """Test that close operations are thread-safe."""
331
+ resource = TestResource()
332
+
333
+ results = []
334
+ errors = []
335
+
336
+ def close_resource():
337
+ try:
338
+ resource.close()
339
+ results.append("success")
340
+ except Exception as e:
341
+ errors.append(str(e))
342
+ results.append(f"error: {e}")
343
+
344
+ # Start multiple threads trying to close simultaneously
345
+ threads = [threading.Thread(target=close_resource) for _ in range(5)]
346
+ for t in threads:
347
+ t.start()
348
+ for t in threads:
349
+ t.join()
350
+
351
+ # Debug information
352
+ print(f"Results: {results}")
353
+ print(f"Errors: {errors}")
354
+ print(f"Resource closed: {resource.closed}")
355
+
356
+ # Should have at least one success (the first one) and no exceptions
357
+ success_count = results.count("success")
358
+ error_count = len([r for r in results if r.startswith("error")])
359
+
360
+ # At least one should succeed
361
+ assert success_count >= 1, f"Expected at least 1 success, got {success_count}"
362
+ # No errors should occur
363
+ assert error_count == 0, f"Expected 0 errors, got {error_count}"
364
+ # Resource should be closed
365
+ assert resource.closed, "Resource should be closed"
366
+
367
+
368
+ # ------------------------------ Individual Test Functions ------------------------------
369
+
370
+ # You can now run individual tests like this:
371
+ if __name__ == "__main__":
372
+ # Run individual tests
373
+ test_double_close_no_error()
374
+ print("✓ test_double_close_no_error passed")
375
+
376
+ test_sync_cleanup_called_on_sync_close()
377
+ print("✓ test_sync_cleanup_called_on_sync_close passed")
378
+
379
+ test_logger_ownership()
380
+ print("✓ test_logger_ownership passed")
381
+
382
+ test_external_logger_not_owned()
383
+ print("✓ test_external_logger_not_owned passed")
384
+
385
+ test_lazy_fs_instantiation()
386
+ print("✓ test_lazy_fs_instantiation passed")
387
+
388
+ test_lazy_sse_instantiation()
389
+ print("✓ test_lazy_sse_instantiation passed")
390
+
391
+ test_lazy_fs_not_called_if_fs_provided()
392
+ print("✓ test_lazy_fs_not_called_if_fs_provided passed")
393
+
394
+ test_lazy_sse_not_called_if_sse_provided()
395
+ print("✓ test_lazy_sse_not_called_if_sse_provided passed")
396
+
397
+ test_thread_safe_close()
398
+ print("✓ test_thread_safe_close passed")
399
+
400
+ test_auto_sse_creation()
401
+ print("✓ test_auto_sse_creation passed")
402
+
403
+ print("All tests completed!")
@@ -441,257 +441,3 @@ class ManagedResource(abc.ABC):
441
441
  except Exception:
442
442
  pass
443
443
 
444
- ## Before SSE handling
445
-
446
- # import abc
447
- # import threading
448
- # import weakref
449
- # from typing import Self, Optional, Callable
450
- #
451
- # import fsspec
452
- #
453
- # from sibi_dst.utils import Logger
454
- #
455
- #
456
- # class ManagedResource(abc.ABC):
457
- # """
458
- # Boilerplate ABC for components that manage a logger and an optional fsspec filesystem,
459
- # with sync/async lifecycle helpers, lazy FS creation via an optional factory, and
460
- # configurable cleanup-error logging.
461
- # """
462
- #
463
- # def __init__(
464
- # self,
465
- # *,
466
- # verbose: bool = False,
467
- # debug: bool = False,
468
- # log_cleanup_errors: bool = True,
469
- # logger: Optional[Logger] = None,
470
- # fs: Optional[fsspec.AbstractFileSystem] = None,
471
- # fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None,
472
- # **_: object,
473
- # ) -> None:
474
- # # ---- Declared upfront for type checkers
475
- # self.logger: Logger
476
- # self.fs: Optional[fsspec.AbstractFileSystem] = None
477
- # self._fs_factory: Optional[Callable[[], fsspec.AbstractFileSystem]] = None
478
- # self._owns_logger: bool = False
479
- # self._owns_fs: bool = False
480
- # self._is_closed: bool = False
481
- # self._closing: bool = False
482
- # self._close_lock = threading.RLock()
483
- #
484
- # self.verbose = verbose
485
- # self.debug = debug
486
- # self._log_cleanup_errors = log_cleanup_errors
487
- #
488
- # # ---- Logger ownership
489
- # if logger is None:
490
- # self.logger = Logger.default_logger(logger_name=self.__class__.__name__)
491
- # self._owns_logger = True
492
- # level = Logger.DEBUG if self.debug else (Logger.INFO if self.verbose else Logger.WARNING)
493
- # self.logger.set_level(level)
494
- # else:
495
- # self.logger = logger
496
- # self._owns_logger = False # do not mutate external logger
497
- #
498
- # # ---- FS ownership & lazy creation
499
- # if fs is not None:
500
- # self.fs = fs
501
- # self._owns_fs = False
502
- # self._fs_factory = None
503
- # elif fs_factory is not None:
504
- # # Lazy: don't create until first use
505
- # self._fs_factory = fs_factory
506
- # self._owns_fs = True # we will own it *if* created
507
- # self.fs = None
508
- # else:
509
- # self.fs = None
510
- # self._owns_fs = False
511
- # self._fs_factory = None
512
- #
513
- # # Register a GC-time finalizer that does not capture self
514
- # self_ref = weakref.ref(self)
515
- # self._finalizer = weakref.finalize(self, self._finalize_static, self_ref)
516
- #
517
- # if self.debug:
518
- # try:
519
- # self.logger.debug("Component %s initialized. %s", self.__class__.__name__, repr(self))
520
- # except Exception:
521
- # pass
522
- #
523
- # # ---------- Introspection ----------
524
- # @property
525
- # def is_closed(self) -> bool:
526
- # return self._is_closed
527
- #
528
- # @property
529
- # def closed(self) -> bool: # alias
530
- # return self._is_closed
531
- #
532
- # def __repr__(self) -> str:
533
- # class_name = self.__class__.__name__
534
- # logger_status = "own" if self._owns_logger else "external"
535
- # if self.fs is None and self._fs_factory is not None:
536
- # fs_status = "own(lazy)"
537
- # elif self.fs is None:
538
- # fs_status = "none"
539
- # else:
540
- # fs_status = "own" if self._owns_fs else "external"
541
- # return (f"<{class_name} debug={self.debug} verbose={self.verbose} "
542
- # f"log_cleanup_errors={self._log_cleanup_errors} "
543
- # f"logger={logger_status} fs={fs_status}>")
544
- #
545
- # # ---------- Subclass hooks ----------
546
- # def _cleanup(self) -> None:
547
- # """Sync cleanup for resources created BY THE SUBCLASS."""
548
- # return
549
- #
550
- # async def _acleanup(self) -> None:
551
- # """Async cleanup for resources created BY THE SUBCLASS."""
552
- # return
553
- #
554
- # # ---------- FS helpers ----------
555
- # def _ensure_fs(self) -> Optional[fsspec.AbstractFileSystem]:
556
- # """Create the FS lazily if a factory was provided. Return fs (or None)."""
557
- # if self.fs is None and self._fs_factory is not None:
558
- # created = self._fs_factory()
559
- # if not isinstance(created, fsspec.AbstractFileSystem):
560
- # raise TypeError(f"fs_factory() must return fsspec.AbstractFileSystem, got {type(created)!r}")
561
- # self.fs = created
562
- # # _owns_fs already True when factory is present
563
- # return self.fs
564
- #
565
- # def require_fs(self) -> fsspec.AbstractFileSystem:
566
- # """Return a filesystem or raise if not configured/creatable."""
567
- # fs = self._ensure_fs()
568
- # if fs is None:
569
- # raise RuntimeError(
570
- # f"{self.__class__.__name__}: filesystem is required but not configured"
571
- # )
572
- # return fs
573
- #
574
- # # ---------- Shared shutdown helpers (no logging; safe for late shutdown) ----------
575
- # def _release_owned_fs(self) -> None:
576
- # if self._owns_fs:
577
- # # ensure creation state is respected even if never used
578
- # _ = self.fs or None # no-op; if never created, nothing to close
579
- # if self.fs is not None:
580
- # close = getattr(self.fs, "close", None)
581
- # try:
582
- # if callable(close):
583
- # close()
584
- # finally:
585
- # self.fs = None
586
- #
587
- # def _shutdown_logger(self) -> None:
588
- # if self._owns_logger:
589
- # try:
590
- # self.logger.shutdown()
591
- # except Exception:
592
- # pass
593
- #
594
- # def _shutdown_owned_resources(self) -> None:
595
- # self._release_owned_fs()
596
- # self._shutdown_logger()
597
- #
598
- # # ---------- Public lifecycle (sync) ----------
599
- # def close(self) -> None:
600
- # with self._close_lock:
601
- # if self._is_closed or self._closing:
602
- # return
603
- # self._closing = True
604
- #
605
- # try:
606
- # self._cleanup()
607
- # except Exception:
608
- # # Only include traceback when debug=True
609
- # if self._log_cleanup_errors:
610
- # try:
611
- # self.logger.error(
612
- # "Error during %s._cleanup()", self.__class__.__name__,
613
- # exc_info=self.debug
614
- # )
615
- # except Exception:
616
- # pass
617
- # raise
618
- # finally:
619
- # with self._close_lock:
620
- # self._is_closed = True
621
- # self._closing = False
622
- # self._shutdown_owned_resources()
623
- # if self.debug:
624
- # try:
625
- # self.logger.debug("Component %s closed.", self.__class__.__name__)
626
- # except Exception:
627
- # pass
628
- #
629
- # # ---------- Public lifecycle (async) ----------
630
- # async def aclose(self) -> None:
631
- # with self._close_lock:
632
- # if self._is_closed or self._closing:
633
- # return
634
- # self._closing = True
635
- #
636
- # try:
637
- # await self._acleanup()
638
- # except Exception:
639
- # # Only include traceback when debug=True
640
- # if self._log_cleanup_errors:
641
- # try:
642
- # self.logger.error(
643
- # "Error during %s._acleanup()", self.__class__.__name__,
644
- # exc_info=self.debug
645
- # )
646
- # except Exception:
647
- # pass
648
- # raise
649
- # finally:
650
- # with self._close_lock:
651
- # self._is_closed = True
652
- # self._closing = False
653
- # self._shutdown_owned_resources()
654
- # if self.debug:
655
- # try:
656
- # self.logger.debug("Async component %s closed.", self.__class__.__name__)
657
- # except Exception:
658
- # pass
659
- #
660
- # # ---------- Context managers ----------
661
- # def __enter__(self) -> Self:
662
- # return self
663
- #
664
- # def __exit__(self, exc_type, exc, tb) -> bool:
665
- # self.close()
666
- # return False # propagate exceptions
667
- #
668
- # async def __aenter__(self) -> Self:
669
- # return self
670
- #
671
- # async def __aexit__(self, exc_type, exc, tb) -> bool:
672
- # await self.aclose()
673
- # return False
674
- #
675
- # # ---------- Finalizer ( at Garbage Collection-time absolutely silent) ----------
676
- # @staticmethod
677
- # def _finalize_static(ref: "weakref.ReferenceType[ManagedResource]") -> None:
678
- # obj = ref()
679
- # if obj is None:
680
- # return
681
- # # No logging here; interpreter may be tearing down.
682
- # # Best-effort silent cleanup; avoid locks and context managers.
683
- # try:
684
- # if not obj._is_closed:
685
- # try:
686
- # obj._cleanup()
687
- # except Exception:
688
- # pass
689
- # obj._is_closed = True
690
- # try:
691
- # obj._shutdown_owned_resources()
692
- # except Exception:
693
- # pass
694
- # except Exception:
695
- # # do not show anything at garbage collection time
696
- # pass
697
- #
@@ -2,10 +2,13 @@ from .base_parquet_artifact import BaseParquetArtifact
2
2
  from .base_data_cube import BaseDataCube
3
3
  from .base_attacher import make_attacher
4
4
  from .base_parquet_reader import BaseParquetReader
5
+ from .hybrid_data_loader import HybridDataLoader
6
+
5
7
  __all__ = [
6
8
  "BaseDataCube",
7
9
  "BaseParquetArtifact",
8
10
  "make_attacher",
9
- "BaseParquetReader"
11
+ "BaseParquetReader",
12
+ "HybridDataLoader",
10
13
  ]
11
14