sibi-flux 2025.12.0__tar.gz → 2026.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/PKG-INFO +43 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/README.md +40 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/pyproject.toml +47 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/__init__.py +4 -4
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/executor.py +1 -1
- sibi_flux-2026.1.2/src/sibi_flux/cli.py +45 -0
- sibi_flux-2026.1.2/src/sibi_flux/config/__init__.py +3 -0
- {sibi_flux-2025.12.0/src/sibi_flux/conf → sibi_flux-2026.1.2/src/sibi_flux/config}/settings.py +7 -7
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/async_core.py +1 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/client_manager.py +5 -2
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/core.py +3 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/_data_cube.py +12 -3
- sibi_flux-2026.1.2/src/sibi_flux/datacube/cli.py +1247 -0
- sibi_flux-2026.1.2/src/sibi_flux/datacube/config_engine.py +219 -0
- sibi_flux-2026.1.2/src/sibi_flux/datacube/field_factory.py +131 -0
- sibi_flux-2026.1.2/src/sibi_flux/datacube/field_mapper.py +243 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/field_registry.py +2 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/generator.py +322 -90
- sibi_flux-2026.1.2/src/sibi_flux/datacube/orchestrator.py +297 -0
- sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_cube_router.py → sibi_flux-2026.1.2/src/sibi_flux/datacube/router.py +2 -3
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dataset/_dataset.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/_df_helper.py +2 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/_params.py +6 -6
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_validator/_df_validator.py +5 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/core.py +159 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/discovery_updater.py +99 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/env.py +86 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/env_engine.py +151 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/env_generator.py +554 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/templates/__init__.py +0 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/templates/discovery_params.yaml +45 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/templates/gen_dc.py +137 -0
- sibi_flux-2026.1.2/src/sibi_flux/init/templates/property_template.yaml +10 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/mcp/__init__.py +10 -2
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/mcp/router.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/__init__.py +12 -4
- sibi_flux-2026.1.2/src/sibi_flux/parquet/__init__.py +8 -0
- sibi_flux-2026.1.2/src/sibi_flux/parquet/readers/__init__.py +4 -0
- {sibi_flux-2025.12.0/src/sibi_flux → sibi_flux-2026.1.2/src/sibi_flux/parquet}/readers/base.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/pipelines/base.py +1 -2
- sibi_flux-2026.1.2/src/sibi_flux/py.typed +0 -0
- sibi_flux-2026.1.2/src/sibi_flux/readers/__init__.py +3 -0
- sibi_flux-2026.1.2/src/sibi_flux/readers/base.py +3 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/_storage_manager.py +4 -4
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/factory.py +1 -1
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/__init__.py +1 -2
- sibi_flux-2025.12.0/src/sibi_flux/datacube/config_engine.py +0 -152
- sibi_flux-2025.12.0/src/sibi_flux/datacube/field_factory.py +0 -48
- sibi_flux-2025.12.0/src/sibi_flux/datacube/orchestrator.py +0 -171
- sibi_flux-2025.12.0/src/sibi_flux/readers/__init__.py +0 -3
- sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/__init__.py +0 -19
- sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_data_cube.py +0 -132
- sibi_flux-2025.12.0/src/sibi_flux/utils/credentials/__init__.py +0 -3
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_dst/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/base.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/manifest.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/planner.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/credentials/_config_manager.py → /sibi_flux-2026.1.2/src/sibi_flux/config/manager.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/managed_resource/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/managed_resource/_managed_resource.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/type_maps/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/exceptions.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dataset/__init__.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/hybrid_data_loader.py → /sibi_flux-2026.1.2/src/sibi_flux/dataset/hybrid_loader.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/async_enricher.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_attacher.py → /sibi_flux-2026.1.2/src/sibi_flux/df_enricher/attacher.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/merger.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/specs.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/types.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/_strategies.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/http/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/http/_http_config.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_defaults.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_filter_handler.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_params_config.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_query_config.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_validator/__init__.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/py.typed → /sibi_flux-2026.1.2/src/sibi_flux/init/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/logger/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/logger/_logger.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/mcp/client.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/orchestration/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/orchestration/_artifact_orchestrator.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/orchestration/_pipeline_executor.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/_pbf_handler.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/graph_loader.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/utils.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux → sibi_flux-2026.1.2/src/sibi_flux/parquet}/readers/parquet.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.2/src/sibi_flux/parquet/saver}/__init__.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.2/src/sibi_flux/parquet/saver}/_parquet_saver.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.2/src/sibi_flux/parquet/saver}/_write_gatekeeper.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/pipelines/__init__.py +0 -0
- /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_pipeline_template.py → /sibi_flux-2026.1.2/src/sibi_flux/pipelines/template.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/__init__.py +0 -0
- {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/_fs_registry.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/clickhouse_writer/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/clickhouse_writer/_clickhouse_writer.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/common.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/dask_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/data_utils/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/data_utils/_data_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/dataframe_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/_business_days.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/_date_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/_file_age_checker.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/file_utils.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/filepath_generator/__init__.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/filepath_generator/_filepath_generator.py +0 -0
- {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/retry.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sibi-flux
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2026.1.2
|
|
4
4
|
Summary: Sibi Toolkit: A collection of tools for Data Analysis/Engineering.
|
|
5
5
|
Author: Luis Valverde
|
|
6
6
|
Author-email: Luis Valverde <lvalverdeb@gmail.com>
|
|
@@ -27,6 +27,8 @@ Requires-Dist: httpx>=0.28.1
|
|
|
27
27
|
Requires-Dist: opentelemetry-api>=1.38.0
|
|
28
28
|
Requires-Dist: opentelemetry-exporter-otlp>=1.38.0
|
|
29
29
|
Requires-Dist: opentelemetry-sdk>=1.38.0
|
|
30
|
+
Requires-Dist: deep-translator>=1.11.4
|
|
31
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
30
32
|
Requires-Dist: sibi-flux[distributed,geospatial,mcp] ; extra == 'complete'
|
|
31
33
|
Requires-Dist: distributed>=2025.11.0 ; extra == 'distributed'
|
|
32
34
|
Requires-Dist: osmnx>=2.0.7 ; extra == 'geospatial'
|
|
@@ -281,3 +283,43 @@ async with GenericMcpClient(url="http://localhost:8000/sse") as client:
|
|
|
281
283
|
# Call a tool
|
|
282
284
|
vat = await client.call_tool("calculate_vat", arguments={"amount": 100.0})
|
|
283
285
|
```
|
|
286
|
+
|
|
287
|
+
### 8. Datacube Generation (`gen_dc.py`)
|
|
288
|
+
|
|
289
|
+
Automate the creation of Datacube classes and Field Maps from your database schema.
|
|
290
|
+
|
|
291
|
+
**Configuration (`discovery_params.yaml`)**
|
|
292
|
+
|
|
293
|
+
Define your generation rules in a hierarchical configuration file:
|
|
294
|
+
|
|
295
|
+
```yaml
|
|
296
|
+
defaults:
|
|
297
|
+
backend: sqlalchemy
|
|
298
|
+
class_suffix: Dc
|
|
299
|
+
|
|
300
|
+
discovery:
|
|
301
|
+
whitelist_file: whitelist.yaml
|
|
302
|
+
rules_file: discovery_rules.yaml
|
|
303
|
+
|
|
304
|
+
generation:
|
|
305
|
+
enable_field_maps: true
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
**Commands**
|
|
309
|
+
|
|
310
|
+
1. **Discover**: Introspects the database and updates your whitelist/registry.
|
|
311
|
+
```bash
|
|
312
|
+
uv run poe dc-discover
|
|
313
|
+
```
|
|
314
|
+
* **Whitelist**: Explicitly define tables to generate. Support `custom_name` to override class names.
|
|
315
|
+
* **Rules**: Regex-based patterns to match tables.
|
|
316
|
+
|
|
317
|
+
2. **Sync**: Generates Python code (Datacubes and Field Maps) based on the registry.
|
|
318
|
+
```bash
|
|
319
|
+
uv run poe dc-sync --force
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
**Key Features**
|
|
323
|
+
- **Custom Naming**: Add `custom_name: MyCube` to `whitelist.yaml` to override generated names.
|
|
324
|
+
- **Hierarchical Config**: Strict validation of generation parameters.
|
|
325
|
+
- **Field Maps**: Auto-generates type-safe mapping files for every table.
|
|
@@ -231,3 +231,43 @@ async with GenericMcpClient(url="http://localhost:8000/sse") as client:
|
|
|
231
231
|
# Call a tool
|
|
232
232
|
vat = await client.call_tool("calculate_vat", arguments={"amount": 100.0})
|
|
233
233
|
```
|
|
234
|
+
|
|
235
|
+
### 8. Datacube Generation (`gen_dc.py`)
|
|
236
|
+
|
|
237
|
+
Automate the creation of Datacube classes and Field Maps from your database schema.
|
|
238
|
+
|
|
239
|
+
**Configuration (`discovery_params.yaml`)**
|
|
240
|
+
|
|
241
|
+
Define your generation rules in a hierarchical configuration file:
|
|
242
|
+
|
|
243
|
+
```yaml
|
|
244
|
+
defaults:
|
|
245
|
+
backend: sqlalchemy
|
|
246
|
+
class_suffix: Dc
|
|
247
|
+
|
|
248
|
+
discovery:
|
|
249
|
+
whitelist_file: whitelist.yaml
|
|
250
|
+
rules_file: discovery_rules.yaml
|
|
251
|
+
|
|
252
|
+
generation:
|
|
253
|
+
enable_field_maps: true
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
**Commands**
|
|
257
|
+
|
|
258
|
+
1. **Discover**: Introspects the database and updates your whitelist/registry.
|
|
259
|
+
```bash
|
|
260
|
+
uv run poe dc-discover
|
|
261
|
+
```
|
|
262
|
+
* **Whitelist**: Explicitly define tables to generate. Support `custom_name` to override class names.
|
|
263
|
+
* **Rules**: Regex-based patterns to match tables.
|
|
264
|
+
|
|
265
|
+
2. **Sync**: Generates Python code (Datacubes and Field Maps) based on the registry.
|
|
266
|
+
```bash
|
|
267
|
+
uv run poe dc-sync --force
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
**Key Features**
|
|
271
|
+
- **Custom Naming**: Add `custom_name: MyCube` to `whitelist.yaml` to override generated names.
|
|
272
|
+
- **Hierarchical Config**: Strict validation of generation parameters.
|
|
273
|
+
- **Field Maps**: Auto-generates type-safe mapping files for every table.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sibi-flux"
|
|
3
|
-
version = "
|
|
3
|
+
version = "2026.1.2"
|
|
4
4
|
description = "Sibi Toolkit: A collection of tools for Data Analysis/Engineering."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -33,6 +33,8 @@ dependencies = [
|
|
|
33
33
|
"opentelemetry-api>=1.38.0",
|
|
34
34
|
"opentelemetry-exporter-otlp>=1.38.0",
|
|
35
35
|
"opentelemetry-sdk>=1.38.0",
|
|
36
|
+
"deep-translator>=1.11.4",
|
|
37
|
+
"pyyaml>=6.0.3",
|
|
36
38
|
]
|
|
37
39
|
|
|
38
40
|
[project.optional-dependencies]
|
|
@@ -59,6 +61,9 @@ complete = [
|
|
|
59
61
|
"sibi-flux[distributed,geospatial,mcp]"
|
|
60
62
|
]
|
|
61
63
|
|
|
64
|
+
[project.scripts]
|
|
65
|
+
sibi-flux = "sibi_flux.cli:app"
|
|
66
|
+
|
|
62
67
|
|
|
63
68
|
[dependency-groups]
|
|
64
69
|
dev = [
|
|
@@ -89,6 +94,35 @@ build-backend = "uv_build"
|
|
|
89
94
|
module-root = "src"
|
|
90
95
|
module-name = ["sibi_flux", "sibi_dst"]
|
|
91
96
|
|
|
97
|
+
[tool.uv.workspace]
|
|
98
|
+
members = [
|
|
99
|
+
"test-project-alpha",
|
|
100
|
+
"test-project-beta",
|
|
101
|
+
"test-project-gamma",
|
|
102
|
+
"test-project-delta",
|
|
103
|
+
"test-project-epsilon",
|
|
104
|
+
"test-project-zeta",
|
|
105
|
+
"test-project-eta",
|
|
106
|
+
"test-project-theta",
|
|
107
|
+
"test-project-iota",
|
|
108
|
+
"test-project-kappa",
|
|
109
|
+
"test-project-lambda",
|
|
110
|
+
"test-project-alpha/latest",
|
|
111
|
+
"test-project-mu",
|
|
112
|
+
"test-project-mu/latest",
|
|
113
|
+
"test-project-xi",
|
|
114
|
+
"test-project-xi/latest",
|
|
115
|
+
"test-project-omicron",
|
|
116
|
+
"test-project-omicron/latest",
|
|
117
|
+
"test-project-pi",
|
|
118
|
+
"test-project-pi/latest",
|
|
119
|
+
"test-project-rho",
|
|
120
|
+
"test-project-rho/latest",
|
|
121
|
+
"test-project-sigma",
|
|
122
|
+
"test-project-sigma/latest",
|
|
123
|
+
"test-project-tau",
|
|
124
|
+
]
|
|
125
|
+
|
|
92
126
|
[tool.pytest.ini_options]
|
|
93
127
|
pythonpath = ["src", "."]
|
|
94
128
|
testpaths = ["tests"]
|
|
@@ -97,6 +131,7 @@ filterwarnings = ["ignore::DeprecationWarning"]
|
|
|
97
131
|
|
|
98
132
|
|
|
99
133
|
[tool.poe.tasks]
|
|
134
|
+
publish = { cmd = "uv publish", envfile = ".env" }
|
|
100
135
|
dev = """
|
|
101
136
|
uvicorn solutions.main:app
|
|
102
137
|
--reload
|
|
@@ -108,6 +143,17 @@ uvicorn solutions.main:app
|
|
|
108
143
|
"""
|
|
109
144
|
test = { cmd = "pytest tests/"}
|
|
110
145
|
lint = "black src/"
|
|
146
|
+
build = "uv build"
|
|
147
|
+
dc-sync = "python solutions/generators/datacubes/gen_dc.py sync"
|
|
148
|
+
dc-init = "python solutions/generators/datacubes/gen_dc.py init"
|
|
149
|
+
dc-discover = "python solutions/generators/datacubes/gen_dc.py discover"
|
|
150
|
+
dc-scan = "python solutions/generators/datacubes/gen_dc.py scan"
|
|
151
|
+
dc-match = "python solutions/generators/datacubes/gen_dc.py match"
|
|
152
|
+
dc-map = "python solutions/generators/datacubes/gen_dc.py map"
|
|
153
|
+
|
|
154
|
+
[tool.poe.tasks.release]
|
|
155
|
+
sequence = ["build","publish"]
|
|
156
|
+
envfile = ".env" # Loads the token for the whole sequence
|
|
111
157
|
|
|
112
158
|
[tool.commitizen]
|
|
113
159
|
name = "cz_conventional_commits"
|
|
@@ -21,11 +21,12 @@ from sibi_flux.df_validator._df_validator import DfValidator
|
|
|
21
21
|
|
|
22
22
|
# Artifacts
|
|
23
23
|
from sibi_flux.artifacts import ParquetArtifact, BaseArtifact as Artifact
|
|
24
|
-
from sibi_flux.
|
|
24
|
+
from sibi_flux.parquet import ParquetReader
|
|
25
25
|
|
|
26
26
|
# Utilities (Sub-packages)
|
|
27
27
|
from sibi_flux import dask_cluster
|
|
28
|
-
from sibi_flux.utils import
|
|
28
|
+
from sibi_flux.utils import clickhouse_writer
|
|
29
|
+
from sibi_flux import parquet
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
__all__ = [
|
|
@@ -43,7 +44,6 @@ __all__ = [
|
|
|
43
44
|
"Artifact",
|
|
44
45
|
"ParquetReader",
|
|
45
46
|
"dask_cluster",
|
|
46
|
-
"
|
|
47
|
-
"parquet_saver",
|
|
47
|
+
"parquet",
|
|
48
48
|
"clickhouse_writer",
|
|
49
49
|
]
|
{sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/executor.py
RENAMED
|
@@ -13,7 +13,7 @@ import functools
|
|
|
13
13
|
from tqdm import tqdm
|
|
14
14
|
import pandas as pd
|
|
15
15
|
from sibi_flux.core import ManagedResource
|
|
16
|
-
from sibi_flux.
|
|
16
|
+
from sibi_flux.parquet import ParquetSaver
|
|
17
17
|
|
|
18
18
|
from sibi_flux.utils import ensure_slash
|
|
19
19
|
from sibi_flux.utils.retry import with_retry
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from sibi_flux.init.core import initialize_project
|
|
6
|
+
|
|
7
|
+
app = typer.Typer(help="Sibi Flux CLI")
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
@app.callback()
|
|
11
|
+
def callback():
|
|
12
|
+
"""
|
|
13
|
+
Sibi Flux CLI
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
@app.command()
|
|
17
|
+
def init(
|
|
18
|
+
project_name: str = typer.Argument(..., help="Name of the project to create"),
|
|
19
|
+
lib: bool = typer.Option(False, "--lib", help="Initialize as a library project (passed to uv init)"),
|
|
20
|
+
app: bool = typer.Option(False, "--app", help="Initialize as an application project (passed to uv init)")
|
|
21
|
+
):
|
|
22
|
+
"""
|
|
23
|
+
Initialize a new Sibi Flux project.
|
|
24
|
+
|
|
25
|
+
Creates a new directory <project_name>, initializes it with 'uv',
|
|
26
|
+
and adds 'sibi-flux' as a dependency.
|
|
27
|
+
"""
|
|
28
|
+
initialize_project(project_name, lib, app)
|
|
29
|
+
|
|
30
|
+
@app.command()
|
|
31
|
+
def env(
|
|
32
|
+
project_path: Path = typer.Argument(Path("."), help="Project root directory"),
|
|
33
|
+
env_file: Optional[Path] = typer.Option(None, "--env-file", "-e", help="Path to environment file (defaults to .env)"),
|
|
34
|
+
cleanup: bool = typer.Option(False, "--cleanup", help="Remove existing configuration files"),
|
|
35
|
+
production: bool = typer.Option(False, "--production", "-p", help="Generate production skeleton (no hardcoded values)"),
|
|
36
|
+
):
|
|
37
|
+
"""
|
|
38
|
+
Initialize configuration files (settings.py, credentials) based on .env
|
|
39
|
+
"""
|
|
40
|
+
from sibi_flux.init.env import init_env
|
|
41
|
+
init_env(project_path, env_file, cleanup=cleanup, production_mode=production)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
if __name__ == "__main__":
|
|
45
|
+
app()
|
{sibi_flux-2025.12.0/src/sibi_flux/conf → sibi_flux-2026.1.2/src/sibi_flux/config}/settings.py
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional, Any
|
|
1
|
+
from typing import Optional, Any, ClassVar
|
|
2
2
|
from pydantic import SecretStr
|
|
3
3
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
4
|
|
|
@@ -9,6 +9,8 @@ class SibiBaseSettings(BaseSettings):
|
|
|
9
9
|
model_config = SettingsConfigDict(
|
|
10
10
|
env_file=".env", env_file_encoding="utf-8", extra="ignore"
|
|
11
11
|
)
|
|
12
|
+
|
|
13
|
+
conf_name: ClassVar[str] = ""
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class FsSettings(SibiBaseSettings):
|
|
@@ -84,13 +86,8 @@ class DatabaseSettings(SibiBaseSettings):
|
|
|
84
86
|
"""Generic SQL Database settings."""
|
|
85
87
|
|
|
86
88
|
db_url: str = "sqlite:///:memory:"
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
class ClickhouseBaseSettings(SibiBaseSettings):
|
|
90
|
-
"""Base settings for ClickHouse connection."""
|
|
91
|
-
|
|
92
89
|
host: str = "localhost"
|
|
93
|
-
port: int =
|
|
90
|
+
port: int = 5432
|
|
94
91
|
database: str = "default"
|
|
95
92
|
user: str = "default"
|
|
96
93
|
password: SecretStr = SecretStr("secret")
|
|
@@ -102,9 +99,12 @@ class ClickhouseBaseSettings(SibiBaseSettings):
|
|
|
102
99
|
"dbname": self.database,
|
|
103
100
|
"user": self.user,
|
|
104
101
|
"password": self.password.get_secret_value() if self.password else None,
|
|
102
|
+
"db_url": self.db_url,
|
|
105
103
|
}
|
|
106
104
|
|
|
107
105
|
|
|
106
|
+
|
|
107
|
+
|
|
108
108
|
class RedisBaseSettings(SibiBaseSettings):
|
|
109
109
|
"""Base settings for Redis connection."""
|
|
110
110
|
|
|
@@ -23,6 +23,7 @@ import shutil
|
|
|
23
23
|
|
|
24
24
|
try:
|
|
25
25
|
from dask.distributed import Client, LocalCluster, get_client
|
|
26
|
+
|
|
26
27
|
HAS_DISTRIBUTED = True
|
|
27
28
|
except ImportError:
|
|
28
29
|
Client = object
|
|
@@ -231,8 +232,10 @@ class DaskClientMixin:
|
|
|
231
232
|
def _init_dask_client(self, **kwargs) -> None:
|
|
232
233
|
self._init_params = kwargs
|
|
233
234
|
if not HAS_DISTRIBUTED:
|
|
234
|
-
|
|
235
|
-
|
|
235
|
+
self.logger.info(
|
|
236
|
+
"Dask Distributed not installed. Skipping cluster initialization."
|
|
237
|
+
)
|
|
238
|
+
return
|
|
236
239
|
|
|
237
240
|
if kwargs.get("dask_client"):
|
|
238
241
|
self.dask_client = kwargs["dask_client"]
|
|
@@ -14,15 +14,18 @@ from typing import Any, Callable, Dict, List, Optional, TypeVar
|
|
|
14
14
|
import dask
|
|
15
15
|
import dask.dataframe as dd
|
|
16
16
|
import pandas as pd
|
|
17
|
+
|
|
17
18
|
try:
|
|
18
19
|
from dask.distributed import Client, Future
|
|
19
20
|
from dask.distributed import wait as dask_wait
|
|
20
21
|
except ImportError:
|
|
21
22
|
Client = object
|
|
22
23
|
Future = object
|
|
24
|
+
|
|
23
25
|
def dask_wait(*args, **kwargs):
|
|
24
26
|
pass
|
|
25
27
|
|
|
28
|
+
|
|
26
29
|
# Project-specific imports
|
|
27
30
|
from .client_manager import get_persistent_client
|
|
28
31
|
from .exceptions import RECOVERABLE_COMMS
|
|
@@ -175,9 +175,10 @@ class Datacube(DfHelper):
|
|
|
175
175
|
# but we log it for debugging.
|
|
176
176
|
self.logger.debug(f"Schema inference skipped: {e}")
|
|
177
177
|
|
|
178
|
-
def
|
|
178
|
+
def validate_data(self, df: DataFrameType) -> DataFrameType:
|
|
179
179
|
"""
|
|
180
180
|
Runs DfValidator if a schema is configured.
|
|
181
|
+
Overrides BaseDatacube hook.
|
|
181
182
|
"""
|
|
182
183
|
schema = self.config.get("validation_schema")
|
|
183
184
|
if not schema:
|
|
@@ -203,6 +204,14 @@ class Datacube(DfHelper):
|
|
|
203
204
|
|
|
204
205
|
return validator.get_df()
|
|
205
206
|
|
|
207
|
+
async def avalidate_data(self, df: DataFrameType) -> DataFrameType:
|
|
208
|
+
"""
|
|
209
|
+
Asynchronous validation hook.
|
|
210
|
+
Offloads synchronous validation (CPU bound) to a thread.
|
|
211
|
+
"""
|
|
212
|
+
import asyncio
|
|
213
|
+
return await asyncio.to_thread(self.validate_data, df)
|
|
214
|
+
|
|
206
215
|
def get_ddl(self, table_name: Optional[str] = None) -> str:
|
|
207
216
|
"""
|
|
208
217
|
Generates ClickHouse DDL for the current cube.
|
|
@@ -230,7 +239,7 @@ class Datacube(DfHelper):
|
|
|
230
239
|
# 3. Apply Transform Hook
|
|
231
240
|
df = self.fix_data(df, **kwargs)
|
|
232
241
|
# 4. Validate
|
|
233
|
-
df = self.
|
|
242
|
+
df = self.validate_data(df)
|
|
234
243
|
else:
|
|
235
244
|
self.logger.debug(f"No data loaded by {self.__class__.__name__}")
|
|
236
245
|
|
|
@@ -254,7 +263,7 @@ class Datacube(DfHelper):
|
|
|
254
263
|
# 3. Apply Async Transform Hook
|
|
255
264
|
df = await self.afix_data(df, **kwargs)
|
|
256
265
|
# 4. Validate (CPU bound)
|
|
257
|
-
df = await
|
|
266
|
+
df = await self.avalidate_data(df)
|
|
258
267
|
else:
|
|
259
268
|
self.logger.debug(f"No data loaded by {self.__class__.__name__}")
|
|
260
269
|
|