sibi-flux 2025.12.0.tar.gz → 2026.1.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/PKG-INFO +43 -1
  2. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/README.md +40 -0
  3. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/pyproject.toml +47 -1
  4. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/__init__.py +4 -4
  5. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/executor.py +1 -1
  6. sibi_flux-2026.1.2/src/sibi_flux/cli.py +45 -0
  7. sibi_flux-2026.1.2/src/sibi_flux/config/__init__.py +3 -0
  8. {sibi_flux-2025.12.0/src/sibi_flux/conf → sibi_flux-2026.1.2/src/sibi_flux/config}/settings.py +7 -7
  9. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/async_core.py +1 -0
  10. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/client_manager.py +5 -2
  11. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/core.py +3 -0
  12. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/_data_cube.py +12 -3
  13. sibi_flux-2026.1.2/src/sibi_flux/datacube/cli.py +1247 -0
  14. sibi_flux-2026.1.2/src/sibi_flux/datacube/config_engine.py +219 -0
  15. sibi_flux-2026.1.2/src/sibi_flux/datacube/field_factory.py +131 -0
  16. sibi_flux-2026.1.2/src/sibi_flux/datacube/field_mapper.py +243 -0
  17. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/field_registry.py +2 -0
  18. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/generator.py +322 -90
  19. sibi_flux-2026.1.2/src/sibi_flux/datacube/orchestrator.py +297 -0
  20. sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_cube_router.py → sibi_flux-2026.1.2/src/sibi_flux/datacube/router.py +2 -3
  21. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dataset/_dataset.py +1 -1
  22. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/_df_helper.py +2 -1
  23. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/_params.py +6 -6
  24. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_validator/_df_validator.py +5 -0
  25. sibi_flux-2026.1.2/src/sibi_flux/init/core.py +159 -0
  26. sibi_flux-2026.1.2/src/sibi_flux/init/discovery_updater.py +99 -0
  27. sibi_flux-2026.1.2/src/sibi_flux/init/env.py +86 -0
  28. sibi_flux-2026.1.2/src/sibi_flux/init/env_engine.py +151 -0
  29. sibi_flux-2026.1.2/src/sibi_flux/init/env_generator.py +554 -0
  30. sibi_flux-2026.1.2/src/sibi_flux/init/templates/__init__.py +0 -0
  31. sibi_flux-2026.1.2/src/sibi_flux/init/templates/discovery_params.yaml +45 -0
  32. sibi_flux-2026.1.2/src/sibi_flux/init/templates/gen_dc.py +137 -0
  33. sibi_flux-2026.1.2/src/sibi_flux/init/templates/property_template.yaml +10 -0
  34. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/mcp/__init__.py +10 -2
  35. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/mcp/router.py +1 -1
  36. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/__init__.py +12 -4
  37. sibi_flux-2026.1.2/src/sibi_flux/parquet/__init__.py +8 -0
  38. sibi_flux-2026.1.2/src/sibi_flux/parquet/readers/__init__.py +4 -0
  39. {sibi_flux-2025.12.0/src/sibi_flux → sibi_flux-2026.1.2/src/sibi_flux/parquet}/readers/base.py +1 -1
  40. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/pipelines/base.py +1 -2
  41. sibi_flux-2026.1.2/src/sibi_flux/py.typed +0 -0
  42. sibi_flux-2026.1.2/src/sibi_flux/readers/__init__.py +3 -0
  43. sibi_flux-2026.1.2/src/sibi_flux/readers/base.py +3 -0
  44. {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/_storage_manager.py +4 -4
  45. {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/factory.py +1 -1
  46. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/__init__.py +1 -2
  47. sibi_flux-2025.12.0/src/sibi_flux/datacube/config_engine.py +0 -152
  48. sibi_flux-2025.12.0/src/sibi_flux/datacube/field_factory.py +0 -48
  49. sibi_flux-2025.12.0/src/sibi_flux/datacube/orchestrator.py +0 -171
  50. sibi_flux-2025.12.0/src/sibi_flux/readers/__init__.py +0 -3
  51. sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/__init__.py +0 -19
  52. sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_data_cube.py +0 -132
  53. sibi_flux-2025.12.0/src/sibi_flux/utils/credentials/__init__.py +0 -3
  54. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_dst/__init__.py +0 -0
  55. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/__init__.py +0 -0
  56. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/base.py +0 -0
  57. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet.py +0 -0
  58. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/__init__.py +0 -0
  59. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/manifest.py +0 -0
  60. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/artifacts/parquet_engine/planner.py +0 -0
  61. /sibi_flux-2025.12.0/src/sibi_flux/utils/credentials/_config_manager.py → /sibi_flux-2026.1.2/src/sibi_flux/config/manager.py +0 -0
  62. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/__init__.py +0 -0
  63. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/managed_resource/__init__.py +0 -0
  64. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/managed_resource/_managed_resource.py +0 -0
  65. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/core/type_maps/__init__.py +0 -0
  66. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/__init__.py +0 -0
  67. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/exceptions.py +0 -0
  68. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dask_cluster/utils.py +0 -0
  69. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/datacube/__init__.py +0 -0
  70. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/dataset/__init__.py +0 -0
  71. /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/hybrid_data_loader.py → /sibi_flux-2026.1.2/src/sibi_flux/dataset/hybrid_loader.py +0 -0
  72. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/__init__.py +0 -0
  73. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/async_enricher.py +0 -0
  74. /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_attacher.py → /sibi_flux-2026.1.2/src/sibi_flux/df_enricher/attacher.py +0 -0
  75. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/merger.py +0 -0
  76. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/specs.py +0 -0
  77. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_enricher/types.py +0 -0
  78. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/__init__.py +0 -0
  79. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/__init__.py +0 -0
  80. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/_strategies.py +0 -0
  81. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/http/__init__.py +0 -0
  82. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/http/_http_config.py +0 -0
  83. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/parquet/__init__.py +0 -0
  84. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/parquet/_parquet_options.py +0 -0
  85. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/__init__.py +0 -0
  86. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  87. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
  88. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  89. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  90. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
  91. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  92. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/backends/utils.py +0 -0
  93. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/__init__.py +0 -0
  94. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_defaults.py +0 -0
  95. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_filter_handler.py +0 -0
  96. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_params_config.py +0 -0
  97. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_helper/core/_query_config.py +0 -0
  98. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/df_validator/__init__.py +0 -0
  99. /sibi_flux-2025.12.0/src/sibi_flux/py.typed → /sibi_flux-2026.1.2/src/sibi_flux/init/__init__.py +0 -0
  100. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/logger/__init__.py +0 -0
  101. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/logger/_logger.py +0 -0
  102. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/mcp/client.py +0 -0
  103. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/orchestration/__init__.py +0 -0
  104. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/orchestration/_artifact_orchestrator.py +0 -0
  105. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/orchestration/_pipeline_executor.py +0 -0
  106. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/_pbf_handler.py +0 -0
  107. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/graph_loader.py +0 -0
  108. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/osmnx_helper/utils.py +0 -0
  109. {sibi_flux-2025.12.0/src/sibi_flux → sibi_flux-2026.1.2/src/sibi_flux/parquet}/readers/parquet.py +0 -0
  110. {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.2/src/sibi_flux/parquet/saver}/__init__.py +0 -0
  111. {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.2/src/sibi_flux/parquet/saver}/_parquet_saver.py +0 -0
  112. {sibi_flux-2025.12.0/src/sibi_flux/utils/parquet_saver → sibi_flux-2026.1.2/src/sibi_flux/parquet/saver}/_write_gatekeeper.py +0 -0
  113. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/pipelines/__init__.py +0 -0
  114. /sibi_flux-2025.12.0/src/sibi_flux/utils/boilerplate/base_pipeline_template.py → /sibi_flux-2026.1.2/src/sibi_flux/pipelines/template.py +0 -0
  115. {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/__init__.py +0 -0
  116. {sibi_flux-2025.12.0/src/sibi_flux/utils → sibi_flux-2026.1.2/src/sibi_flux}/storage/_fs_registry.py +0 -0
  117. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/clickhouse_writer/__init__.py +0 -0
  118. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/clickhouse_writer/_clickhouse_writer.py +0 -0
  119. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/common.py +0 -0
  120. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/dask_utils.py +0 -0
  121. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/data_utils/__init__.py +0 -0
  122. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/data_utils/_data_utils.py +0 -0
  123. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/dataframe_utils.py +0 -0
  124. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/__init__.py +0 -0
  125. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/_business_days.py +0 -0
  126. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/_date_utils.py +0 -0
  127. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/date_utils/_file_age_checker.py +0 -0
  128. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/file_utils.py +0 -0
  129. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/filepath_generator/__init__.py +0 -0
  130. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/filepath_generator/_filepath_generator.py +0 -0
  131. {sibi_flux-2025.12.0 → sibi_flux-2026.1.2}/src/sibi_flux/utils/retry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sibi-flux
3
- Version: 2025.12.0
3
+ Version: 2026.1.2
4
4
  Summary: Sibi Toolkit: A collection of tools for Data Analysis/Engineering.
5
5
  Author: Luis Valverde
6
6
  Author-email: Luis Valverde <lvalverdeb@gmail.com>
@@ -27,6 +27,8 @@ Requires-Dist: httpx>=0.28.1
27
27
  Requires-Dist: opentelemetry-api>=1.38.0
28
28
  Requires-Dist: opentelemetry-exporter-otlp>=1.38.0
29
29
  Requires-Dist: opentelemetry-sdk>=1.38.0
30
+ Requires-Dist: deep-translator>=1.11.4
31
+ Requires-Dist: pyyaml>=6.0.3
30
32
  Requires-Dist: sibi-flux[distributed,geospatial,mcp] ; extra == 'complete'
31
33
  Requires-Dist: distributed>=2025.11.0 ; extra == 'distributed'
32
34
  Requires-Dist: osmnx>=2.0.7 ; extra == 'geospatial'
@@ -281,3 +283,43 @@ async with GenericMcpClient(url="http://localhost:8000/sse") as client:
281
283
  # Call a tool
282
284
  vat = await client.call_tool("calculate_vat", arguments={"amount": 100.0})
283
285
  ```
286
+
287
+ ### 8. Datacube Generation (`gen_dc.py`)
288
+
289
+ Automate the creation of Datacube classes and Field Maps from your database schema.
290
+
291
+ **Configuration (`discovery_params.yaml`)**
292
+
293
+ Define your generation rules in a hierarchical configuration file:
294
+
295
+ ```yaml
296
+ defaults:
297
+ backend: sqlalchemy
298
+ class_suffix: Dc
299
+
300
+ discovery:
301
+ whitelist_file: whitelist.yaml
302
+ rules_file: discovery_rules.yaml
303
+
304
+ generation:
305
+ enable_field_maps: true
306
+ ```
307
+
308
+ **Commands**
309
+
310
+ 1. **Discover**: Introspects the database and updates your whitelist/registry.
311
+ ```bash
312
+ uv run poe dc-discover
313
+ ```
314
+ * **Whitelist**: Explicitly define tables to generate. Supports `custom_name` to override class names.
315
+ * **Rules**: Regex-based patterns to match tables.
316
+
317
+ 2. **Sync**: Generates Python code (Datacubes and Field Maps) based on the registry.
318
+ ```bash
319
+ uv run poe dc-sync --force
320
+ ```
321
+
322
+ **Key Features**
323
+ - **Custom Naming**: Add `custom_name: MyCube` to `whitelist.yaml` to override generated names.
324
+ - **Hierarchical Config**: Strict validation of generation parameters.
325
+ - **Field Maps**: Auto-generates type-safe mapping files for every table.
@@ -231,3 +231,43 @@ async with GenericMcpClient(url="http://localhost:8000/sse") as client:
231
231
  # Call a tool
232
232
  vat = await client.call_tool("calculate_vat", arguments={"amount": 100.0})
233
233
  ```
234
+
235
+ ### 8. Datacube Generation (`gen_dc.py`)
236
+
237
+ Automate the creation of Datacube classes and Field Maps from your database schema.
238
+
239
+ **Configuration (`discovery_params.yaml`)**
240
+
241
+ Define your generation rules in a hierarchical configuration file:
242
+
243
+ ```yaml
244
+ defaults:
245
+ backend: sqlalchemy
246
+ class_suffix: Dc
247
+
248
+ discovery:
249
+ whitelist_file: whitelist.yaml
250
+ rules_file: discovery_rules.yaml
251
+
252
+ generation:
253
+ enable_field_maps: true
254
+ ```
255
+
256
+ **Commands**
257
+
258
+ 1. **Discover**: Introspects the database and updates your whitelist/registry.
259
+ ```bash
260
+ uv run poe dc-discover
261
+ ```
262
+ * **Whitelist**: Explicitly define tables to generate. Supports `custom_name` to override class names.
263
+ * **Rules**: Regex-based patterns to match tables.
264
+
265
+ 2. **Sync**: Generates Python code (Datacubes and Field Maps) based on the registry.
266
+ ```bash
267
+ uv run poe dc-sync --force
268
+ ```
269
+
270
+ **Key Features**
271
+ - **Custom Naming**: Add `custom_name: MyCube` to `whitelist.yaml` to override generated names.
272
+ - **Hierarchical Config**: Strict validation of generation parameters.
273
+ - **Field Maps**: Auto-generates type-safe mapping files for every table.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sibi-flux"
3
- version = "2025.12.0"
3
+ version = "2026.1.2"
4
4
  description = "Sibi Toolkit: A collection of tools for Data Analysis/Engineering."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -33,6 +33,8 @@ dependencies = [
33
33
  "opentelemetry-api>=1.38.0",
34
34
  "opentelemetry-exporter-otlp>=1.38.0",
35
35
  "opentelemetry-sdk>=1.38.0",
36
+ "deep-translator>=1.11.4",
37
+ "pyyaml>=6.0.3",
36
38
  ]
37
39
 
38
40
  [project.optional-dependencies]
@@ -59,6 +61,9 @@ complete = [
59
61
  "sibi-flux[distributed,geospatial,mcp]"
60
62
  ]
61
63
 
64
+ [project.scripts]
65
+ sibi-flux = "sibi_flux.cli:app"
66
+
62
67
 
63
68
  [dependency-groups]
64
69
  dev = [
@@ -89,6 +94,35 @@ build-backend = "uv_build"
89
94
  module-root = "src"
90
95
  module-name = ["sibi_flux", "sibi_dst"]
91
96
 
97
+ [tool.uv.workspace]
98
+ members = [
99
+ "test-project-alpha",
100
+ "test-project-beta",
101
+ "test-project-gamma",
102
+ "test-project-delta",
103
+ "test-project-epsilon",
104
+ "test-project-zeta",
105
+ "test-project-eta",
106
+ "test-project-theta",
107
+ "test-project-iota",
108
+ "test-project-kappa",
109
+ "test-project-lambda",
110
+ "test-project-alpha/latest",
111
+ "test-project-mu",
112
+ "test-project-mu/latest",
113
+ "test-project-xi",
114
+ "test-project-xi/latest",
115
+ "test-project-omicron",
116
+ "test-project-omicron/latest",
117
+ "test-project-pi",
118
+ "test-project-pi/latest",
119
+ "test-project-rho",
120
+ "test-project-rho/latest",
121
+ "test-project-sigma",
122
+ "test-project-sigma/latest",
123
+ "test-project-tau",
124
+ ]
125
+
92
126
  [tool.pytest.ini_options]
93
127
  pythonpath = ["src", "."]
94
128
  testpaths = ["tests"]
@@ -97,6 +131,7 @@ filterwarnings = ["ignore::DeprecationWarning"]
97
131
 
98
132
 
99
133
  [tool.poe.tasks]
134
+ publish = { cmd = "uv publish", envfile = ".env" }
100
135
  dev = """
101
136
  uvicorn solutions.main:app
102
137
  --reload
@@ -108,6 +143,17 @@ uvicorn solutions.main:app
108
143
  """
109
144
  test = { cmd = "pytest tests/"}
110
145
  lint = "black src/"
146
+ build = "uv build"
147
+ dc-sync = "python solutions/generators/datacubes/gen_dc.py sync"
148
+ dc-init = "python solutions/generators/datacubes/gen_dc.py init"
149
+ dc-discover = "python solutions/generators/datacubes/gen_dc.py discover"
150
+ dc-scan = "python solutions/generators/datacubes/gen_dc.py scan"
151
+ dc-match = "python solutions/generators/datacubes/gen_dc.py match"
152
+ dc-map = "python solutions/generators/datacubes/gen_dc.py map"
153
+
154
+ [tool.poe.tasks.release]
155
+ sequence = ["build","publish"]
156
+ envfile = ".env" # Loads the token for the whole sequence
111
157
 
112
158
  [tool.commitizen]
113
159
  name = "cz_conventional_commits"
@@ -21,11 +21,12 @@ from sibi_flux.df_validator._df_validator import DfValidator
21
21
 
22
22
  # Artifacts
23
23
  from sibi_flux.artifacts import ParquetArtifact, BaseArtifact as Artifact
24
- from sibi_flux.readers.parquet import ParquetReader
24
+ from sibi_flux.parquet import ParquetReader
25
25
 
26
26
  # Utilities (Sub-packages)
27
27
  from sibi_flux import dask_cluster
28
- from sibi_flux.utils import boilerplate, parquet_saver, clickhouse_writer
28
+ from sibi_flux.utils import clickhouse_writer
29
+ from sibi_flux import parquet
29
30
 
30
31
 
31
32
  __all__ = [
@@ -43,7 +44,6 @@ __all__ = [
43
44
  "Artifact",
44
45
  "ParquetReader",
45
46
  "dask_cluster",
46
- "boilerplate",
47
- "parquet_saver",
47
+ "parquet",
48
48
  "clickhouse_writer",
49
49
  ]
@@ -13,7 +13,7 @@ import functools
13
13
  from tqdm import tqdm
14
14
  import pandas as pd
15
15
  from sibi_flux.core import ManagedResource
16
- from sibi_flux.utils.parquet_saver import ParquetSaver
16
+ from sibi_flux.parquet import ParquetSaver
17
17
 
18
18
  from sibi_flux.utils import ensure_slash
19
19
  from sibi_flux.utils.retry import with_retry
@@ -0,0 +1,45 @@
1
+ import typer
2
+ from typing import Optional
3
+ from pathlib import Path
4
+ from rich.console import Console
5
+ from sibi_flux.init.core import initialize_project
6
+
7
+ app = typer.Typer(help="Sibi Flux CLI")
8
+ console = Console()
9
+
10
+ @app.callback()
11
+ def callback():
12
+ """
13
+ Sibi Flux CLI
14
+ """
15
+
16
+ @app.command()
17
+ def init(
18
+ project_name: str = typer.Argument(..., help="Name of the project to create"),
19
+ lib: bool = typer.Option(False, "--lib", help="Initialize as a library project (passed to uv init)"),
20
+ app: bool = typer.Option(False, "--app", help="Initialize as an application project (passed to uv init)")
21
+ ):
22
+ """
23
+ Initialize a new Sibi Flux project.
24
+
25
+ Creates a new directory <project_name>, initializes it with 'uv',
26
+ and adds 'sibi-flux' as a dependency.
27
+ """
28
+ initialize_project(project_name, lib, app)
29
+
30
+ @app.command()
31
+ def env(
32
+ project_path: Path = typer.Argument(Path("."), help="Project root directory"),
33
+ env_file: Optional[Path] = typer.Option(None, "--env-file", "-e", help="Path to environment file (defaults to .env)"),
34
+ cleanup: bool = typer.Option(False, "--cleanup", help="Remove existing configuration files"),
35
+ production: bool = typer.Option(False, "--production", "-p", help="Generate production skeleton (no hardcoded values)"),
36
+ ):
37
+ """
38
+ Initialize configuration files (settings.py, credentials) based on .env
39
+ """
40
+ from sibi_flux.init.env import init_env
41
+ init_env(project_path, env_file, cleanup=cleanup, production_mode=production)
42
+
43
+
44
+ if __name__ == "__main__":
45
+ app()
@@ -0,0 +1,3 @@
1
+ from .manager import ConfigManager
2
+
3
+ __all__ = ["ConfigManager"]
@@ -1,4 +1,4 @@
1
- from typing import Optional, Any
1
+ from typing import Optional, Any, ClassVar
2
2
  from pydantic import SecretStr
3
3
  from pydantic_settings import BaseSettings, SettingsConfigDict
4
4
 
@@ -9,6 +9,8 @@ class SibiBaseSettings(BaseSettings):
9
9
  model_config = SettingsConfigDict(
10
10
  env_file=".env", env_file_encoding="utf-8", extra="ignore"
11
11
  )
12
+
13
+ conf_name: ClassVar[str] = ""
12
14
 
13
15
 
14
16
  class FsSettings(SibiBaseSettings):
@@ -84,13 +86,8 @@ class DatabaseSettings(SibiBaseSettings):
84
86
  """Generic SQL Database settings."""
85
87
 
86
88
  db_url: str = "sqlite:///:memory:"
87
-
88
-
89
- class ClickhouseBaseSettings(SibiBaseSettings):
90
- """Base settings for ClickHouse connection."""
91
-
92
89
  host: str = "localhost"
93
- port: int = 8123
90
+ port: int = 5432
94
91
  database: str = "default"
95
92
  user: str = "default"
96
93
  password: SecretStr = SecretStr("secret")
@@ -102,9 +99,12 @@ class ClickhouseBaseSettings(SibiBaseSettings):
102
99
  "dbname": self.database,
103
100
  "user": self.user,
104
101
  "password": self.password.get_secret_value() if self.password else None,
102
+ "db_url": self.db_url,
105
103
  }
106
104
 
107
105
 
106
+
107
+
108
108
  class RedisBaseSettings(SibiBaseSettings):
109
109
  """Base settings for Redis connection."""
110
110
 
@@ -4,6 +4,7 @@ Async utilities for Dask operations.
4
4
 
5
5
  from typing import Any, Optional
6
6
  import asyncio
7
+
7
8
  try:
8
9
  from dask.distributed import Client
9
10
  except ImportError:
@@ -23,6 +23,7 @@ import shutil
23
23
 
24
24
  try:
25
25
  from dask.distributed import Client, LocalCluster, get_client
26
+
26
27
  HAS_DISTRIBUTED = True
27
28
  except ImportError:
28
29
  Client = object
@@ -231,8 +232,10 @@ class DaskClientMixin:
231
232
  def _init_dask_client(self, **kwargs) -> None:
232
233
  self._init_params = kwargs
233
234
  if not HAS_DISTRIBUTED:
234
- self.logger.info("Dask Distributed not installed. Skipping cluster initialization.")
235
- return
235
+ self.logger.info(
236
+ "Dask Distributed not installed. Skipping cluster initialization."
237
+ )
238
+ return
236
239
 
237
240
  if kwargs.get("dask_client"):
238
241
  self.dask_client = kwargs["dask_client"]
@@ -14,15 +14,18 @@ from typing import Any, Callable, Dict, List, Optional, TypeVar
14
14
  import dask
15
15
  import dask.dataframe as dd
16
16
  import pandas as pd
17
+
17
18
  try:
18
19
  from dask.distributed import Client, Future
19
20
  from dask.distributed import wait as dask_wait
20
21
  except ImportError:
21
22
  Client = object
22
23
  Future = object
24
+
23
25
  def dask_wait(*args, **kwargs):
24
26
  pass
25
27
 
28
+
26
29
  # Project-specific imports
27
30
  from .client_manager import get_persistent_client
28
31
  from .exceptions import RECOVERABLE_COMMS
@@ -175,9 +175,10 @@ class Datacube(DfHelper):
175
175
  # but we log it for debugging.
176
176
  self.logger.debug(f"Schema inference skipped: {e}")
177
177
 
178
- def _validate(self, df: DataFrameType) -> DataFrameType:
178
+ def validate_data(self, df: DataFrameType) -> DataFrameType:
179
179
  """
180
180
  Runs DfValidator if a schema is configured.
181
+ Overrides BaseDatacube hook.
181
182
  """
182
183
  schema = self.config.get("validation_schema")
183
184
  if not schema:
@@ -203,6 +204,14 @@ class Datacube(DfHelper):
203
204
 
204
205
  return validator.get_df()
205
206
 
207
+ async def avalidate_data(self, df: DataFrameType) -> DataFrameType:
208
+ """
209
+ Asynchronous validation hook.
210
+ Offloads synchronous validation (CPU bound) to a thread.
211
+ """
212
+ import asyncio
213
+ return await asyncio.to_thread(self.validate_data, df)
214
+
206
215
  def get_ddl(self, table_name: Optional[str] = None) -> str:
207
216
  """
208
217
  Generates ClickHouse DDL for the current cube.
@@ -230,7 +239,7 @@ class Datacube(DfHelper):
230
239
  # 3. Apply Transform Hook
231
240
  df = self.fix_data(df, **kwargs)
232
241
  # 4. Validate
233
- df = self._validate(df)
242
+ df = self.validate_data(df)
234
243
  else:
235
244
  self.logger.debug(f"No data loaded by {self.__class__.__name__}")
236
245
 
@@ -254,7 +263,7 @@ class Datacube(DfHelper):
254
263
  # 3. Apply Async Transform Hook
255
264
  df = await self.afix_data(df, **kwargs)
256
265
  # 4. Validate (CPU bound)
257
- df = await asyncio.to_thread(self._validate, df)
266
+ df = await self.avalidate_data(df)
258
267
  else:
259
268
  self.logger.debug(f"No data loaded by {self.__class__.__name__}")
260
269