sibi-flux 2026.1.8__tar.gz → 2026.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/PKG-INFO +1 -1
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/pyproject.toml +2 -1
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/cli.py +156 -60
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/config_engine.py +9 -1
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/generator.py +26 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/cube_proposer.py +7 -3
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/README.md +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_dst/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/base.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/executor.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/manifest.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/planner.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/cli.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/config/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/config/manager.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/config/settings.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/managed_resource/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/managed_resource/_managed_resource.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/type_maps/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/async_core.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/client_manager.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/core.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/exceptions.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/_data_cube.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/field_factory.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/field_mapper.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/field_registry.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/orchestrator.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/router.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dataset/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dataset/_dataset.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dataset/hybrid_loader.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/async_enricher.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/attacher.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/merger.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/specs.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/types.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/_df_helper.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/_params.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/_strategies.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/http/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/http/_http_config.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/parquet/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/parquet/_parquet_options.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_defaults.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_filter_handler.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_params_config.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_query_config.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_validator/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_validator/_df_validator.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/app.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/core.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/cube_extender.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/discovery_updater.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/env.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/env_engine.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/env_generator.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/rule_generator.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/discovery_params.yaml +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/gen_dc.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/property_template.yaml +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/logger/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/logger/_logger.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/mcp/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/mcp/client.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/mcp/router.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/_artifact_orchestrator.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/_pipeline_executor.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/_pbf_handler.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/graph_loader.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/readers/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/readers/base.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/readers/parquet.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/saver/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/saver/_parquet_saver.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/saver/_write_gatekeeper.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/pipelines/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/pipelines/base.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/pipelines/template.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/py.typed +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/readers/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/readers/base.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/_fs_registry.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/_storage_manager.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/factory.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/clickhouse_writer/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/clickhouse_writer/_clickhouse_writer.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/common.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/dask_utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/data_utils/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/data_utils/_data_utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/dataframe_utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/_business_days.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/_date_utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/_file_age_checker.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/file_utils.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/filepath_generator/__init__.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/filepath_generator/_filepath_generator.py +0 -0
- {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/retry.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sibi-flux"
|
|
3
|
-
version = "2026.1.8"
|
|
3
|
+
version = "2026.1.10"
|
|
4
4
|
description = "Sibi Toolkit: A collection of tools for Data Analysis/Engineering."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -97,6 +97,7 @@ members = [
|
|
|
97
97
|
"test-prj",
|
|
98
98
|
"test_prj",
|
|
99
99
|
"test_prj_verify",
|
|
100
|
+
"clean-test-prj",
|
|
100
101
|
]
|
|
101
102
|
|
|
102
103
|
[tool.pytest.ini_options]
|
|
@@ -203,7 +203,7 @@ def set_context_defaults(
|
|
|
203
203
|
|
|
204
204
|
|
|
205
205
|
def _get_db_url_callback(
|
|
206
|
-
registry: DatacubeRegistry, db_url_map: Optional[str]
|
|
206
|
+
registry: DatacubeRegistry, db_url_map: Optional[str], params: Optional[Dict[str, Any]] = None
|
|
207
207
|
) -> Callable[[str], str]:
|
|
208
208
|
"""Helper to create a callback that resolves DB URLs from CLI overrides or registry."""
|
|
209
209
|
cli_urls = json.loads(db_url_map) if db_url_map else {}
|
|
@@ -212,8 +212,24 @@ def _get_db_url_callback(
|
|
|
212
212
|
# 1. CLI Override
|
|
213
213
|
if conf_name in cli_urls:
|
|
214
214
|
return cli_urls[conf_name]
|
|
215
|
+
|
|
215
216
|
# 2. Dynamic Resolution
|
|
216
|
-
|
|
217
|
+
imports = registry.global_imports
|
|
218
|
+
|
|
219
|
+
# Check specific import_spec from params
|
|
220
|
+
if params and "databases" in params:
|
|
221
|
+
for db in params.get("databases", []):
|
|
222
|
+
ref = db.get("connection_ref") or db.get("connection_obj")
|
|
223
|
+
if ref == conf_name:
|
|
224
|
+
spec = db.get("import_spec")
|
|
225
|
+
if spec and isinstance(spec, dict):
|
|
226
|
+
# Prepend specific module
|
|
227
|
+
imports = [spec.get("module")] + imports
|
|
228
|
+
elif db.get("global_import"):
|
|
229
|
+
imports = [db.get("global_import")] + imports
|
|
230
|
+
break
|
|
231
|
+
|
|
232
|
+
url = resolve_db_url(conf_name, imports)
|
|
217
233
|
if url:
|
|
218
234
|
return url
|
|
219
235
|
raise ValueError(
|
|
@@ -283,9 +299,16 @@ def sync(
|
|
|
283
299
|
|
|
284
300
|
flat_tables = {}
|
|
285
301
|
for grp, tbls in existing_reg_data.items():
|
|
286
|
-
if isinstance(tbls, dict):
|
|
302
|
+
if grp == "tables" and isinstance(tbls, dict):
|
|
303
|
+
# Modern format: tables are in 'tables' dict
|
|
287
304
|
for t, t_meta in tbls.items():
|
|
288
|
-
|
|
305
|
+
flat_tables[t] = t_meta
|
|
306
|
+
continue
|
|
307
|
+
|
|
308
|
+
if isinstance(tbls, dict) and grp != "global_imports":
|
|
309
|
+
# Legacy Scoped Format (grp is connection_obj)
|
|
310
|
+
for t, t_meta in tbls.items():
|
|
311
|
+
# Inject/Overwrite connection_obj only if implied by scope
|
|
289
312
|
t_meta["connection_obj"] = grp
|
|
290
313
|
flat_tables[t] = t_meta
|
|
291
314
|
|
|
@@ -295,6 +318,11 @@ def sync(
|
|
|
295
318
|
console.print(f"[yellow]Warning: Could not load existing registry: {e}[/yellow]")
|
|
296
319
|
|
|
297
320
|
registry = DatacubeRegistry(config_data, params=context.params)
|
|
321
|
+
|
|
322
|
+
# Fix 2 Re-applied: Ensure global_imports are populated from params if registry loaded from file didn't have them
|
|
323
|
+
# This is critical for Clean Project + Force flows where registry might be partial or JIT
|
|
324
|
+
if not registry.global_imports and context.params.get("global_imports"):
|
|
325
|
+
registry.global_imports = context.params.get("global_imports")
|
|
298
326
|
|
|
299
327
|
# --- Aggregation Phase ---
|
|
300
328
|
# --- Aggregation Phase ---
|
|
@@ -453,7 +481,7 @@ def sync(
|
|
|
453
481
|
registry.valid_paths = list(valid_paths)
|
|
454
482
|
registry.valid_fieldmap_paths = context.valid_fieldmap_paths
|
|
455
483
|
|
|
456
|
-
get_url = _get_db_url_callback(registry, db_url_map)
|
|
484
|
+
get_url = _get_db_url_callback(registry, db_url_map, params=context.params)
|
|
457
485
|
|
|
458
486
|
# Group tables by target file
|
|
459
487
|
file_groups = registry.group_tables_by_file()
|
|
@@ -770,8 +798,22 @@ def discover(
|
|
|
770
798
|
if not wl_filename:
|
|
771
799
|
# Default convention: discovery_whitelist_<db_name>.yaml
|
|
772
800
|
wl_filename = f"discovery_whitelist_{conn_obj}.yaml"
|
|
801
|
+
|
|
773
802
|
whitelist_path = config_path.parent / wl_filename
|
|
774
803
|
|
|
804
|
+
# Fallback 1: Auto-generated per-db (from scan/map fallback logic)
|
|
805
|
+
if not whitelist_path.exists():
|
|
806
|
+
fb = config_path.parent / f"discovery_whitelist_{conn_obj}.yaml"
|
|
807
|
+
if fb.exists():
|
|
808
|
+
whitelist_path = fb
|
|
809
|
+
|
|
810
|
+
# Fallback 2: Global whitelist.yaml (Generated by 'whitelist' command)
|
|
811
|
+
if not whitelist_path.exists():
|
|
812
|
+
global_wl = config_path.parent / "whitelist.yaml"
|
|
813
|
+
if global_wl.exists():
|
|
814
|
+
console.print(f"[dim]Fallback: Using global {global_wl.name} for {db_name}[/dim]")
|
|
815
|
+
whitelist_path = global_wl
|
|
816
|
+
|
|
775
817
|
# Determine rules path
|
|
776
818
|
rules_filename = db_config.get("rules_file")
|
|
777
819
|
if not rules_filename:
|
|
@@ -1015,20 +1057,31 @@ def scan(
|
|
|
1015
1057
|
|
|
1016
1058
|
# Resolve global output file
|
|
1017
1059
|
# First check nested discovery block (standard), then root (fallback)
|
|
1018
|
-
|
|
1060
|
+
# Resolve global output file
|
|
1061
|
+
# First check nested discovery block (standard), then root (fallback)
|
|
1062
|
+
discovery_conf = params.get("paths", {}).get("discovery") or params.get("discovery", {})
|
|
1019
1063
|
global_tables_file = (
|
|
1020
1064
|
discovery_conf.get("all_tables_file")
|
|
1021
1065
|
or params.get("all_tables_file")
|
|
1022
|
-
or "all_tables.yaml"
|
|
1066
|
+
or "dataobjects/globals/all_tables.yaml"
|
|
1023
1067
|
)
|
|
1024
1068
|
|
|
1025
1069
|
# If using absolute path (resolved from context), use it directly
|
|
1026
|
-
# Otherwise treat as relative to config file location
|
|
1070
|
+
# Otherwise treat as relative to PROJECT ROOT (not config file location)
|
|
1027
1071
|
path_obj = Path(global_tables_file)
|
|
1028
1072
|
if path_obj.is_absolute():
|
|
1029
1073
|
global_tables_path = path_obj
|
|
1030
1074
|
else:
|
|
1031
|
-
|
|
1075
|
+
try:
|
|
1076
|
+
# Heuristic: config is in generators/datacubes
|
|
1077
|
+
project_root = config_path.parent.parent.parent
|
|
1078
|
+
except Exception:
|
|
1079
|
+
project_root = Path.cwd()
|
|
1080
|
+
global_tables_path = project_root / global_tables_file
|
|
1081
|
+
|
|
1082
|
+
# Ensure dir exists
|
|
1083
|
+
if not global_tables_path.parent.exists():
|
|
1084
|
+
global_tables_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1032
1085
|
|
|
1033
1086
|
# Load existing data to preserve config for DBs not being scanned
|
|
1034
1087
|
all_tables_data = {}
|
|
@@ -1102,7 +1155,7 @@ def drift(
|
|
|
1102
1155
|
config_data = _load_and_resolve_config(config_path)
|
|
1103
1156
|
|
|
1104
1157
|
registry = DatacubeRegistry(config_data)
|
|
1105
|
-
get_url = _get_db_url_callback(registry, db_url_map)
|
|
1158
|
+
get_url = _get_db_url_callback(registry, db_url_map, params=config_data)
|
|
1106
1159
|
cli_urls = json.loads(db_url_map) if db_url_map else {}
|
|
1107
1160
|
|
|
1108
1161
|
drift_table = Table(title="Schema Drift Analysis")
|
|
@@ -1203,6 +1256,8 @@ def propose_rules(
|
|
|
1203
1256
|
dry_run: bool = typer.Option(
|
|
1204
1257
|
False, "--dry-run", help="Preview rules without saving"
|
|
1205
1258
|
),
|
|
1259
|
+
conf_override: Optional[str] = typer.Option(None, "--rules", help="Override rules file"),
|
|
1260
|
+
force: bool = typer.Option(False, "--force", "-f", help="Force overwrite existing rules"),
|
|
1206
1261
|
):
|
|
1207
1262
|
"""
|
|
1208
1263
|
Analyzes all_tables.yaml and proposes new discovery rules.
|
|
@@ -1219,23 +1274,35 @@ def propose_rules(
|
|
|
1219
1274
|
# The resolved config will have absolute paths for these if the helper worked.
|
|
1220
1275
|
params = context.params
|
|
1221
1276
|
|
|
1222
|
-
# Prefer resolved values if available
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1277
|
+
# Prefer resolved values if available, but check nested 'paths' structure first
|
|
1278
|
+
# Structure: paths -> discovery -> all_tables_file
|
|
1279
|
+
discovery_conf = params.get("paths", {}).get("discovery") or params.get("discovery", {})
|
|
1280
|
+
all_tables_file = (
|
|
1281
|
+
discovery_conf.get("all_tables_file")
|
|
1282
|
+
or params.get("all_tables_file")
|
|
1283
|
+
or "dataobjects/globals/all_tables.yaml"
|
|
1284
|
+
)
|
|
1285
|
+
|
|
1286
|
+
path_obj = Path(all_tables_file)
|
|
1287
|
+
if path_obj.is_absolute():
|
|
1288
|
+
all_tables_path = path_obj
|
|
1228
1289
|
else:
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1290
|
+
try:
|
|
1291
|
+
# Heuristic: config is in generators/datacubes
|
|
1292
|
+
project_root = config_path.parent.parent.parent
|
|
1293
|
+
except Exception:
|
|
1294
|
+
project_root = Path.cwd()
|
|
1295
|
+
all_tables_path = project_root / all_tables_file
|
|
1296
|
+
|
|
1297
|
+
# Verify Existence
|
|
1298
|
+
if not all_tables_path.exists():
|
|
1299
|
+
console.print(f"[red]Error: {all_tables_path} not found. Run 'dc scan' first.[/red]")
|
|
1300
|
+
raise typer.Exit(code=1)
|
|
1236
1301
|
|
|
1237
1302
|
# Rules File
|
|
1238
|
-
if
|
|
1303
|
+
if conf_override:
|
|
1304
|
+
rules_path = Path(conf_override)
|
|
1305
|
+
elif "discovery" in resolved_config and "rules_file" in resolved_config["discovery"]:
|
|
1239
1306
|
rules_path = Path(resolved_config["discovery"]["rules_file"])
|
|
1240
1307
|
else:
|
|
1241
1308
|
raw_rules = (
|
|
@@ -1245,11 +1312,12 @@ def propose_rules(
|
|
|
1245
1312
|
)
|
|
1246
1313
|
rules_path = config_path.parent / raw_rules
|
|
1247
1314
|
|
|
1248
|
-
if
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
)
|
|
1252
|
-
|
|
1315
|
+
# Guard: Require Force if Rules Exist
|
|
1316
|
+
if rules_path.exists() and not force and not dry_run:
|
|
1317
|
+
console.print(f"[yellow]Rules file exists at {rules_path}.[/yellow]")
|
|
1318
|
+
console.print("[yellow]Skipping proposal generation to protect manual edits.[/yellow]")
|
|
1319
|
+
console.print("[dim]Use --force to overwrite/update rules.[/dim]")
|
|
1320
|
+
return
|
|
1253
1321
|
|
|
1254
1322
|
engine = RuleEngine(all_tables_path, rules_path)
|
|
1255
1323
|
engine.load()
|
|
@@ -1554,7 +1622,7 @@ def workflow(
|
|
|
1554
1622
|
# Step 0: Scan & Propose Rules (Critical for new projects/drift)
|
|
1555
1623
|
console.print(f"\n[bold cyan]Step 0a: Scan (Introspection)[/]")
|
|
1556
1624
|
scan(
|
|
1557
|
-
config_file=config_file,
|
|
1625
|
+
config_file=config_file if config_file else None,
|
|
1558
1626
|
db_url_map=db_url_map,
|
|
1559
1627
|
env_file=env_file,
|
|
1560
1628
|
db_name=db_name,
|
|
@@ -1562,8 +1630,19 @@ def workflow(
|
|
|
1562
1630
|
|
|
1563
1631
|
console.print(f"\n[bold cyan]Step 0b: Propose Rules[/]")
|
|
1564
1632
|
propose_rules(
|
|
1633
|
+
config_file=config_file if config_file else None,
|
|
1634
|
+
dry_run=dry_run,
|
|
1635
|
+
conf_override=None,
|
|
1636
|
+
force=force,
|
|
1637
|
+
)
|
|
1638
|
+
|
|
1639
|
+
console.print(f"\n[bold cyan]Step 0c: Generate Whitelist[/]")
|
|
1640
|
+
whitelist(
|
|
1565
1641
|
config_file=config_file,
|
|
1566
|
-
|
|
1642
|
+
db_name=db_name,
|
|
1643
|
+
db_url_map=db_url_map,
|
|
1644
|
+
env_file=env_file,
|
|
1645
|
+
force=force,
|
|
1567
1646
|
)
|
|
1568
1647
|
|
|
1569
1648
|
# Step 1: Discover
|
|
@@ -1582,17 +1661,10 @@ def workflow(
|
|
|
1582
1661
|
force=False, # Discovery force is about fields usually?
|
|
1583
1662
|
)
|
|
1584
1663
|
|
|
1585
|
-
# Step 2: Map
|
|
1586
|
-
console.print(f"\n[bold cyan]Step 2: Map Generation[/]")
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
db_url_map=db_url_map,
|
|
1590
|
-
env_file=env_file,
|
|
1591
|
-
db_name=db_name,
|
|
1592
|
-
force=force,
|
|
1593
|
-
)
|
|
1594
|
-
|
|
1595
|
-
# Step 3: Sync
|
|
1664
|
+
# Step 2: Map (Skipped - Handled by Sync)
|
|
1665
|
+
# console.print(f"\n[bold cyan]Step 2: Map Generation (Integrated into Sync)[/]")
|
|
1666
|
+
|
|
1667
|
+
# Step 3: Sync -> Generates Code + Field Maps
|
|
1596
1668
|
console.print(f"\n[bold cyan]Step 3: Sync (Code Generation)[/]")
|
|
1597
1669
|
sync(
|
|
1598
1670
|
config_file=config_file,
|
|
@@ -1892,8 +1964,38 @@ def whitelist(
|
|
|
1892
1964
|
)
|
|
1893
1965
|
continue
|
|
1894
1966
|
|
|
1895
|
-
|
|
1896
|
-
tables =
|
|
1967
|
+
# 0. Try to load tables from all_tables.yaml (Scan Output)
|
|
1968
|
+
tables = []
|
|
1969
|
+
all_tables_file = (
|
|
1970
|
+
params.get("paths", {}).get("discovery", {}).get("all_tables_file")
|
|
1971
|
+
or params.get("all_tables_file")
|
|
1972
|
+
or "dataobjects/globals/all_tables.yaml"
|
|
1973
|
+
)
|
|
1974
|
+
# Resolve path
|
|
1975
|
+
if not Path(all_tables_file).is_absolute():
|
|
1976
|
+
try:
|
|
1977
|
+
prj_root = config_path.parent.parent.parent
|
|
1978
|
+
except Exception:
|
|
1979
|
+
prj_root = Path.cwd()
|
|
1980
|
+
all_tables_path = prj_root / all_tables_file
|
|
1981
|
+
else:
|
|
1982
|
+
all_tables_path = Path(all_tables_file)
|
|
1983
|
+
|
|
1984
|
+
if all_tables_path.exists():
|
|
1985
|
+
try:
|
|
1986
|
+
with open(all_tables_path, "r") as f:
|
|
1987
|
+
all_tables_data = yaml.safe_load(f) or {}
|
|
1988
|
+
tables = all_tables_data.get(conn_obj) or []
|
|
1989
|
+
except Exception:
|
|
1990
|
+
pass
|
|
1991
|
+
|
|
1992
|
+
if tables:
|
|
1993
|
+
console.print(f"[green]Loaded {len(tables)} tables from {all_tables_file} for {conn_obj}.[/green]")
|
|
1994
|
+
else:
|
|
1995
|
+
# Fallback to Live Introspection
|
|
1996
|
+
console.print(f"[yellow]Warning: {conn_obj} not found in all_tables.yaml (or file missing). Falling back to live DB introspection.[/yellow]")
|
|
1997
|
+
insp = inspect(engine)
|
|
1998
|
+
tables = insp.get_table_names()
|
|
1897
1999
|
|
|
1898
2000
|
# Determine path (Config Driven)
|
|
1899
2001
|
discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
|
|
@@ -2079,25 +2181,19 @@ def whitelist(
|
|
|
2079
2181
|
# Construct new table map
|
|
2080
2182
|
new_table_map = {}
|
|
2081
2183
|
|
|
2082
|
-
# 1. Retained: Merge existing with new rule metadata
|
|
2083
|
-
# Priority: Existing (Manual) > New (Rule)
|
|
2084
|
-
# BUT: Enforce calculated paths to avoid stale absolute paths
|
|
2085
2184
|
for t in retained:
|
|
2086
|
-
existing_meta = existing_tables_map[t]
|
|
2185
|
+
existing_meta = existing_tables_map.get(t, {})
|
|
2087
2186
|
rule_meta = filtered_tables[t]
|
|
2088
|
-
# Merge: update rule defaults only if not set in existing
|
|
2089
|
-
merged = rule_meta.copy()
|
|
2090
|
-
merged.update(existing_meta) # Existing overwrites rule
|
|
2091
2187
|
|
|
2092
|
-
#
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
if
|
|
2099
|
-
merged["
|
|
2100
|
-
|
|
2188
|
+
# Strict Rewrite Logic (User Request)
|
|
2189
|
+
# We ONLY preserve 'custom_name' from existing entries.
|
|
2190
|
+
# All other metadata (domain, template, paths) must come strictly from rules/calculation.
|
|
2191
|
+
custom_name = existing_meta.get("custom_name")
|
|
2192
|
+
|
|
2193
|
+
merged = rule_meta.copy()
|
|
2194
|
+
if custom_name:
|
|
2195
|
+
merged["custom_name"] = custom_name
|
|
2196
|
+
|
|
2101
2197
|
new_table_map[t] = merged
|
|
2102
2198
|
|
|
2103
2199
|
# 2. Add new
|
|
@@ -89,10 +89,16 @@ class ConfigurationEngine:
|
|
|
89
89
|
# Context not found and no default key
|
|
90
90
|
rules = []
|
|
91
91
|
|
|
92
|
+
# Sort Rules by Specificity (Length Descending)
|
|
93
|
+
# This prevents short prefixes (e.g. 'data_') from shadowing longer ones or exact matches if they overlap
|
|
94
|
+
# though usually exact matches don't overlap with delimiter-based prefixes.
|
|
95
|
+
# But for 'user_' vs 'use_', 'user_' (len 5) should come before 'use_' (len 4).
|
|
96
|
+
sorted_rules = sorted(rules, key=lambda x: len(x.get("pattern", "")), reverse=True)
|
|
97
|
+
|
|
92
98
|
return GeneratorConfig(
|
|
93
99
|
version=1.2,
|
|
94
100
|
settings=GlobalSettings(**settings_data),
|
|
95
|
-
discovery_rules=[DiscoveryRule(**r) for r in rules],
|
|
101
|
+
discovery_rules=[DiscoveryRule(**r) for r in sorted_rules],
|
|
96
102
|
)
|
|
97
103
|
|
|
98
104
|
def _resolve_cubes_root(self) -> str:
|
|
@@ -213,6 +219,8 @@ class ConfigurationEngine:
|
|
|
213
219
|
"field_map": field_map_module, # 'field_map' template logic relies on discovery result often
|
|
214
220
|
"connection_obj": conn_obj,
|
|
215
221
|
"class_name": class_name,
|
|
222
|
+
# Pass template through for downstream consumers (e.g. whitelist generator)
|
|
223
|
+
"output_template": rule.output_template,
|
|
216
224
|
}
|
|
217
225
|
|
|
218
226
|
|
|
@@ -778,6 +778,32 @@ def generate_datacube_module_code(
|
|
|
778
778
|
else:
|
|
779
779
|
imports.add(base_imp)
|
|
780
780
|
|
|
781
|
+
# Config Object Import
|
|
782
|
+
# We need to resolve import for conf_obj (e.g. clickhouse_conf)
|
|
783
|
+
# Using registry.params which contains databases list with import_spec
|
|
784
|
+
params = registry.params
|
|
785
|
+
dbs = params.get("databases", [])
|
|
786
|
+
for db in dbs:
|
|
787
|
+
ref = db.get("connection_ref") or db.get("connection_obj")
|
|
788
|
+
if ref == conf_obj:
|
|
789
|
+
spec = db.get("import_spec")
|
|
790
|
+
if spec and isinstance(spec, dict):
|
|
791
|
+
mod = spec.get("module")
|
|
792
|
+
sym = spec.get("symbol")
|
|
793
|
+
if mod and sym:
|
|
794
|
+
imports.add(f"from {mod} import {sym}")
|
|
795
|
+
elif db.get("global_import"):
|
|
796
|
+
# Legacy or simple string import
|
|
797
|
+
imp_str = db.get("global_import")
|
|
798
|
+
if imp_str and sym and sym in imp_str:
|
|
799
|
+
imports.add(imp_str)
|
|
800
|
+
elif imp_str: # Fallback to add specific object if simple import
|
|
801
|
+
# Try to parse or just add "from ... import conf_obj" logic?
|
|
802
|
+
# If global_import is "from conf.credentials import *" -> hard to know
|
|
803
|
+
# If it is "from conf.credentials import replica_conf" -> easy
|
|
804
|
+
imports.add(imp_str)
|
|
805
|
+
break
|
|
806
|
+
|
|
781
807
|
details = registry.get_table_details(table_name)
|
|
782
808
|
field_map_str = details.get("field_map")
|
|
783
809
|
sticky_filters = details.get("sticky_filters")
|
|
@@ -56,6 +56,8 @@ def propose_cubes(db_domain: str, app_name: str) -> None:
|
|
|
56
56
|
|
|
57
57
|
# Registry Structure: {conf_obj: {table_name: {class_name: ..., path: ...}}}
|
|
58
58
|
for conf_obj, tables in global_data.items():
|
|
59
|
+
if not isinstance(tables, dict):
|
|
60
|
+
continue
|
|
59
61
|
for table_name, meta in tables.items():
|
|
60
62
|
path_str = meta.get("path", "")
|
|
61
63
|
class_name = meta.get("class_name")
|
|
@@ -100,14 +102,16 @@ def propose_cubes(db_domain: str, app_name: str) -> None:
|
|
|
100
102
|
|
|
101
103
|
# 4. Update App Registry
|
|
102
104
|
|
|
103
|
-
# Load existing
|
|
104
105
|
current_config = {"cubes": []}
|
|
105
106
|
if registry_path.exists():
|
|
106
107
|
try:
|
|
107
108
|
with open(registry_path, "r") as f:
|
|
108
109
|
loaded = yaml.safe_load(f)
|
|
109
|
-
if loaded
|
|
110
|
-
|
|
110
|
+
if loaded:
|
|
111
|
+
current_config = loaded
|
|
112
|
+
if "cubes" not in current_config or current_config["cubes"] is None:
|
|
113
|
+
current_config["cubes"] = []
|
|
114
|
+
|
|
111
115
|
except Exception:
|
|
112
116
|
pass
|
|
113
117
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/__init__.py
RENAMED
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/executor.py
RENAMED
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/manifest.py
RENAMED
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/planner.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/managed_resource/_managed_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/http/__init__.py
RENAMED
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/http/_http_config.py
RENAMED
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/parquet/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_io_dask.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/discovery_params.yaml
RENAMED
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/property_template.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/_artifact_orchestrator.py
RENAMED
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/_pipeline_executor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/clickhouse_writer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/_file_age_checker.py
RENAMED
|
File without changes
|
|
File without changes
|
{sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/filepath_generator/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|