sibi-flux 2026.1.8.tar.gz → 2026.1.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/PKG-INFO +1 -1
  2. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/pyproject.toml +2 -1
  3. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/cli.py +156 -60
  4. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/config_engine.py +9 -1
  5. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/generator.py +26 -0
  6. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/cube_proposer.py +7 -3
  7. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/README.md +0 -0
  8. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_dst/__init__.py +0 -0
  9. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/__init__.py +0 -0
  10. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/__init__.py +0 -0
  11. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/base.py +0 -0
  12. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet.py +0 -0
  13. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/__init__.py +0 -0
  14. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/executor.py +0 -0
  15. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/manifest.py +0 -0
  16. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/artifacts/parquet_engine/planner.py +0 -0
  17. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/cli.py +0 -0
  18. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/config/__init__.py +0 -0
  19. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/config/manager.py +0 -0
  20. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/config/settings.py +0 -0
  21. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/__init__.py +0 -0
  22. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/managed_resource/__init__.py +0 -0
  23. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/managed_resource/_managed_resource.py +0 -0
  24. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/core/type_maps/__init__.py +0 -0
  25. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/__init__.py +0 -0
  26. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/async_core.py +0 -0
  27. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/client_manager.py +0 -0
  28. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/core.py +0 -0
  29. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/exceptions.py +0 -0
  30. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dask_cluster/utils.py +0 -0
  31. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/__init__.py +0 -0
  32. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/_data_cube.py +0 -0
  33. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/field_factory.py +0 -0
  34. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/field_mapper.py +0 -0
  35. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/field_registry.py +0 -0
  36. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/orchestrator.py +0 -0
  37. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/datacube/router.py +0 -0
  38. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dataset/__init__.py +0 -0
  39. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dataset/_dataset.py +0 -0
  40. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/dataset/hybrid_loader.py +0 -0
  41. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/__init__.py +0 -0
  42. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/async_enricher.py +0 -0
  43. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/attacher.py +0 -0
  44. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/merger.py +0 -0
  45. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/specs.py +0 -0
  46. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_enricher/types.py +0 -0
  47. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/__init__.py +0 -0
  48. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/_df_helper.py +0 -0
  49. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/__init__.py +0 -0
  50. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/_params.py +0 -0
  51. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/_strategies.py +0 -0
  52. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/http/__init__.py +0 -0
  53. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/http/_http_config.py +0 -0
  54. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/parquet/__init__.py +0 -0
  55. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/parquet/_parquet_options.py +0 -0
  56. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/__init__.py +0 -0
  57. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_connection.py +0 -0
  58. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_db_gatekeeper.py +0 -0
  59. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_io_dask.py +0 -0
  60. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_load_from_db.py +0 -0
  61. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_model_registry.py +0 -0
  62. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/sqlalchemy/_sql_model_builder.py +0 -0
  63. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/backends/utils.py +0 -0
  64. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/__init__.py +0 -0
  65. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_defaults.py +0 -0
  66. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_filter_handler.py +0 -0
  67. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_params_config.py +0 -0
  68. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_helper/core/_query_config.py +0 -0
  69. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_validator/__init__.py +0 -0
  70. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/df_validator/_df_validator.py +0 -0
  71. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/__init__.py +0 -0
  72. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/app.py +0 -0
  73. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/core.py +0 -0
  74. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/cube_extender.py +0 -0
  75. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/discovery_updater.py +0 -0
  76. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/env.py +0 -0
  77. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/env_engine.py +0 -0
  78. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/env_generator.py +0 -0
  79. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/rule_generator.py +0 -0
  80. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/__init__.py +0 -0
  81. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/discovery_params.yaml +0 -0
  82. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/gen_dc.py +0 -0
  83. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/init/templates/property_template.yaml +0 -0
  84. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/logger/__init__.py +0 -0
  85. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/logger/_logger.py +0 -0
  86. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/mcp/__init__.py +0 -0
  87. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/mcp/client.py +0 -0
  88. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/mcp/router.py +0 -0
  89. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/__init__.py +0 -0
  90. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/_artifact_orchestrator.py +0 -0
  91. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/orchestration/_pipeline_executor.py +0 -0
  92. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/__init__.py +0 -0
  93. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/_pbf_handler.py +0 -0
  94. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/graph_loader.py +0 -0
  95. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/osmnx_helper/utils.py +0 -0
  96. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/__init__.py +0 -0
  97. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/readers/__init__.py +0 -0
  98. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/readers/base.py +0 -0
  99. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/readers/parquet.py +0 -0
  100. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/saver/__init__.py +0 -0
  101. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/saver/_parquet_saver.py +0 -0
  102. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/parquet/saver/_write_gatekeeper.py +0 -0
  103. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/pipelines/__init__.py +0 -0
  104. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/pipelines/base.py +0 -0
  105. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/pipelines/template.py +0 -0
  106. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/py.typed +0 -0
  107. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/readers/__init__.py +0 -0
  108. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/readers/base.py +0 -0
  109. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/__init__.py +0 -0
  110. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/_fs_registry.py +0 -0
  111. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/_storage_manager.py +0 -0
  112. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/storage/factory.py +0 -0
  113. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/__init__.py +0 -0
  114. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/clickhouse_writer/__init__.py +0 -0
  115. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/clickhouse_writer/_clickhouse_writer.py +0 -0
  116. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/common.py +0 -0
  117. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/dask_utils.py +0 -0
  118. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/data_utils/__init__.py +0 -0
  119. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/data_utils/_data_utils.py +0 -0
  120. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/dataframe_utils.py +0 -0
  121. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/__init__.py +0 -0
  122. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/_business_days.py +0 -0
  123. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/_date_utils.py +0 -0
  124. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/date_utils/_file_age_checker.py +0 -0
  125. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/file_utils.py +0 -0
  126. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/filepath_generator/__init__.py +0 -0
  127. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/filepath_generator/_filepath_generator.py +0 -0
  128. {sibi_flux-2026.1.8 → sibi_flux-2026.1.10}/src/sibi_flux/utils/retry.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: sibi-flux
- Version: 2026.1.8
+ Version: 2026.1.10
  Summary: Sibi Toolkit: A collection of tools for Data Analysis/Engineering.
  Author: Luis Valverde
  Author-email: Luis Valverde <lvalverdeb@gmail.com>
pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "sibi-flux"
- version = "2026.1.8"
+ version = "2026.1.10"
  description = "Sibi Toolkit: A collection of tools for Data Analysis/Engineering."
  readme = "README.md"
  authors = [
@@ -97,6 +97,7 @@ members = [
  "test-prj",
  "test_prj",
  "test_prj_verify",
+ "clean-test-prj",
  ]

  [tool.pytest.ini_options]
src/sibi_flux/datacube/cli.py
@@ -203,7 +203,7 @@ def set_context_defaults(


  def _get_db_url_callback(
- registry: DatacubeRegistry, db_url_map: Optional[str]
+ registry: DatacubeRegistry, db_url_map: Optional[str], params: Optional[Dict[str, Any]] = None
  ) -> Callable[[str], str]:
  """Helper to create a callback that resolves DB URLs from CLI overrides or registry."""
  cli_urls = json.loads(db_url_map) if db_url_map else {}
@@ -212,8 +212,24 @@ def _get_db_url_callback(
  # 1. CLI Override
  if conf_name in cli_urls:
  return cli_urls[conf_name]
+
  # 2. Dynamic Resolution
- url = resolve_db_url(conf_name, registry.global_imports)
+ imports = registry.global_imports
+
+ # Check specific import_spec from params
+ if params and "databases" in params:
+ for db in params.get("databases", []):
+ ref = db.get("connection_ref") or db.get("connection_obj")
+ if ref == conf_name:
+ spec = db.get("import_spec")
+ if spec and isinstance(spec, dict):
+ # Prepend specific module
+ imports = [spec.get("module")] + imports
+ elif db.get("global_import"):
+ imports = [db.get("global_import")] + imports
+ break
+
+ url = resolve_db_url(conf_name, imports)
  if url:
  return url
  raise ValueError(
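A minimal standalone sketch (not from the package source) of the precedence this change introduces: a matching entry in params["databases"] can prepend its own module to the import search list before resolve_db_url is called. The helper name and example modules are illustrative; the keys (connection_ref, connection_obj, import_spec, global_import) are the ones read in the diff.

from typing import Any, Dict, List, Optional

def build_import_search_list(
    conf_name: str,
    global_imports: List[str],
    params: Optional[Dict[str, Any]] = None,
) -> List[str]:
    # Start from the registry-wide imports, then let a matching db entry jump the queue.
    imports = list(global_imports)
    for db in (params or {}).get("databases", []):
        ref = db.get("connection_ref") or db.get("connection_obj")
        if ref != conf_name:
            continue
        spec = db.get("import_spec")
        if isinstance(spec, dict):
            imports = [spec.get("module")] + imports  # db-specific module is searched first
        elif db.get("global_import"):
            imports = [db.get("global_import")] + imports
        break
    return imports

params = {"databases": [{"connection_ref": "replica_conf",
                         "import_spec": {"module": "conf.replica_credentials"}}]}
print(build_import_search_list("replica_conf", ["conf.credentials"], params))
# ['conf.replica_credentials', 'conf.credentials']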
@@ -283,9 +299,16 @@ def sync(

  flat_tables = {}
  for grp, tbls in existing_reg_data.items():
- if isinstance(tbls, dict):
+ if grp == "tables" and isinstance(tbls, dict):
+ # Modern format: tables are in 'tables' dict
  for t, t_meta in tbls.items():
- # Inject the config object (group key) so DatacubeRegistry knows the connection
+ flat_tables[t] = t_meta
+ continue
+
+ if isinstance(tbls, dict) and grp != "global_imports":
+ # Legacy Scoped Format (grp is connection_obj)
+ for t, t_meta in tbls.items():
+ # Inject/Overwrite connection_obj only if implied by scope
  t_meta["connection_obj"] = grp
  flat_tables[t] = t_meta

@@ -295,6 +318,11 @@ def sync(
  console.print(f"[yellow]Warning: Could not load existing registry: {e}[/yellow]")

  registry = DatacubeRegistry(config_data, params=context.params)
+
+ # Fix 2 Re-applied: Ensure global_imports are populated from params if registry loaded from file didn't have them
+ # This is critical for Clean Project + Force flows where registry might be partial or JIT
+ if not registry.global_imports and context.params.get("global_imports"):
+ registry.global_imports = context.params.get("global_imports")

  # --- Aggregation Phase ---
  # --- Aggregation Phase ---
@@ -453,7 +481,7 @@ def sync(
  registry.valid_paths = list(valid_paths)
  registry.valid_fieldmap_paths = context.valid_fieldmap_paths

- get_url = _get_db_url_callback(registry, db_url_map)
+ get_url = _get_db_url_callback(registry, db_url_map, params=context.params)

  # Group tables by target file
  file_groups = registry.group_tables_by_file()
@@ -770,8 +798,22 @@ def discover(
  if not wl_filename:
  # Default convention: discovery_whitelist_<db_name>.yaml
  wl_filename = f"discovery_whitelist_{conn_obj}.yaml"
+
  whitelist_path = config_path.parent / wl_filename

+ # Fallback 1: Auto-generated per-db (from scan/map fallback logic)
+ if not whitelist_path.exists():
+ fb = config_path.parent / f"discovery_whitelist_{conn_obj}.yaml"
+ if fb.exists():
+ whitelist_path = fb
+
+ # Fallback 2: Global whitelist.yaml (Generated by 'whitelist' command)
+ if not whitelist_path.exists():
+ global_wl = config_path.parent / "whitelist.yaml"
+ if global_wl.exists():
+ console.print(f"[dim]Fallback: Using global {global_wl.name} for {db_name}[/dim]")
+ whitelist_path = global_wl
+
  # Determine rules path
  rules_filename = db_config.get("rules_file")
  if not rules_filename:
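A short sketch, under the assumptions visible in the hunk above, of the whitelist lookup order discover() now follows (explicit filename, then the per-db convention, then the global whitelist.yaml). The helper name is illustrative; the real code keeps the unresolved per-db path when nothing is found, whereas this sketch returns None.

from pathlib import Path
from typing import Optional

def resolve_whitelist(config_dir: Path, conn_obj: str, wl_filename: Optional[str]) -> Optional[Path]:
    # 1. Explicit filename from the db config, else the per-db naming convention.
    candidate = config_dir / (wl_filename or f"discovery_whitelist_{conn_obj}.yaml")
    if candidate.exists():
        return candidate
    # 2. Auto-generated per-db whitelist (same convention as the default name).
    per_db = config_dir / f"discovery_whitelist_{conn_obj}.yaml"
    if per_db.exists():
        return per_db
    # 3. Global whitelist.yaml produced by the 'whitelist' command.
    global_wl = config_dir / "whitelist.yaml"
    if global_wl.exists():
        return global_wl
    return None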
@@ -1015,20 +1057,31 @@ def scan(

  # Resolve global output file
  # First check nested discovery block (standard), then root (fallback)
- discovery_conf = params.get("discovery", {})
+ # Resolve global output file
+ # First check nested discovery block (standard), then root (fallback)
+ discovery_conf = params.get("paths", {}).get("discovery") or params.get("discovery", {})
  global_tables_file = (
  discovery_conf.get("all_tables_file")
  or params.get("all_tables_file")
- or "all_tables.yaml"
+ or "dataobjects/globals/all_tables.yaml"
  )

  # If using absolute path (resolved from context), use it directly
- # Otherwise treat as relative to config file location
+ # Otherwise treat as relative to PROJECT ROOT (not config file location)
  path_obj = Path(global_tables_file)
  if path_obj.is_absolute():
  global_tables_path = path_obj
  else:
- global_tables_path = config_path.parent / global_tables_file
+ try:
+ # Heuristic: config is in generators/datacubes
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+ global_tables_path = project_root / global_tables_file
+
+ # Ensure dir exists
+ if not global_tables_path.parent.exists():
+ global_tables_path.parent.mkdir(parents=True, exist_ok=True)

  # Load existing data to preserve config for DBs not being scanned
  all_tables_data = {}
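A sketch of the path resolution scan() applies after this change, assuming the config file sits three levels below the project root (generators/datacubes/<config>.yaml); the helper name is illustrative and only mirrors the heuristic shown above.

from pathlib import Path

def resolve_all_tables_path(config_path: Path, all_tables_file: str) -> Path:
    p = Path(all_tables_file)
    if p.is_absolute():
        return p
    # Heuristic from the diff: <root>/generators/datacubes/<config>.yaml -> <root>
    project_root = config_path.parent.parent.parent
    target = project_root / all_tables_file
    # scan() now creates the output directory up front so the write cannot fail on a clean project
    target.parent.mkdir(parents=True, exist_ok=True)
    return target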
@@ -1102,7 +1155,7 @@ def drift(
  config_data = _load_and_resolve_config(config_path)

  registry = DatacubeRegistry(config_data)
- get_url = _get_db_url_callback(registry, db_url_map)
+ get_url = _get_db_url_callback(registry, db_url_map, params=config_data)
  cli_urls = json.loads(db_url_map) if db_url_map else {}

  drift_table = Table(title="Schema Drift Analysis")
@@ -1203,6 +1256,8 @@ def propose_rules(
  dry_run: bool = typer.Option(
  False, "--dry-run", help="Preview rules without saving"
  ),
+ conf_override: Optional[str] = typer.Option(None, "--rules", help="Override rules file"),
+ force: bool = typer.Option(False, "--force", "-f", help="Force overwrite existing rules"),
  ):
  """
  Analyzes all_tables.yaml and proposes new discovery rules.
@@ -1219,23 +1274,35 @@ def propose_rules(
  # The resolved config will have absolute paths for these if the helper worked.
  params = context.params

- # Prefer resolved values if available
- if (
- "discovery" in resolved_config
- and "all_tables_file" in resolved_config["discovery"]
- ):
- all_tables_path = Path(resolved_config["discovery"]["all_tables_file"])
+ # Prefer resolved values if available, but check nested 'paths' structure first
+ # Structure: paths -> discovery -> all_tables_file
+ discovery_conf = params.get("paths", {}).get("discovery") or params.get("discovery", {})
+ all_tables_file = (
+ discovery_conf.get("all_tables_file")
+ or params.get("all_tables_file")
+ or "dataobjects/globals/all_tables.yaml"
+ )
+
+ path_obj = Path(all_tables_file)
+ if path_obj.is_absolute():
+ all_tables_path = path_obj
  else:
- # Fallback to manual resolution (legacy or if not in discovery block)
- raw_val = (
- params.get("discovery", {}).get("all_tables_file")
- or params.get("all_tables_file")
- or "all_tables.yaml"
- )
- all_tables_path = config_path.parent / raw_val
+ try:
+ # Heuristic: config is in generators/datacubes
+ project_root = config_path.parent.parent.parent
+ except Exception:
+ project_root = Path.cwd()
+ all_tables_path = project_root / all_tables_file
+
+ # Verify Existence
+ if not all_tables_path.exists():
+ console.print(f"[red]Error: {all_tables_path} not found. Run 'dc scan' first.[/red]")
+ raise typer.Exit(code=1)

  # Rules File
- if "discovery" in resolved_config and "rules_file" in resolved_config["discovery"]:
+ if conf_override:
+ rules_path = Path(conf_override)
+ elif "discovery" in resolved_config and "rules_file" in resolved_config["discovery"]:
  rules_path = Path(resolved_config["discovery"]["rules_file"])
  else:
  raw_rules = (
@@ -1245,11 +1312,12 @@
  )
  rules_path = config_path.parent / raw_rules

- if not all_tables_path.exists():
- console.print(
- f"[red]Error: {all_tables_path} not found. Run 'dc-scan' first.[/red]"
- )
- raise typer.Exit(code=1)
+ # Guard: Require Force if Rules Exist
+ if rules_path.exists() and not force and not dry_run:
+ console.print(f"[yellow]Rules file exists at {rules_path}.[/yellow]")
+ console.print("[yellow]Skipping proposal generation to protect manual edits.[/yellow]")
+ console.print("[dim]Use --force to overwrite/update rules.[/dim]")
+ return

  engine = RuleEngine(all_tables_path, rules_path)
  engine.load()
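A condensed sketch of the reordered guards in propose_rules after this change: a missing scan output is checked up front, while an existing rules file is only regenerated with --force (dry runs still preview). The function name and boolean-return convention are illustrative, not the CLI's actual API.

def should_generate(all_tables_exists: bool, rules_exist: bool, force: bool, dry_run: bool) -> bool:
    if not all_tables_exists:
        raise SystemExit("all_tables.yaml not found. Run 'dc scan' first.")
    if rules_exist and not force and not dry_run:
        return False  # skip: protect manual edits to the rules file
    return True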
@@ -1554,7 +1622,7 @@ def workflow(
  # Step 0: Scan & Propose Rules (Critical for new projects/drift)
  console.print(f"\n[bold cyan]Step 0a: Scan (Introspection)[/]")
  scan(
- config_file=config_file,
+ config_file=config_file if config_file else None,
  db_url_map=db_url_map,
  env_file=env_file,
  db_name=db_name,
@@ -1562,8 +1630,19 @@

  console.print(f"\n[bold cyan]Step 0b: Propose Rules[/]")
  propose_rules(
+ config_file=config_file if config_file else None,
+ dry_run=dry_run,
+ conf_override=None,
+ force=force,
+ )
+
+ console.print(f"\n[bold cyan]Step 0c: Generate Whitelist[/]")
+ whitelist(
  config_file=config_file,
- dry_run=dry_run, # Preview rules if dry-running
+ db_name=db_name,
+ db_url_map=db_url_map,
+ env_file=env_file,
+ force=force,
  )

  # Step 1: Discover
@@ -1582,17 +1661,10 @@
  force=False, # Discovery force is about fields usually?
  )

- # Step 2: Map
- console.print(f"\n[bold cyan]Step 2: Map Generation[/]")
- map(
- config_file=config_file,
- db_url_map=db_url_map,
- env_file=env_file,
- db_name=db_name,
- force=force,
- )
-
- # Step 3: Sync
+ # Step 2: Map (Skipped - Handled by Sync)
+ # console.print(f"\n[bold cyan]Step 2: Map Generation (Integrated into Sync)[/]")
+
+ # Step 3: Sync -> Generates Code + Field Maps
  console.print(f"\n[bold cyan]Step 3: Sync (Code Generation)[/]")
  sync(
  config_file=config_file,
@@ -1892,8 +1964,38 @@ def whitelist(
  )
  continue

- insp = inspect(engine)
- tables = insp.get_table_names()
+ # 0. Try to load tables from all_tables.yaml (Scan Output)
+ tables = []
+ all_tables_file = (
+ params.get("paths", {}).get("discovery", {}).get("all_tables_file")
+ or params.get("all_tables_file")
+ or "dataobjects/globals/all_tables.yaml"
+ )
+ # Resolve path
+ if not Path(all_tables_file).is_absolute():
+ try:
+ prj_root = config_path.parent.parent.parent
+ except Exception:
+ prj_root = Path.cwd()
+ all_tables_path = prj_root / all_tables_file
+ else:
+ all_tables_path = Path(all_tables_file)
+
+ if all_tables_path.exists():
+ try:
+ with open(all_tables_path, "r") as f:
+ all_tables_data = yaml.safe_load(f) or {}
+ tables = all_tables_data.get(conn_obj) or []
+ except Exception:
+ pass
+
+ if tables:
+ console.print(f"[green]Loaded {len(tables)} tables from {all_tables_file} for {conn_obj}.[/green]")
+ else:
+ # Fallback to Live Introspection
+ console.print(f"[yellow]Warning: {conn_obj} not found in all_tables.yaml (or file missing). Falling back to live DB introspection.[/yellow]")
+ insp = inspect(engine)
+ tables = insp.get_table_names()

  # Determine path (Config Driven)
  discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
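A sketch of the new table-source preference in whitelist(): the scan output in all_tables.yaml wins when it has an entry for the connection object, and live SQLAlchemy introspection remains the fallback. The helper signature below is illustrative.

from pathlib import Path
import yaml

def load_tables(all_tables_path: Path, conn_obj: str, engine) -> list:
    tables = []
    if all_tables_path.exists():
        try:
            data = yaml.safe_load(all_tables_path.read_text()) or {}
            tables = data.get(conn_obj) or []
        except Exception:
            tables = []
    if tables:
        return list(tables)
    # Fallback: live introspection, as before this change.
    from sqlalchemy import inspect
    return inspect(engine).get_table_names()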
@@ -2079,25 +2181,19 @@ def whitelist(
  # Construct new table map
  new_table_map = {}

- # 1. Retained: Merge existing with new rule metadata
- # Priority: Existing (Manual) > New (Rule)
- # BUT: Enforce calculated paths to avoid stale absolute paths
  for t in retained:
- existing_meta = existing_tables_map[t]
+ existing_meta = existing_tables_map.get(t, {})
  rule_meta = filtered_tables[t]
- # Merge: update rule defaults only if not set in existing
- merged = rule_meta.copy()
- merged.update(existing_meta) # Existing overwrites rule

- # Legacy Cleanup: We moved custom_name to Registry
- merged.pop("custom_name", None)
-
- # Restore calculated paths (Enforce Relative)
- if "datacube_path" in rule_meta:
- merged["datacube_path"] = rule_meta["datacube_path"]
- if "field_map_path" in rule_meta:
- merged["field_map_path"] = rule_meta["field_map_path"]
-
+ # Strict Rewrite Logic (User Request)
+ # We ONLY preserve 'custom_name' from existing entries.
+ # All other metadata (domain, template, paths) must come strictly from rules/calculation.
+ custom_name = existing_meta.get("custom_name")
+
+ merged = rule_meta.copy()
+ if custom_name:
+ merged["custom_name"] = custom_name
+
  new_table_map[t] = merged

  # 2. Add new
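A sketch of the strict rewrite merge for retained tables: only custom_name survives from the existing entry, everything else comes from the recalculated rule metadata. The example values are made up.

def merge_retained(existing_meta: dict, rule_meta: dict) -> dict:
    merged = rule_meta.copy()
    custom_name = existing_meta.get("custom_name")
    if custom_name:
        merged["custom_name"] = custom_name
    return merged

# A stale absolute datacube_path in the old entry is dropped in favour of the recalculated one.
old = {"custom_name": "Orders", "datacube_path": "/abs/stale/orders.py"}
new = {"datacube_path": "cubes/orders.py", "domain": "sales"}
print(merge_retained(old, new))
# {'datacube_path': 'cubes/orders.py', 'domain': 'sales', 'custom_name': 'Orders'}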
src/sibi_flux/datacube/config_engine.py
@@ -89,10 +89,16 @@ class ConfigurationEngine:
  # Context not found and no default key
  rules = []

+ # Sort Rules by Specificity (Length Descending)
+ # This prevents short prefixes (e.g. 'data_') from shadowing longer ones or exact matches if they overlap
+ # though usually exact matches don't overlap with delimiter-based prefixes.
+ # But for 'user_' vs 'use_', 'user_' (len 5) should come before 'use_' (len 4).
+ sorted_rules = sorted(rules, key=lambda x: len(x.get("pattern", "")), reverse=True)
+
  return GeneratorConfig(
  version=1.2,
  settings=GlobalSettings(**settings_data),
- discovery_rules=[DiscoveryRule(**r) for r in rules],
+ discovery_rules=[DiscoveryRule(**r) for r in sorted_rules],
  )

  def _resolve_cubes_root(self) -> str:
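A small example of the specificity sort added here, assuming each rule carries a "pattern" key as the diff implies: longer patterns are evaluated first, so an overlapping shorter prefix cannot shadow them.

rules = [{"pattern": "use_"}, {"pattern": "user_"}, {"pattern": "data_"}]
sorted_rules = sorted(rules, key=lambda r: len(r.get("pattern", "")), reverse=True)
print([r["pattern"] for r in sorted_rules])  # ['user_', 'data_', 'use_']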
@@ -213,6 +219,8 @@ class ConfigurationEngine:
  "field_map": field_map_module, # 'field_map' template logic relies on discovery result often
  "connection_obj": conn_obj,
  "class_name": class_name,
+ # Pass template through for downstream consumers (e.g. whitelist generator)
+ "output_template": rule.output_template,
  }


src/sibi_flux/datacube/generator.py
@@ -778,6 +778,32 @@ def generate_datacube_module_code(
  else:
  imports.add(base_imp)

+ # Config Object Import
+ # We need to resolve import for conf_obj (e.g. clickhouse_conf)
+ # Using registry.params which contains databases list with import_spec
+ params = registry.params
+ dbs = params.get("databases", [])
+ for db in dbs:
+ ref = db.get("connection_ref") or db.get("connection_obj")
+ if ref == conf_obj:
+ spec = db.get("import_spec")
+ if spec and isinstance(spec, dict):
+ mod = spec.get("module")
+ sym = spec.get("symbol")
+ if mod and sym:
+ imports.add(f"from {mod} import {sym}")
+ elif db.get("global_import"):
+ # Legacy or simple string import
+ imp_str = db.get("global_import")
+ if imp_str and sym and sym in imp_str:
+ imports.add(imp_str)
+ elif imp_str: # Fallback to add specific object if simple import
+ # Try to parse or just add "from ... import conf_obj" logic?
+ # If global_import is "from conf.credentials import *" -> hard to know
+ # If it is "from conf.credentials import replica_conf" -> easy
+ imports.add(imp_str)
+ break
+
  details = registry.get_table_details(table_name)
  field_map_str = details.get("field_map")
  sticky_filters = details.get("sticky_filters")
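A sketch of the import line the generator now emits for a connection object, assuming the databases metadata shape shown in the diff (an import_spec with module/symbol takes precedence over a raw global_import string). The helper and example entry are illustrative.

from typing import Optional

def config_import_line(db: dict) -> Optional[str]:
    spec = db.get("import_spec")
    if isinstance(spec, dict) and spec.get("module") and spec.get("symbol"):
        return f"from {spec['module']} import {spec['symbol']}"
    return db.get("global_import")  # legacy path: an already-formed import statement

print(config_import_line({"connection_ref": "clickhouse_conf",
                          "import_spec": {"module": "conf.credentials",
                                          "symbol": "clickhouse_conf"}}))
# from conf.credentials import clickhouse_conf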
src/sibi_flux/init/cube_proposer.py
@@ -56,6 +56,8 @@ def propose_cubes(db_domain: str, app_name: str) -> None:

  # Registry Structure: {conf_obj: {table_name: {class_name: ..., path: ...}}}
  for conf_obj, tables in global_data.items():
+ if not isinstance(tables, dict):
+ continue
  for table_name, meta in tables.items():
  path_str = meta.get("path", "")
  class_name = meta.get("class_name")
@@ -100,14 +102,16 @@ def propose_cubes(db_domain: str, app_name: str) -> None:

  # 4. Update App Registry

- # Load existing
  current_config = {"cubes": []}
  if registry_path.exists():
  try:
  with open(registry_path, "r") as f:
  loaded = yaml.safe_load(f)
- if loaded and "cubes" in loaded:
- current_config = loaded
+ if loaded:
+ current_config = loaded
+ if "cubes" not in current_config or current_config["cubes"] is None:
+ current_config["cubes"] = []
+
  except Exception:
  pass

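A sketch of the more tolerant registry load in propose_cubes after this change: any loaded mapping is accepted, and a missing or null "cubes" key is normalised to an empty list. The helper name is illustrative.

from pathlib import Path
import yaml

def load_app_registry(registry_path: Path) -> dict:
    current = {"cubes": []}
    if registry_path.exists():
        try:
            loaded = yaml.safe_load(registry_path.read_text())
            if loaded:
                current = loaded
                if "cubes" not in current or current["cubes"] is None:
                    current["cubes"] = []
        except Exception:
            pass
    return current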
File without changes