sibi-flux 2026.1.2-py3-none-any.whl → 2026.1.4-py3-none-any.whl

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in those public registries.
sibi_flux/datacube/cli.py CHANGED
@@ -7,6 +7,7 @@ import typer
7
7
  import subprocess
8
8
  import importlib.util
9
9
  import importlib
10
+ import shutil
10
11
  import sqlalchemy as sa
11
12
  from pathlib import Path
12
13
  from typing import Optional, Callable, Set, Dict, Any, Iterable, Mapping
@@ -29,15 +30,106 @@ from sibi_flux.datacube.generator import (
29
30
  )
30
31
  from sibi_flux.datacube.orchestrator import DiscoveryOrchestrator
31
32
  from sibi_flux.datacube.field_factory import FieldMapFactory
33
+ from sibi_flux.init.rule_generator import RuleEngine
32
34
 
33
35
  import sibi_flux.datacube.generator
34
36
 
35
-
36
37
  app = typer.Typer(help="Sibi-Flux Data Cube Generator")
37
38
  console = Console()
38
39
 
39
40
  # --- Context Management ---
40
41
 
42
+
43
+ def _load_and_resolve_config(config_path: Path) -> dict:
44
+ if not config_path.exists():
45
+ return {}
46
+ with open(config_path, "r") as f:
47
+ config_data = yaml.safe_load(f) or {}
48
+
49
+ # Heuristic: Config is in generators/datacubes/discovery_params.yaml
50
+ # Project Root is 3 levels up from FILE
51
+ try:
52
+ project_root = config_path.parent.parent.parent
53
+ except Exception:
54
+ project_root = Path.cwd()
55
+
56
+ if "paths" in config_data:
57
+ if "target" in config_data["paths"]:
58
+ target = config_data["paths"]["target"]
59
+ for key in ["datacubes_dir", "field_maps_dir"]:
60
+ if key in target:
61
+ rel_path = target[key]
62
+ if rel_path and not Path(rel_path).is_absolute():
63
+ abs_path = (project_root / rel_path).resolve()
64
+ target[key] = str(abs_path)
65
+
66
+ # Resolve Registry File (Dual Support)
67
+ repos = config_data.get("paths", {}).get("repositories", {})
68
+
69
+ # New location
70
+ if "global_datacube_registry_file" in repos:
71
+ reg_file = repos["global_datacube_registry_file"]
72
+ if not Path(reg_file).is_absolute():
73
+ repos["global_datacube_registry_file"] = str(
74
+ (project_root / reg_file).resolve()
75
+ )
76
+
77
+ # Old location (fallback)
78
+ elif "global_datacube_registry_file" in config_data.get("paths", {}):
79
+ reg_file = config_data.get("paths", {})["global_datacube_registry_file"]
80
+ if not Path(reg_file).is_absolute():
81
+ config_data["paths"]["global_datacube_registry_file"] = str(
82
+ (project_root / reg_file).resolve()
83
+ )
84
+
85
+ # Resolve Repositories
86
+ if "repositories" in config_data["paths"]:
87
+ repos = config_data["paths"]["repositories"]
88
+ for key in [
89
+ "global_field_repository_file",
90
+ "global_field_translations_file",
91
+ ]:
92
+ if key in repos:
93
+ rel = repos[key]
94
+ if rel and not Path(rel).is_absolute():
95
+ config_data["paths"]["repositories"][key] = str(
96
+ (project_root / rel).resolve()
97
+ )
98
+
99
+ # Resolve Discovery Paths (Dual Support: root or paths.discovery)
100
+ discovery_block = None
101
+ if "paths" in config_data and "discovery" in config_data["paths"]:
102
+ discovery_block = config_data["paths"]["discovery"]
103
+ elif "discovery" in config_data:
104
+ discovery_block = config_data["discovery"]
105
+
106
+ if discovery_block:
107
+ for key in ["all_tables_file", "rules_file", "whitelist_file"]:
108
+ if key in discovery_block:
109
+ rel = discovery_block[key]
110
+ if rel and not Path(rel).is_absolute():
111
+ discovery_block[key] = str((project_root / rel).resolve())
112
+
113
+ # Normalize databases (id -> name mapping) for CLI compatibility
114
+ # Ensure this matches logic in gen_dc.py wrapper
115
+ if "databases" in config_data:
116
+ for db in config_data["databases"]:
117
+ if "id" in db and "name" not in db:
118
+ db["name"] = db["id"]
119
+ if "connection_ref" in db and "connection_obj" not in db:
120
+ db["connection_obj"] = db["connection_ref"]
121
+
122
+ # Normalize import_spec to global_import string for resolve_db_url
123
+ if "import_spec" in db and "global_import" not in db:
124
+ spec = db["import_spec"]
125
+ if "module" in spec and "symbol" in spec:
126
+ db["global_import"] = (
127
+ f"from {spec['module']} import {spec['symbol']}"
128
+ )
129
+
130
+ return config_data
131
+
132
+
41
133
  class CLIContext:
42
134
  def __init__(self):
43
135
  self.default_config: Optional[Path] = None
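For context, the new _load_and_resolve_config helper anchors relative paths in the params file against a project root inferred as three directory levels above the config file (generators/datacubes/discovery_params.yaml → project root). A minimal, self-contained sketch of that heuristic for the paths.target block, using only pathlib and PyYAML rather than the package's own helper:

    from pathlib import Path
    import yaml

    def resolve_target_paths(config_path: Path) -> dict:
        """Sketch: anchor relative target paths to the inferred project root."""
        config_data = yaml.safe_load(config_path.read_text()) or {}
        # Heuristic from the diff: generators/datacubes/<file> -> root is three levels up.
        project_root = config_path.resolve().parent.parent.parent
        target = config_data.get("paths", {}).get("target", {})
        for key in ("datacubes_dir", "field_maps_dir"):
            rel = target.get(key)
            if rel and not Path(rel).is_absolute():
                target[key] = str((project_root / rel).resolve())
        return config_data

The real helper applies the same treatment to the registry, repository, and discovery file entries, and also normalizes database entries (id → name, connection_ref → connection_obj, import_spec → a "from <module> import <symbol>" string) so that older and newer config layouts behave the same way.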
@@ -52,7 +144,7 @@ class CLIContext:
52
144
  field_translations_file: Path,
53
145
  valid_paths: list[str],
54
146
  valid_fieldmap_paths: list[str],
55
- params: Optional[dict] = None
147
+ params: Optional[dict] = None,
56
148
  ):
57
149
  self.default_config = default_config
58
150
  self.field_translations_file = field_translations_file
@@ -60,23 +152,59 @@ class CLIContext:
60
152
  self.valid_fieldmap_paths = valid_fieldmap_paths
61
153
  self.params = params or {}
62
154
 
155
+ def auto_configure(self):
156
+ """Attempts to find defaults if not configured."""
157
+ if self.default_config:
158
+ return
159
+
160
+ # Heuristic check for standard project layout
161
+ # Case 1: Run from project root -> generators/datacubes/discovery_params.yaml
162
+ candidate = Path("generators/datacubes/discovery_params.yaml")
163
+ if candidate.exists():
164
+ # Use shared resolver to get normalization and project root paths
165
+ raw_params = _load_and_resolve_config(candidate)
166
+
167
+ self.configure(
168
+ default_config=candidate.resolve(),
169
+ field_translations_file=(
170
+ candidate.parent.parent.parent
171
+ / "dataobjects/globals/global_field_translations.yaml"
172
+ ).resolve(),
173
+ valid_paths=[], # Would need params to populate
174
+ valid_fieldmap_paths=[],
175
+ params=raw_params,
176
+ )
177
+ console.print(f"[dim]Auto-configured context from {candidate}[/dim]")
178
+
179
+
63
180
  context = CLIContext()
181
+ context.auto_configure()
182
+
64
183
 
65
184
  def set_context_defaults(
66
185
  default_config: Path,
67
186
  field_translations_file: Path,
68
187
  valid_paths: list[str],
69
188
  valid_fieldmap_paths: list[str],
70
- params: Optional[dict] = None
189
+ params: Optional[dict] = None,
71
190
  ):
72
191
  """Configures the CLI context with project-specific defaults."""
73
- context.configure(default_config, field_translations_file, valid_paths, valid_fieldmap_paths, params)
74
-
192
+ context.configure(
193
+ default_config,
194
+ field_translations_file,
195
+ valid_paths,
196
+ valid_fieldmap_paths,
197
+ params,
198
+ )
199
+
75
200
  # Ensure directories exist based on configured params
76
201
  if params:
77
202
  ensure_directories_exist(params, logger=console.log)
78
203
 
79
- def _get_db_url_callback(registry: DatacubeRegistry, db_url_map: Optional[str]) -> Callable[[str], str]:
204
+
205
+ def _get_db_url_callback(
206
+ registry: DatacubeRegistry, db_url_map: Optional[str]
207
+ ) -> Callable[[str], str]:
80
208
  """Helper to create a callback that resolves DB URLs from CLI overrides or registry."""
81
209
  cli_urls = json.loads(db_url_map) if db_url_map else {}
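The reworked _get_db_url_callback (its body continues in the next hunk) keeps the existing precedence: an explicit --db-urls JSON mapping of config-object name to URL wins, otherwise the URL is resolved from the configured imports. A rough, standalone sketch of that precedence; resolve_from_imports stands in for the package's resolve_db_url call:

    import json
    from typing import Callable, Optional

    def make_url_resolver(
        db_url_map: Optional[str],
        resolve_from_imports: Callable[[str], Optional[str]],
    ) -> Callable[[str], str]:
        # --db-urls is an optional JSON object: {"<config object>": "<database URL>", ...}
        cli_urls = json.loads(db_url_map) if db_url_map else {}

        def get_url(conf_name: str) -> str:
            if conf_name in cli_urls:               # 1. explicit CLI override
                return cli_urls[conf_name]
            url = resolve_from_imports(conf_name)   # 2. resolved from imported config objects
            if url:
                return url
            raise ValueError(f"Could not resolve DB URL for '{conf_name}'")

        return get_url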
82
210
 
@@ -88,19 +216,29 @@ def _get_db_url_callback(registry: DatacubeRegistry, db_url_map: Optional[str])
88
216
  url = resolve_db_url(conf_name, registry.global_imports)
89
217
  if url:
90
218
  return url
91
- raise ValueError(f"Could not resolve DB URL for '{conf_name}'. Provide via --db-urls or check imports.")
219
+ raise ValueError(
220
+ f"Could not resolve DB URL for '{conf_name}'. Provide via --db-urls or check imports."
221
+ )
92
222
 
93
223
  return get_url
94
224
 
225
+
95
226
  # --- Commands ---
96
227
 
228
+
97
229
  @app.command()
98
230
  def sync(
99
231
  config_file: Optional[Path] = typer.Option(None, "--config"),
100
- db_url_map: Optional[str] = typer.Option(None, "--db-urls", help="Optional JSON mapping. If omitted, tries to resolve from code."),
232
+ db_url_map: Optional[str] = typer.Option(
233
+ None,
234
+ "--db-urls",
235
+ help="Optional JSON mapping. If omitted, tries to resolve from code.",
236
+ ),
101
237
  force: bool = typer.Option(False, "--force", "-f"),
102
- env_file: Optional[Path] = typer.Option(None, "--env-file", "-e", help="Path to environment file"),
103
- dry_run: bool = typer.Option(False, "--dry-run")
238
+ env_file: Optional[Path] = typer.Option(
239
+ None, "--env-file", "-e", help="Path to environment file"
240
+ ),
241
+ dry_run: bool = typer.Option(False, "--dry-run"),
104
242
  ) -> None:
105
243
  """Generates all Datacube classes based on the whitelists and field maps."""
106
244
  config_path = config_file or context.default_config
@@ -119,64 +257,188 @@ def sync(
119
257
  load_environment(env_path, logger=console.print)
120
258
 
121
259
  # Start with empty/default registry
122
- with open(config_path, 'r') as f:
123
- config_data = yaml.safe_load(f)
260
+ config_data = _load_and_resolve_config(config_path)
124
261
  registry = DatacubeRegistry(config_data, params=context.params)
125
-
262
+
263
+ # --- Aggregation Phase ---
126
264
  # --- Aggregation Phase ---
127
265
  params = context.params
128
266
  databases = params.get("databases", [])
129
-
267
+
268
+ # JIT DISCOVERY CHECK
269
+ # If using simplified whitelist workflow, registry might be empty.
270
+ # Auto-discover from whitelist in-memory.
271
+ if not registry.tables:
272
+ console.print(
273
+ "[dim]Registry empty. Attempting JIT Discovery from Whitelists...[/dim]"
274
+ )
275
+
276
+ # Prepare URL resolver for orchestrator usage if needed
277
+ import json
278
+
279
+ cli_urls = json.loads(db_url_map) if db_url_map else {}
280
+
281
+ for db in databases:
282
+ conn_obj = db.get("connection_ref") or db.get("connection_obj")
283
+ nm = db.get("id") or db.get("name")
284
+
285
+ # Resolve whitelist/rules paths (reusing logic from discover command or simplifying?)
286
+ # Orchestrator handles defaults if paths passed are relative/simple strings.
287
+ # We need to resolve full paths to be safe, or trust Orchestrator logic.
288
+ # Let's rely on params provided to orchestrator logic via context.params
289
+
290
+ try:
291
+ # Resolve DB URL
292
+ import_spec = db.get("import_spec")
293
+ if import_spec and isinstance(import_spec, dict):
294
+ imp = import_spec.get("module")
295
+ else:
296
+ imp = db.get("global_import")
297
+ db_imports = [imp] if imp else registry.global_imports
298
+
299
+ # Helper to resolve
300
+ if conn_obj in cli_urls:
301
+ db_conn_str = cli_urls[conn_obj]
302
+ else:
303
+ db_conn_str = resolve_db_url(conn_obj, db_imports)
304
+
305
+ if not db_conn_str:
306
+ console.print(
307
+ f"[yellow]Skipping JIT discovery for {nm}: No DB URL.[/yellow]"
308
+ )
309
+ continue
310
+
311
+ # Initialize Orchestrator
312
+ # We need to construct paths similar to 'discover' command logic
313
+ # Or let Orchestrator defaults handle it.
314
+ # Better to pass explicit defaults from params if available.
315
+
316
+ disc_paths = params.get("paths", {}).get("discovery", {}) or params.get(
317
+ "discovery", {}
318
+ )
319
+
320
+ whitelist_file = (
321
+ db.get("whitelist_file")
322
+ or disc_paths.get("whitelist_file")
323
+ or params.get("whitelist_file")
324
+ or f"discovery_whitelist_{conn_obj}.yaml"
325
+ )
326
+ rules_file = (
327
+ db.get("rules_file")
328
+ or disc_paths.get("rules_file")
329
+ or params.get("rules_file")
330
+ or f"discovery_rules_{conn_obj}.yaml"
331
+ )
332
+
333
+ # Anchoring
334
+ try:
335
+ prj_root = config_path.parent.parent.parent
336
+ except Exception:
337
+ prj_root = Path.cwd()
338
+
339
+ # Resolve Whitelist
340
+ if Path(whitelist_file).is_absolute():
341
+ wl_path = whitelist_file
342
+ else:
343
+ wl_path = str(prj_root / whitelist_file)
344
+
345
+ # Resolve Rules
346
+ if Path(rules_file).is_absolute():
347
+ r_path = rules_file
348
+ else:
349
+ r_path = str(prj_root / rules_file)
350
+
351
+ # console.print(f"DEBUG: {nm} -> WL Path: {wl_path} (Exists: {Path(wl_path).exists()})")
352
+
353
+ orchestrator = DiscoveryOrchestrator(
354
+ params=context.params,
355
+ rules_path=r_path,
356
+ whitelist_path=wl_path,
357
+ registry_path=str(config_path), # Not saving, but needed for init?
358
+ db_connection_str=db_conn_str,
359
+ db_config=db,
360
+ )
361
+
362
+ entries = orchestrator.discover()
363
+ registry.merge_discovered(entries)
364
+
365
+ except Exception as e:
366
+ console.print(f"[red]JIT Discovery failed for {nm}: {e}[/red]")
367
+
130
368
  # 0. Generate Field Maps (if enabled)
131
369
  # Check generation.enable_field_maps (defaults to True)
132
370
  if params.get("generation", {}).get("enable_field_maps", True):
133
371
  import json
372
+
134
373
  cli_urls = json.loads(db_url_map) if db_url_map else {}
135
-
374
+
136
375
  def get_url_safe(conf_name, db_imp):
137
- if conf_name in cli_urls: return cli_urls[conf_name]
138
- imp = [db_imp] if db_imp else registry.global_imports
139
- return resolve_db_url(conf_name, imp)
376
+ if conf_name in cli_urls:
377
+ return cli_urls[conf_name]
378
+ imp = [db_imp] if db_imp else registry.global_imports
379
+ return resolve_db_url(conf_name, imp)
140
380
 
141
- _run_field_map_generation(context, config_path, databases, get_url_safe, force=force)
381
+ _run_field_map_generation(
382
+ context, config_path, databases, get_url_safe, force=force
383
+ )
142
384
  # Ensure new modules are picked up
143
385
  importlib.invalidate_caches()
144
386
 
145
387
  # Inject valid paths for security from context
146
-
147
-
148
388
 
149
389
  # Inject valid paths for security from context
150
- registry.valid_paths = context.valid_paths
390
+ # Also inject the resolved datacubes_dir since params determines it.
391
+
392
+ # Resolving datacubes_dir locally just in case context.valid_paths misses it
393
+ # (Context might rely on static registry or defaults, but params can contain overrides)
394
+ dc_dir = params.get("paths", {}).get("target", {}).get("datacubes_dir")
395
+
396
+ valid_paths = set(context.valid_paths) # Use set for deduplication
397
+ if dc_dir:
398
+ # Resolve against project root if relative
399
+ if Path(dc_dir).is_absolute():
400
+ valid_paths.add(str(dc_dir))
401
+ else:
402
+ try:
403
+ # Heuristic re-resolution
404
+ prj_root = config_path.parent.parent.parent
405
+ valid_paths.add(str(prj_root / dc_dir))
406
+ except Exception:
407
+ valid_paths.add(str(Path.cwd() / dc_dir))
408
+
409
+ # Debug: Check if registry uses valid_paths correctly
410
+ registry.valid_paths = list(valid_paths)
151
411
  registry.valid_fieldmap_paths = context.valid_fieldmap_paths
152
-
412
+
153
413
  get_url = _get_db_url_callback(registry, db_url_map)
154
414
 
155
415
  # Group tables by target file
156
416
  file_groups = registry.group_tables_by_file()
157
417
 
158
418
  summary_table = Table(title="Sync Results")
159
-
160
-
161
419
 
162
420
  summary_table.add_column("File", style="magenta")
163
421
  summary_table.add_column("Classes", style="cyan")
164
422
  summary_table.add_column("Status")
165
423
 
424
+ generated_registry = {}
425
+
166
426
  for file_path_str, items in file_groups.items():
167
427
  if not is_secure_path(file_path_str, registry.valid_paths):
168
- console.print(f"[bold red]Blocked:[/bold red] {file_path_str} is outside allowed paths.")
428
+ console.print(
429
+ f"[bold red]Blocked:[/bold red] {file_path_str} is outside allowed paths."
430
+ )
169
431
  continue
170
432
 
171
433
  file_path = Path(file_path_str)
172
-
434
+
173
435
  is_append = False
174
436
  existing_content = ""
175
-
437
+
176
438
  if file_path.exists() and not force:
177
- with open(file_path, 'r') as f:
439
+ with open(file_path, "r") as f:
178
440
  existing_content = f.read()
179
-
441
+
180
442
  missing_items = []
181
443
  for item in items:
182
444
  # item is (table_name, conf_obj, base_cls, base_imp, cls_name)
@@ -184,44 +446,59 @@ def sync(
184
446
  cls_name = item[4]
185
447
  if f"class {cls_name}" not in existing_content:
186
448
  missing_items.append(item)
187
-
449
+
188
450
  if not missing_items:
189
- summary_table.add_row(file_path_str, str(len(items)), "[yellow]Skipped (All Exist)[/yellow]")
451
+ summary_table.add_row(
452
+ file_path_str,
453
+ str(len(items)),
454
+ "[yellow]Skipped (All Exist)[/yellow]",
455
+ )
190
456
  continue
191
-
457
+
192
458
  items = missing_items
193
459
  is_append = True
194
-
460
+
195
461
  if dry_run:
196
- status = "[blue]Dry Run (Append)[/blue]" if is_append else "[blue]Dry Run[/blue]"
462
+ status = (
463
+ "[blue]Dry Run (Append)[/blue]" if is_append else "[blue]Dry Run[/blue]"
464
+ )
197
465
  summary_table.add_row(file_path_str, str(len(items)), status)
198
466
  continue
199
467
 
200
468
  # Prepare File Content
201
469
  imports_list, classes_code = generate_datacube_module_code(
202
- items=items,
203
- registry=registry,
204
- get_db_url_callback=get_url,
205
- logger=console.print
470
+ items=items,
471
+ registry=registry,
472
+ get_db_url_callback=get_url,
473
+ logger=console.print,
206
474
  )
207
475
  imports = set(imports_list)
208
476
 
209
477
  # Collect used config objects for this file to filter imports
210
- used_configs = set(item[1] for item in items if item[1]) # item[1] is conf_obj
211
- filtered_global_imports = filter_global_imports(registry.global_imports, used_configs, ignored_prefixes=["solutions.conf"])
212
-
478
+ used_configs = set(item[1] for item in items if item[1]) # item[1] is conf_obj
479
+ filtered_global_imports = filter_global_imports(
480
+ registry.global_imports, used_configs, ignored_prefixes=["solutions.conf"]
481
+ )
482
+
213
483
  if not classes_code:
214
- if not is_append:
215
- summary_table.add_row(file_path_str, "0", "[red]Failed (No Classes Generated)[/red]")
216
- else:
217
- summary_table.add_row(file_path_str, "0", "[red]Failed to Append[/red]")
218
- continue
484
+ if not is_append:
485
+ summary_table.add_row(
486
+ file_path_str, "0", "[red]Failed (No Classes Generated)[/red]"
487
+ )
488
+ else:
489
+ summary_table.add_row(file_path_str, "0", "[red]Failed to Append[/red]")
490
+ continue
219
491
 
220
492
  if not is_append:
221
493
  # We are generating the field map with Mapping type hint, so we should allow it in the generator
222
494
  # but this file writes the datacube class.
223
-
224
- full_content = sorted(list(imports)) + filtered_global_imports + ["\n# --- Generated ---"] + classes_code
495
+
496
+ full_content = (
497
+ sorted(list(imports))
498
+ + filtered_global_imports
499
+ + ["\n# --- Generated ---"]
500
+ + classes_code
501
+ )
225
502
  file_path.parent.mkdir(parents=True, exist_ok=True)
226
503
  with open(file_path, "w") as f:
227
504
  f.write("\n".join(full_content))
@@ -233,24 +510,106 @@ def sync(
233
510
  status_msg = f"[green]Appended {len(classes_code)} Classes[/green]"
234
511
 
235
512
  # Format using Ruff
236
- subprocess.run(["uv", "run", "ruff", "format", str(file_path)], capture_output=True)
513
+ subprocess.run(
514
+ ["uv", "run", "ruff", "format", str(file_path)], capture_output=True
515
+ )
237
516
  summary_table.add_row(file_path_str, str(len(items)), status_msg)
238
517
 
518
+ # --- Registry Collection ---
519
+ # Collect metadata for generated datacubes
520
+ # Structure: {conf_obj: {table_name: {class_name: ..., path: ...}}}
521
+ for item in items:
522
+ t_name = item[0]
523
+ conf_obj = item[1]
524
+ cls_n = item[4]
525
+ # Calculate path relative to project root
526
+ try:
527
+ if "project_root" not in locals():
528
+ project_root = config_path.parent.parent.parent
529
+ rel_path = file_path.relative_to(project_root)
530
+ except Exception:
531
+ rel_path = file_path
532
+
533
+ if conf_obj not in generated_registry:
534
+ generated_registry[conf_obj] = {}
535
+
536
+ generated_registry[conf_obj][t_name] = {
537
+ "class_name": cls_n,
538
+ "path": str(rel_path),
539
+ }
540
+
239
541
  console.print(summary_table)
240
542
 
543
+ # --- Write Datacube Registry ---
544
+ reg_rel_path = params.get("paths", {}).get("repositories", {}).get(
545
+ "global_datacube_registry_file"
546
+ ) or params.get("global_datacube_registry_file")
547
+
548
+ if reg_rel_path and generated_registry:
549
+ try:
550
+ if Path(reg_rel_path).is_absolute():
551
+ reg_file = Path(reg_rel_path)
552
+ else:
553
+ if "project_root" not in locals():
554
+ project_root = config_path.parent.parent.parent
555
+ reg_file = project_root / reg_rel_path
556
+
557
+ reg_file.parent.mkdir(parents=True, exist_ok=True)
558
+
559
+ # Group Logic Applied above.
560
+ # Sort keys for stability
561
+ reg_data = {
562
+ k: dict(sorted(v.items()))
563
+ for k, v in sorted(generated_registry.items())
564
+ }
565
+
566
+ with open(reg_file, "w") as f:
567
+ yaml.dump(reg_data, f, sort_keys=False)
568
+
569
+ console.print(
570
+ f"[green]Updated Datacube Registry at {reg_rel_path} ({len(generated_registry)} entries)[/green]"
571
+ )
572
+ except Exception as e:
573
+ console.print(f"[red]Failed to write Datacube Registry: {e}[/red]")
574
+
575
+
241
576
  @app.command()
242
577
  def discover(
243
578
  config_file: Optional[Path] = typer.Option(None, "--config"),
244
- db_conf: str = typer.Option("replica_db_conf", help="Config object to use for discovery introspection"),
579
+ db_conf: str = typer.Option(
580
+ "replica_db_conf", help="Config object to use for discovery introspection"
581
+ ),
245
582
  db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
246
- env_file: Optional[Path] = typer.Option(None, "--env-file", "-e", help="Path to environment file"),
247
- update: bool = typer.Option(False, "--update", help="Update the registry file in place"),
248
- prune: bool = typer.Option(False, "--prune", help="Remove tables from registry if they are not in the discovery result"),
249
- run_sync: bool = typer.Option(False, "--sync", help="Run sync immediately after update"),
250
- dry_run: bool = typer.Option(False, "--dry-run", help="Preview changes without saving (overrides --update)"),
251
- generate_fields: bool = typer.Option(False, "--generate-fields", help="Generate field_map files for discovered tables"),
252
- force: bool = typer.Option(False, "--force", "-f", help="Force overwrite of existing field maps"),
253
- fields_root: str = typer.Option("solutions.conf.transforms.fields", "--fields-root", help="Python path root for field maps"),
583
+ env_file: Optional[Path] = typer.Option(
584
+ None, "--env-file", "-e", help="Path to environment file"
585
+ ),
586
+ update: bool = typer.Option(
587
+ False, "--update", help="Update the registry file in place"
588
+ ),
589
+ prune: bool = typer.Option(
590
+ False,
591
+ "--prune",
592
+ help="Remove tables from registry if they are not in the discovery result",
593
+ ),
594
+ run_sync: bool = typer.Option(
595
+ False, "--sync", help="Run sync immediately after update"
596
+ ),
597
+ dry_run: bool = typer.Option(
598
+ False, "--dry-run", help="Preview changes without saving (overrides --update)"
599
+ ),
600
+ generate_fields: bool = typer.Option(
601
+ False,
602
+ "--generate-fields",
603
+ help="Generate field_map files for discovered tables",
604
+ ),
605
+ force: bool = typer.Option(
606
+ False, "--force", "-f", help="Force overwrite of existing field maps"
607
+ ),
608
+ fields_root: str = typer.Option(
609
+ "solutions.conf.transforms.fields",
610
+ "--fields-root",
611
+ help="Python path root for field maps",
612
+ ),
254
613
  ) -> None:
255
614
  config_path = config_file or context.default_config
256
615
  if not config_path:
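The registry written at the end of sync (previous hunk) groups generated classes by config object and table. A hedged sketch of the resulting YAML; the table and class names below are invented purely for illustration:

    import yaml

    # Shape collected per generated class during sync; "orders" / "OrdersDc" are examples only.
    generated_registry = {
        "replica_db_conf": {                      # connection/config object
            "orders": {                           # source table
                "class_name": "OrdersDc",
                "path": "dataobjects/gencubes/common/orders_cubes.py",  # relative to project root
            },
        },
    }
    reg_data = {k: dict(sorted(v.items())) for k, v in sorted(generated_registry.items())}
    print(yaml.dump(reg_data, sort_keys=False))

The output location comes from paths.repositories.global_datacube_registry_file (or the legacy top-level key), resolved against the project root when relative.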
@@ -258,7 +617,7 @@ def discover(
258
617
  raise typer.Exit(code=1)
259
618
 
260
619
  gen_config_path = config_path.parent / "generator_config.yaml"
261
-
620
+
262
621
  # Resolve env_file: CLI > Params > Default
263
622
  if env_file:
264
623
  env_path = env_file
@@ -266,24 +625,49 @@ def discover(
266
625
  env_path = Path(context.params.get("defaults", {})["env_file"])
267
626
  else:
268
627
  env_path = Path(".env.linux")
269
-
628
+
270
629
  load_environment(env_path, logger=console.print)
271
630
 
272
-
273
- with open(config_path, 'r') as f:
274
- config_data = yaml.safe_load(f)
631
+ # Load Registry Config (Bootstrap if missing)
632
+ if config_path.exists():
633
+ config_data = _load_and_resolve_config(config_path)
634
+ else:
635
+ # If registry file doesn't exist (e.g. first run), initialize with minimal settings
636
+ # We need "cubes_root_path" from params usually.
637
+ # But wait, config_path IS the registry file path.
638
+ console.print(
639
+ f"[yellow]Registry file {config_path} not found. Initializing empty registry.[/yellow]"
640
+ )
641
+ config_path.parent.mkdir(parents=True, exist_ok=True)
642
+ # Try to infer cubes_root_path from context params if available
643
+ cubes_root = "dataobjects/gencubes" # Default fallback
644
+ if context.params and "paths" in context.params:
645
+ cubes_root = (
646
+ context.params.get("paths", {})
647
+ .get("target", {})
648
+ .get("datacubes_dir", cubes_root)
649
+ )
650
+
651
+ config_data = {"settings": {"cubes_root_path": cubes_root}}
652
+ # We don't save it yet? Or should we?
653
+ # DatacubeRegistry will use this data. If we save later, it's fine.
654
+
275
655
  registry = DatacubeRegistry(config_data)
276
656
 
277
657
  import json
658
+
278
659
  # Resolve DB URL
279
660
  cli_urls = json.loads(db_url_map) if db_url_map else {}
661
+
280
662
  def get_url(conf_name):
281
663
  if conf_name in cli_urls:
282
664
  return cli_urls[conf_name]
283
665
  url = resolve_db_url(conf_name, registry.global_imports)
284
666
  if url:
285
667
  return url
286
- raise ValueError(f"Could not resolve DB URL for '{conf_name}'. Provide via --db-urls or check imports.")
668
+ raise ValueError(
669
+ f"Could not resolve DB URL for '{conf_name}'. Provide via --db-urls or check imports."
670
+ )
287
671
 
288
672
  # --- Initialize Global Field Registry (DEPRECATED) ---
289
673
  field_registry = None
@@ -297,34 +681,46 @@ def discover(
297
681
 
298
682
  params = context.params
299
683
  databases = params.get("databases", [])
300
-
684
+
301
685
  # Fallback to single DB mode if no databases defined (Backwards Compatibility)
302
686
  if not databases:
303
- databases = [{
304
- "name": db_conf,
305
- "connection_obj": db_conf,
306
- # Use standard name if not defined
307
- "whitelist_file": "discovery_whitelist.yaml",
308
- "rules_file": "discovery_rules.yaml"
309
- }]
687
+ databases = [
688
+ {
689
+ "name": db_conf,
690
+ "connection_obj": db_conf,
691
+ # Use standard name if not defined
692
+ "whitelist_file": "discovery_whitelist.yaml",
693
+ "rules_file": "discovery_rules.yaml",
694
+ }
695
+ ]
310
696
 
311
697
  # Filter if user requested specific DB via CLI (using db_conf arg as filter name)
312
- # The `db_conf` argument defaults to "replica_db_conf".
698
+ # The `db_conf` argument defaults to "replica_db_conf".
313
699
  target_db_name = None
314
-
700
+
315
701
  aggregated_entries = {}
316
702
  last_orchestrator = None
317
703
 
704
+ # Load existing all_tables data if accumulating
705
+ global_tables_file = params.get("all_tables_file") or "all_tables.yaml"
706
+ global_tables_path = config_path.parent / global_tables_file
707
+ all_tables_data = {}
708
+ if global_tables_path.exists():
709
+ with open(global_tables_path, "r") as f:
710
+ all_tables_data = yaml.safe_load(f) or {}
711
+
318
712
  for db_config in databases:
319
713
  db_name = db_config.get("id") or db_config.get("name", "unknown")
320
714
  conn_obj = db_config.get("connection_ref") or db_config.get("connection_obj")
321
-
715
+
322
716
  # Determine whitelist path
323
717
  wl_filename = db_config.get("whitelist_file")
324
718
  if not wl_filename:
325
- # Try global param fallback
326
- wl_filename = params.get("discovery", {}).get("whitelist_file") or params.get("whitelist_file")
327
-
719
+ # Try global param fallback
720
+ wl_filename = params.get("discovery", {}).get(
721
+ "whitelist_file"
722
+ ) or params.get("whitelist_file")
723
+
328
724
  if not wl_filename:
329
725
  # Default convention: discovery_whitelist_<db_name>.yaml
330
726
  wl_filename = f"discovery_whitelist_{conn_obj}.yaml"
@@ -333,14 +729,18 @@ def discover(
333
729
  # Determine rules path
334
730
  rules_filename = db_config.get("rules_file")
335
731
  if not rules_filename:
336
- rules_filename = params.get("discovery", {}).get("rules_file") or params.get("rules_file")
337
-
732
+ rules_filename = params.get("discovery", {}).get(
733
+ "rules_file"
734
+ ) or params.get("rules_file")
735
+
338
736
  if not rules_filename:
339
- rules_filename = f"discovery_rules_{conn_obj}.yaml"
737
+ rules_filename = f"discovery_rules_{conn_obj}.yaml"
340
738
  rules_path = config_path.parent / rules_filename
341
739
 
342
740
  # Determine blacklist path
343
- bl_filename = db_config.get("blacklist_file", f"discovery_blacklist_{conn_obj}.yaml")
741
+ bl_filename = db_config.get(
742
+ "blacklist_file", f"discovery_blacklist_{conn_obj}.yaml"
743
+ )
344
744
  blacklist_path = config_path.parent / bl_filename
345
745
 
346
746
  console.print(f"[bold cyan]Discovering: {db_name} ({conn_obj})[/]")
@@ -351,16 +751,18 @@ def discover(
351
751
  # Support proper import_spec from new config or legacy global_import
352
752
  import_spec = db_config.get("import_spec")
353
753
  if import_spec and isinstance(import_spec, dict):
354
- imp = import_spec.get("module")
754
+ imp = import_spec.get("module")
355
755
  else:
356
- imp = db_config.get("global_import")
756
+ imp = db_config.get("global_import")
357
757
  db_imports = [imp] if imp else registry.global_imports
358
758
  if not db_imports and registry.global_imports:
359
- db_imports = registry.global_imports
360
-
759
+ db_imports = registry.global_imports
760
+
361
761
  db_conn_str = resolve_db_url(conn_obj, db_imports)
362
762
  except Exception:
363
- console.print(f"[red]Could not resolve connection {conn_obj}. Skipping.[/red]")
763
+ console.print(
764
+ f"[red]Could not resolve connection {conn_obj}. Skipping.[/red]"
765
+ )
364
766
  continue
365
767
 
366
768
  orchestrator = DiscoveryOrchestrator(
@@ -370,43 +772,62 @@ def discover(
370
772
  whitelist_path=str(whitelist_path),
371
773
  registry_path=str(config_path),
372
774
  db_connection_str=db_conn_str,
373
- db_config=db_config
775
+ db_config=db_config,
374
776
  )
375
-
777
+
376
778
  try:
377
779
  entries = orchestrator.discover()
378
780
  aggregated_entries.update(entries)
379
781
  last_orchestrator = orchestrator
782
+
783
+ # --- Capture Raw Tables for all_tables.yaml ---
784
+ if hasattr(orchestrator, "raw_tables") and orchestrator.raw_tables:
785
+ # Sort for consistency
786
+ all_tables_data[conn_obj] = sorted(list(orchestrator.raw_tables))
787
+ console.print(
788
+ f"[green]Captured {len(orchestrator.raw_tables)} raw tables for {conn_obj}[/green]"
789
+ )
380
790
  except Exception as e:
381
- console.print(f"[red]Discovery failed for {db_name}: {e}[/red]")
382
- if not dry_run:
383
- raise # Fail hard if not dry run? Or continue? Let's buffer errors?
384
- # For now, log and continue might result in partial registry which is bad (prune would wipe missing).
385
- # Fail safe:
386
- return
791
+ console.print(f"[red]Discovery failed for {db_name}: {e}[/red]")
792
+ if not dry_run:
793
+ raise # Fail hard if not dry run? Or continue? Let's buffer errors?
794
+ # For now, log and continue might result in partial registry which is bad (prune would wipe missing).
795
+ # Fail safe:
796
+ return
387
797
 
388
798
  # Aggregate global imports from ALL databases to ensure registry has them
389
799
  aggregated_global_imports = set(params.get("global_imports", []))
390
800
  for db in databases:
391
801
  if "global_import" in db:
392
802
  aggregated_global_imports.add(db["global_import"])
393
-
803
+
394
804
  # Save Aggregated Registry
395
805
  if last_orchestrator:
396
806
  console.print("")
397
807
  # Inject aggregated imports into the last orchestrator's update logic?
398
808
  # The orchestrator's save_registry loads existing, updates tables, and saves.
399
809
  # It DOES NOT currently update global_imports. We need to add that cap.
400
-
810
+
401
811
  # Helper manual update for now, or update Orchestrator to support it?
402
- # Let's update Orchestrator.save_registry to accept global_imports update.
403
812
  last_orchestrator.save_registry(
404
- aggregated_entries,
405
- dry_run=dry_run,
406
- prune=prune,
407
- global_imports=list(aggregated_global_imports)
813
+ aggregated_entries,
814
+ dry_run=dry_run,
815
+ prune=prune,
816
+ global_imports=list(aggregated_global_imports),
408
817
  )
409
-
818
+
819
+ # Save all_tables.yaml
820
+ if not dry_run and all_tables_data:
821
+ with open(global_tables_path, "w") as f:
822
+ yaml.dump(all_tables_data, f, sort_keys=False)
823
+ console.print(
824
+ f"[bold green]Updated {global_tables_file} with raw tables from providers.[/bold green]"
825
+ )
826
+ elif dry_run:
827
+ console.print(
828
+ f"[yellow]DRY RUN: Would update {global_tables_file} with {len(all_tables_data)} providers.[/yellow]"
829
+ )
830
+
410
831
  # Save Registry changes (collected during discovery)
411
832
  if not dry_run and field_registry:
412
833
  field_registry.save()
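Both scan and, with this release, discover accumulate raw table names per connection into the same all_tables file, which propose_rules later consumes. A small sketch of that capture step; the connection name and resulting table names are placeholders:

    import sqlalchemy as sa
    from sqlalchemy import inspect

    def capture_tables(all_tables_data: dict, conn_obj: str, db_url: str) -> None:
        # One sorted list of raw table names per connection object.
        engine = sa.create_engine(db_url)
        all_tables_data[conn_obj] = sorted(inspect(engine).get_table_names())

    # yaml.dump(all_tables_data, sort_keys=False) then yields, e.g. (names illustrative):
    # replica_db_conf:
    # - another_table
    # - some_table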
@@ -415,29 +836,29 @@ def discover(
415
836
  if generate_fields and not dry_run:
416
837
  console.print("")
417
838
  console.rule("[bold blue]Generating Field Maps[/]")
418
-
839
+
419
840
  # Reload registry to ensure we have latest discovered tables
420
- with open(config_path, 'r') as f:
841
+ with open(config_path, "r") as f:
421
842
  updated_config_data = yaml.safe_load(f)
422
843
  updated_registry = DatacubeRegistry(updated_config_data, params=context.params)
423
844
 
424
845
  # Convert python path to physical path
425
- phys_root = Path(fields_root.replace('.', '/'))
426
-
846
+ phys_root = Path(fields_root.replace(".", "/"))
847
+
427
848
  # Group tables by connection to use correct inspector
428
849
  tables_by_conn = {}
429
850
  for t_name, t_data in updated_registry.tables.items():
430
- conn = t_data.get('connection_obj', updated_registry.default_connection_obj)
851
+ conn = t_data.get("connection_obj", updated_registry.default_connection_obj)
431
852
  if conn not in tables_by_conn:
432
853
  tables_by_conn[conn] = {}
433
854
  tables_by_conn[conn][t_name] = t_data
434
-
855
+
435
856
  for conn_obj, table_group in tables_by_conn.items():
436
857
  try:
437
858
  db_url = get_url(conn_obj)
438
859
  engine = sa.create_engine(db_url)
439
860
  inspector = inspect(engine)
440
-
861
+
441
862
  # Use the promoted Generator
442
863
  generate_field_map_files(
443
864
  discovered_entries=table_group,
@@ -445,35 +866,46 @@ def discover(
445
866
  root_path=phys_root,
446
867
  force=force,
447
868
  logger=console.print,
448
- allowed_paths=updated_registry.valid_fieldmap_paths if hasattr(updated_registry, 'valid_fieldmap_paths') else None
869
+ allowed_paths=(
870
+ updated_registry.valid_fieldmap_paths
871
+ if hasattr(updated_registry, "valid_fieldmap_paths")
872
+ else None
873
+ ),
449
874
  )
450
875
  except Exception as e:
451
- console.print(f"[red]Error generating fields for connection {conn_obj}: {e}[/red]")
452
-
876
+ console.print(
877
+ f"[red]Error generating fields for connection {conn_obj}: {e}[/red]"
878
+ )
879
+
453
880
  # Chained Sync
454
881
  if run_sync:
455
882
  if dry_run:
456
883
  console.print("[yellow]Skipping sync in dry-run mode.[/yellow]")
457
884
  elif not update:
458
- console.print("[yellow]Sync skipped: Registry not updated (use --update to enable chaining).[/yellow]")
885
+ console.print(
886
+ "[yellow]Sync skipped: Registry not updated (use --update to enable chaining).[/yellow]"
887
+ )
459
888
  else:
460
- console.print("")
461
- console.rule("[bold blue]Auto-Syncing Datacubes[/]")
462
- # Call sync command directly with current context options
463
- sync(
464
- config_file=config_path,
465
- db_url_map=db_url_map,
466
- force=True,
467
- env_file=env_file,
468
- dry_run=False
469
- )
889
+ console.print("")
890
+ console.rule("[bold blue]Auto-Syncing Datacubes[/]")
891
+ # Call sync command directly with current context options
892
+ sync(
893
+ config_file=config_path,
894
+ db_url_map=db_url_map,
895
+ force=True,
896
+ env_file=env_file,
897
+ dry_run=False,
898
+ )
899
+
470
900
 
471
901
  @app.command()
472
902
  def scan(
473
903
  config_file: Optional[Path] = typer.Option(None, "--config"),
474
904
  db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
475
905
  env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
476
- db_name: Optional[str] = typer.Option(None, "--db", help="Target specific database from params"),
906
+ db_name: Optional[str] = typer.Option(
907
+ None, "--db", help="Target specific database from params"
908
+ ),
477
909
  ) -> None:
478
910
  """
479
911
  Introspects configured databases and dumps table lists to YAML.
@@ -487,41 +919,41 @@ def scan(
487
919
  # Resolve env_file: CLI > Params > Default
488
920
  if env_file:
489
921
  env_path = env_file
490
- elif context.params and "env_file" in context.params:
491
- env_path = Path(context.params["env_file"])
922
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
923
+ env_path = Path(context.params.get("defaults", {})["env_file"])
492
924
  else:
493
925
  env_path = Path(".env.linux")
494
926
  load_environment(env_path, logger=console.print)
495
927
 
496
- with open(config_path, 'r') as f:
497
- config_data = yaml.safe_load(f)
928
+ config_data = _load_and_resolve_config(config_path)
498
929
  registry = DatacubeRegistry(config_data, params=context.params)
499
930
 
500
931
  import json
932
+
501
933
  cli_urls = json.loads(db_url_map) if db_url_map else {}
502
-
934
+
503
935
  # Helper Resolution
504
936
  def get_url_safe(conf_name, db_imp):
505
- if conf_name in cli_urls: return cli_urls[conf_name]
506
- imp = [db_imp] if db_imp else registry.global_imports
507
- return resolve_db_url(conf_name, imp)
937
+ if conf_name in cli_urls:
938
+ return cli_urls[conf_name]
939
+ imp = [db_imp] if db_imp else registry.global_imports
940
+ return resolve_db_url(conf_name, imp)
508
941
 
509
942
  params = context.params
510
943
  databases = params.get("databases", [])
511
-
944
+
512
945
  # Filter targets
513
946
  target_dbs = databases
514
947
  if db_name:
515
948
  target_dbs = [d for d in databases if d.get("name") == db_name]
516
949
  if not target_dbs:
517
- console.print(f"[red]Database '{db_name}' not found.[/red]")
518
- raise typer.Exit(code=1)
950
+ console.print(f"[red]Database '{db_name}' not found.[/red]")
951
+ raise typer.Exit(code=1)
519
952
 
520
-
521
953
  # Resolve global output file
522
954
  global_tables_file = params.get("all_tables_file") or "all_tables.yaml"
523
955
  global_tables_path = config_path.parent / global_tables_file
524
-
956
+
525
957
  # Load existing data to preserve config for DBs not being scanned
526
958
  all_tables_data = {}
527
959
  if global_tables_path.exists():
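Several commands in this release (scan here, and drift and map below) switch the env-file lookup from a top-level params key to params.defaults.env_file. The shared precedence, as a standalone sketch:

    from pathlib import Path
    from typing import Optional

    def resolve_env_path(cli_env_file: Optional[Path], params: dict) -> Path:
        # Precedence: explicit --env-file flag > params defaults > .env.linux fallback.
        if cli_env_file:
            return cli_env_file
        default = (params or {}).get("defaults", {}).get("env_file")
        return Path(default) if default else Path(".env.linux")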
@@ -531,37 +963,47 @@ def scan(
531
963
  for db in target_dbs:
532
964
  name = db.get("name")
533
965
  conn_obj = db.get("connection_obj")
534
-
966
+
535
967
  console.print(f"[bold cyan]Scanning: {name} ...[/bold cyan]")
536
968
  try:
537
969
  db_url = get_url_safe(conn_obj, db.get("global_import"))
538
970
  if not db_url:
539
971
  console.print(f"[red]Could not resolve URL for {conn_obj}[/red]")
540
972
  continue
541
-
973
+
542
974
  engine = sa.create_engine(db_url)
543
975
  inspector = inspect(engine)
544
976
  tables = sorted(inspector.get_table_names())
545
-
977
+
546
978
  # Update shared dictionary
547
979
  all_tables_data[conn_obj] = tables
548
980
  console.print(f"[green]Found {len(tables)} tables for {conn_obj}[/green]")
549
-
981
+
550
982
  except Exception as e:
551
983
  console.print(f"[red]Scan failed for {name}: {e}[/red]")
552
- if params.get("debug"): raise e
984
+ if params.get("debug"):
985
+ raise e
553
986
 
554
987
  # Persist aggregated result
555
988
  with open(global_tables_path, "w") as f:
556
989
  yaml.dump(all_tables_data, f, sort_keys=False)
557
-
558
- console.print(f"[bold green]Updated table list at {global_tables_path}[/bold green]")
990
+
991
+ console.print(
992
+ f"[bold green]Updated table list at {global_tables_path}[/bold green]"
993
+ )
994
+
559
995
 
560
996
  @app.command()
561
997
  def drift(
562
998
  config_file: Optional[Path] = typer.Option(None, "--config"),
563
- db_url_map: Optional[str] = typer.Option(None, "--db-urls", help="Optional JSON mapping. If omitted, tries to resolve from code."),
564
- env_file: Optional[Path] = typer.Option(None, "--env-file", "-e", help="Path to environment file"),
999
+ db_url_map: Optional[str] = typer.Option(
1000
+ None,
1001
+ "--db-urls",
1002
+ help="Optional JSON mapping. If omitted, tries to resolve from code.",
1003
+ ),
1004
+ env_file: Optional[Path] = typer.Option(
1005
+ None, "--env-file", "-e", help="Path to environment file"
1006
+ ),
565
1007
  ) -> None:
566
1008
  """
567
1009
  Checks for 'drift' between the generated Python classes and the DB schema.
@@ -574,21 +1016,19 @@ def drift(
574
1016
  # Resolve env_file: CLI > Params > Default
575
1017
  if env_file:
576
1018
  env_path = env_file
577
- elif context.params and "env_file" in context.params:
578
- env_path = Path(context.params["env_file"])
1019
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
1020
+ env_path = Path(context.params.get("defaults", {})["env_file"])
579
1021
  else:
580
1022
  env_path = Path(".env.linux")
581
1023
 
582
1024
  load_environment(env_path, logger=console.print)
583
1025
 
1026
+ config_data = _load_and_resolve_config(config_path)
584
1027
 
585
- with open(config_path, 'r') as f:
586
- config_data = yaml.safe_load(f)
587
-
588
1028
  registry = DatacubeRegistry(config_data)
589
1029
  get_url = _get_db_url_callback(registry, db_url_map)
590
1030
  cli_urls = json.loads(db_url_map) if db_url_map else {}
591
-
1031
+
592
1032
  drift_table = Table(title="Schema Drift Analysis")
593
1033
  drift_table.add_column("Class", style="cyan")
594
1034
  drift_table.add_column("Status", style="bold")
@@ -598,22 +1038,32 @@ def drift(
598
1038
  attribute_names = list(registry.processed_mappings.keys())
599
1039
 
600
1040
  for table_name, details in registry.tables.items():
601
- target = details.get('save_to_path', details.get('path'))
1041
+ target = details.get("save_to_path", details.get("path"))
602
1042
  if not target:
603
- drift_table.add_row(table_name, "[red]Config Error[/red]", "Missing save_to_path")
604
- continue
1043
+ drift_table.add_row(
1044
+ table_name, "[red]Config Error[/red]", "Missing save_to_path"
1045
+ )
1046
+ continue
605
1047
  path = Path(target)
606
1048
  if not path.exists():
607
- console.print(f"[yellow]Skipping {table_name}: File {path} not found.[/yellow]")
1049
+ console.print(
1050
+ f"[yellow]Skipping {table_name}: File {path} not found.[/yellow]"
1051
+ )
608
1052
  continue
609
1053
 
610
1054
  # 1. Determine Class Name
611
- provided_class_name = details.get('class_name')
612
- class_name = provided_class_name if provided_class_name else "".join(w.capitalize() for w in table_name.split('_')) + "Dc"
1055
+ provided_class_name = details.get("class_name")
1056
+ class_name = (
1057
+ provided_class_name
1058
+ if provided_class_name
1059
+ else "".join(w.capitalize() for w in table_name.split("_")) + "Dc"
1060
+ )
613
1061
 
614
1062
  # 2. Dynamically load the generated class from the file
615
1063
  try:
616
- spec = importlib.util.spec_from_file_location(f"dynamic_mod_{table_name}", path)
1064
+ spec = importlib.util.spec_from_file_location(
1065
+ f"dynamic_mod_{table_name}", path
1066
+ )
617
1067
  mod = importlib.util.module_from_spec(spec)
618
1068
  spec.loader.exec_module(mod)
619
1069
  dc_class = getattr(mod, class_name)
@@ -624,35 +1074,41 @@ def drift(
624
1074
  # 3. Determine DB URL
625
1075
  # Priority: CLI Override > Class Attribute > Registry Config
626
1076
  db_url = None
627
- conf_obj = details.get('connection_obj', details.get('config_obj', registry.default_connection_obj))
1077
+ conf_obj = details.get(
1078
+ "connection_obj", details.get("config_obj", registry.default_connection_obj)
1079
+ )
628
1080
 
629
1081
  if conf_obj in cli_urls:
630
1082
  db_url = cli_urls[conf_obj]
631
- elif hasattr(dc_class, 'connection_url'):
632
- db_url = getattr(dc_class, 'connection_url')
633
- elif hasattr(dc_class, 'config') and isinstance(dc_class.config, dict):
634
- db_url = dc_class.config.get('connection_url')
635
-
1083
+ elif hasattr(dc_class, "connection_url"):
1084
+ db_url = getattr(dc_class, "connection_url")
1085
+ elif hasattr(dc_class, "config") and isinstance(dc_class.config, dict):
1086
+ db_url = dc_class.config.get("connection_url")
1087
+
636
1088
  # Fallback to registry resolution
637
1089
  if not db_url:
638
1090
  try:
639
1091
  db_url = get_url(conf_obj)
640
1092
  except Exception:
641
- pass
1093
+ pass
642
1094
 
643
1095
  # 4. Introspect DB
644
1096
  try:
645
1097
  engine = sa.create_engine(db_url)
646
1098
  inspector = inspect(engine)
647
- db_cols = {c['name'] for c in inspector.get_columns(table_name)}
1099
+ db_cols = {c["name"] for c in inspector.get_columns(table_name)}
648
1100
  except Exception as e:
649
- drift_table.add_row(class_name, "[red]DB Error[/red]", repr(e))
650
- continue
1101
+ drift_table.add_row(class_name, "[red]DB Error[/red]", repr(e))
1102
+ continue
651
1103
 
652
1104
  # 5. Extract Field Map (if any)
653
- field_map = getattr(dc_class, 'field_map', None)
654
- if not field_map and hasattr(dc_class, 'config') and isinstance(dc_class.config, dict):
655
- field_map = dc_class.config.get('field_map')
1105
+ field_map = getattr(dc_class, "field_map", None)
1106
+ if (
1107
+ not field_map
1108
+ and hasattr(dc_class, "config")
1109
+ and isinstance(dc_class.config, dict)
1110
+ ):
1111
+ field_map = dc_class.config.get("field_map")
656
1112
 
657
1113
  # 6. Check Drift
658
1114
  issues = check_drift(dc_class, db_cols, attribute_names, field_map=field_map)
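drift loads each generated module straight from its file path before comparing class attributes with the live schema. The importlib pattern it relies on, as a self-contained sketch (the example path and class name are hypothetical):

    import importlib.util
    from pathlib import Path

    def load_generated_class(path: Path, class_name: str):
        # Load a generated module from disk without requiring it to be on sys.path.
        spec = importlib.util.spec_from_file_location(f"dynamic_mod_{path.stem}", path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        return getattr(mod, class_name)

    # e.g. load_generated_class(Path("dataobjects/gencubes/common/orders_cubes.py"), "OrdersDc")

When no class_name is configured, the default follows the table name: "".join(w.capitalize() for w in table.split("_")) + "Dc".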
@@ -665,12 +1121,80 @@ def drift(
665
1121
  console.print(drift_table)
666
1122
 
667
1123
 
1124
+ @app.command()
1125
+ def propose_rules(
1126
+ config_file: Optional[Path] = typer.Option(None, "--config"),
1127
+ dry_run: bool = typer.Option(
1128
+ False, "--dry-run", help="Preview rules without saving"
1129
+ ),
1130
+ ):
1131
+ """
1132
+ Analyzes all_tables.yaml and proposes new discovery rules.
1133
+ """
1134
+ config_path = config_file or context.default_config
1135
+ if not config_path:
1136
+ console.print("[red]No config file specified and no default configured.[/red]")
1137
+ raise typer.Exit(code=1)
1138
+
1139
+ # Resolve paths via helper to ensure Project Root logic is applied
1140
+ resolved_config = _load_and_resolve_config(config_path)
1141
+
1142
+ # We rely on all_tables.yaml being generated by scan/discover
1143
+ # The resolved config will have absolute paths for these if the helper worked.
1144
+ params = context.params
1145
+
1146
+ # Prefer resolved values if available
1147
+ if (
1148
+ "discovery" in resolved_config
1149
+ and "all_tables_file" in resolved_config["discovery"]
1150
+ ):
1151
+ all_tables_path = Path(resolved_config["discovery"]["all_tables_file"])
1152
+ else:
1153
+ # Fallback to manual resolution (legacy or if not in discovery block)
1154
+ raw_val = (
1155
+ params.get("discovery", {}).get("all_tables_file")
1156
+ or params.get("all_tables_file")
1157
+ or "all_tables.yaml"
1158
+ )
1159
+ all_tables_path = config_path.parent / raw_val
1160
+
1161
+ # Rules File
1162
+ if "discovery" in resolved_config and "rules_file" in resolved_config["discovery"]:
1163
+ rules_path = Path(resolved_config["discovery"]["rules_file"])
1164
+ else:
1165
+ raw_rules = (
1166
+ params.get("discovery", {}).get("rules_file")
1167
+ or params.get("rules_file")
1168
+ or "discovery_rules.yaml"
1169
+ )
1170
+ rules_path = config_path.parent / raw_rules
1171
+
1172
+ if not all_tables_path.exists():
1173
+ console.print(
1174
+ f"[red]Error: {all_tables_path} not found. Run 'dc-scan' first.[/red]"
1175
+ )
1176
+ raise typer.Exit(code=1)
1177
+
1178
+ engine = RuleEngine(all_tables_path, rules_path)
1179
+ engine.load()
1180
+ updates = engine.propose_rules()
1181
+
1182
+ if dry_run:
1183
+ console.print("[bold yellow]Proposed Updates:[/]")
1184
+ for conn, rules in updates.items():
1185
+ console.print(f"[cyan]{conn}:[/]")
1186
+ for r in rules:
1187
+ console.print(f" - {r}")
1188
+ else:
1189
+ engine.save_proposal(updates)
668
1190
 
669
1191
 
670
1192
  @app.command()
671
1193
  def match(
672
1194
  config_file: Optional[Path] = typer.Option(None, "--config"),
673
- db_name: Optional[str] = typer.Option(None, "--db", help="Target specific database from params"),
1195
+ db_name: Optional[str] = typer.Option(
1196
+ None, "--db", help="Target specific database from params"
1197
+ ),
674
1198
  ) -> None:
675
1199
  """
676
1200
  Applies discovery rules to scanned tables and generates whitelists (registry).
@@ -682,23 +1206,23 @@ def match(
682
1206
  raise typer.Exit(code=1)
683
1207
 
684
1208
  import yaml
685
-
1209
+
686
1210
  # Load Params
687
- params_path = config_path.parent / "discovery_params.yaml" # Assuming relative location or loaded via context
1211
+ params_path = (
1212
+ config_path.parent / "discovery_params.yaml"
1213
+ ) # Assuming relative location or loaded via context
688
1214
  # Context should already have params if set_context_defaults ran, but to be safe/standalone:
689
1215
  params = context.params
690
1216
  databases = params.get("databases", [])
691
1217
  folder_prefix = params.get("folder_prefix", "solutions/dataobjects/gencubes/")
692
1218
  fields_suffix = params.get("fields_module_root", "fields")
693
1219
 
694
-
695
-
696
1220
  target_dbs = databases
697
1221
  if db_name:
698
1222
  target_dbs = [d for d in databases if d.get("name") == db_name]
699
1223
  if not target_dbs:
700
- console.print(f"[red]Database '{db_name}' not found.[/red]")
701
- raise typer.Exit(code=1)
1224
+ console.print(f"[red]Database '{db_name}' not found.[/red]")
1225
+ raise typer.Exit(code=1)
702
1226
 
703
1227
  for db in target_dbs:
704
1228
  name = db.get("name")
@@ -706,36 +1230,40 @@ def match(
706
1230
  rules_file = db.get("rules_file")
707
1231
  whitelist_file = db.get("whitelist_file")
708
1232
  conn_obj = db.get("connection_obj")
709
-
1233
+
710
1234
  # Path Composition
711
1235
  db_domain = db.get("db_domain")
712
1236
  import_base = Path(folder_prefix)
713
1237
  if db_domain:
714
1238
  import_base = import_base / db_domain
715
1239
  import_base = import_base / fields_suffix
716
-
1240
+
717
1241
  try:
718
1242
  import_base = import_base.relative_to(Path.cwd())
719
1243
  except ValueError:
720
1244
  pass
721
1245
  fields_module_base = str(import_base).replace("/", ".")
722
-
1246
+
723
1247
  if not (all_tables_file and rules_file and whitelist_file):
724
- console.print(f"[yellow]Skipping {name}: Missing file config.[/yellow]")
725
- continue
1248
+ console.print(f"[yellow]Skipping {name}: Missing file config.[/yellow]")
1249
+ continue
726
1250
 
727
1251
  tables_path = config_path.parent / all_tables_file
728
1252
  rules_path = config_path.parent / rules_file
729
1253
  out_path = config_path.parent / whitelist_file
730
-
1254
+
731
1255
  if not tables_path.exists():
732
- console.print(f"[red]Skipping {name}: {all_tables_file} not found. Run 'scan' first.[/red]")
1256
+ console.print(
1257
+ f"[red]Skipping {name}: {all_tables_file} not found. Run 'scan' first.[/red]"
1258
+ )
733
1259
  continue
734
-
1260
+
735
1261
  if not rules_path.exists():
736
- console.print(f"[yellow]Skipping {name}: Rules file {rules_file} not found.[/yellow]")
737
- continue
738
-
1262
+ console.print(
1263
+ f"[yellow]Skipping {name}: Rules file {rules_file} not found.[/yellow]"
1264
+ )
1265
+ continue
1266
+
739
1267
  # Load existing whitelist to preserve customizations
740
1268
  existing_whitelist = {}
741
1269
  if out_path.exists():
@@ -745,71 +1273,79 @@ def match(
745
1273
 
746
1274
  with open(tables_path, "r") as f:
747
1275
  all_tables = yaml.safe_load(f) or []
748
-
1276
+
749
1277
  with open(rules_path, "r") as f:
750
1278
  rules_data = yaml.safe_load(f) or []
751
-
1279
+
752
1280
  # Match Logic
753
- console.print(f"[bold cyan]Matching: {name} ({len(all_tables)} tables)[/bold cyan]")
754
-
1281
+ console.print(
1282
+ f"[bold cyan]Matching: {name} ({len(all_tables)} tables)[/bold cyan]"
1283
+ )
1284
+
755
1285
  matches = {}
756
1286
  matched_count = 0
757
-
1287
+
758
1288
  for table in sorted(all_tables):
759
1289
  # Find first matching rule
760
1290
  matched_rule = None
761
1291
  for r in rules_data:
762
1292
  pattern = r.get("pattern")
763
1293
  mtype = r.get("match_type", "exact")
764
-
1294
+
765
1295
  is_match = False
766
1296
  if mtype == "exact" and table == pattern:
767
1297
  is_match = True
768
1298
  elif mtype == "prefix" and table.startswith(pattern):
769
1299
  is_match = True
770
1300
  elif mtype == "regex":
771
- import re
772
- if re.search(pattern, table):
773
- is_match = True
774
-
1301
+ import re
1302
+
1303
+ if re.search(pattern, table):
1304
+ is_match = True
1305
+
775
1306
  if is_match:
776
1307
  matched_rule = r
777
1308
  break
778
-
1309
+
779
1310
  if matched_rule:
780
1311
  # Construct Registry Entry
781
1312
  # Resolve path using folder_prefix
782
1313
  template = matched_rule.get("output_template", f"{table}_cubes.py")
783
1314
  domain = matched_rule.get("domain", "common")
784
-
1315
+
785
1316
  # Path Construction: folder_prefix + db_domain + domain + output_template
786
1317
  db_domain = db.get("db_domain", "")
787
-
1318
+
788
1319
  # Careful not to double slash if db_domain is empty, but Path handles it.
789
1320
  # template should now be just filename per rule updates.
790
1321
  full_path_obj = Path(folder_prefix)
791
1322
  if db_domain:
792
1323
  full_path_obj = full_path_obj / db_domain
793
-
1324
+
794
1325
  full_path_obj = full_path_obj / domain / template
795
1326
  full_path = str(full_path_obj)
796
-
1327
+
797
1328
  # Class Name Generation
798
1329
  # Check if exists in old whitelist
799
1330
  existing_entry = existing_whitelist.get(table, {})
800
-
1331
+
801
1332
  custom_name = existing_entry.get("custom_name")
802
-
1333
+
803
1334
  if custom_name:
804
1335
  class_name = custom_name
805
1336
  elif existing_entry.get("class_name"):
806
1337
  class_name = existing_entry.get("class_name")
807
1338
  else:
808
1339
  class_suffix = params.get("class_suffix", "Dc")
809
- class_name = "".join(w.capitalize() for w in table.split("_")) + class_suffix
1340
+ class_name = (
1341
+ "".join(w.capitalize() for w in table.split("_")) + class_suffix
1342
+ )
1343
+
1344
+ field_map_template = matched_rule.get(
1345
+ "field_map_template",
1346
+ f"{fields_module_base}.{{domain}}.{{table}}.field_map",
1347
+ )
810
1348
 
811
- field_map_template = matched_rule.get("field_map_template", f"{fields_module_base}.{{domain}}.{{table}}.field_map")
812
-
813
1349
  # Construct defaults
814
1350
  entry = {
815
1351
  "path": full_path,
@@ -817,17 +1353,17 @@ def match(
817
1353
  "domain": domain,
818
1354
  "class_name": class_name,
819
1355
  # Ensure field_map is assigned
820
- "field_map": field_map_template.format(domain=domain, table=table)
1356
+ "field_map": field_map_template.format(domain=domain, table=table),
821
1357
  }
822
-
1358
+
823
1359
  # Preserve custom_name if present, else default to None
824
1360
  entry["custom_name"] = custom_name if custom_name else None
825
-
826
- # Preserve other fields if needed?
1361
+
1362
+ # Preserve other fields if needed?
827
1363
  # User asked specifically for keys on class_name preservation.
828
- # But generally we might want to respect other overrides?
1364
+ # But generally we might want to respect other overrides?
829
1365
  # For now, strict to class_name per request + generation logic.
830
-
1366
+
831
1367
  matches[table] = entry
832
1368
  matched_count += 1
833
1369
 
@@ -835,15 +1371,20 @@ def match(
835
1371
  output_data = {"tables": matches}
836
1372
  with open(out_path, "w") as f:
837
1373
  yaml.dump(output_data, f, sort_keys=False)
838
-
839
- console.print(f"[green]Matched {matched_count} tables. Written to {out_path}[/green]")
1374
+
1375
+ console.print(
1376
+ f"[green]Matched {matched_count} tables. Written to {out_path}[/green]"
1377
+ )
1378
+
840
1379
 
841
1380
  @app.command()
842
1381
  def map(
843
1382
  config_file: Optional[Path] = typer.Option(None, "--config"),
844
1383
  db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
845
1384
  env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
846
- db_name: Optional[str] = typer.Option(None, "--db", help="Target specific database from params"),
1385
+ db_name: Optional[str] = typer.Option(
1386
+ None, "--db", help="Target specific database from params"
1387
+ ),
847
1388
  force: bool = typer.Option(False, "--force", "-f"),
848
1389
  ) -> None:
849
1390
  """
@@ -855,50 +1396,66 @@ def map(
855
1396
  console.print("[red]No config file specified.[/red]")
856
1397
  raise typer.Exit(code=1)
857
1398
 
1399
+ # Env Load
858
1400
  # Env Load
859
1401
  if env_file:
860
1402
  env_path = env_file
861
- elif context.params and "env_file" in context.params:
862
- env_path = Path(context.params["env_file"])
1403
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
1404
+ env_path = Path(context.params.get("defaults", {})["env_file"])
863
1405
  else:
864
1406
  env_path = Path(".env.linux")
865
1407
  load_environment(env_path, logger=console.print)
866
-
1408
+
867
1409
  import json
1410
+
868
1411
  cli_urls = json.loads(db_url_map) if db_url_map else {}
869
- registry = DatacubeRegistry({}, params=context.params) # Dummy reg for imports
1412
+ registry = DatacubeRegistry({}, params=context.params) # Dummy reg for imports
870
1413
 
871
1414
  def get_url_safe(conf_name, db_imp):
872
- if conf_name in cli_urls: return cli_urls[conf_name]
873
- imp = [db_imp] if db_imp else registry.global_imports
874
- return resolve_db_url(conf_name, imp)
1415
+ if conf_name in cli_urls:
1416
+ return cli_urls[conf_name]
1417
+ imp = [db_imp] if db_imp else registry.global_imports
1418
+ return resolve_db_url(conf_name, imp)
875
1419
 
876
1420
  params = context.params
877
1421
  databases = params.get("databases", [])
878
-
1422
+
879
1423
  target_dbs = databases
880
1424
  if db_name:
881
1425
  target_dbs = [d for d in databases if d.get("name") == db_name]
882
-
883
- _run_field_map_generation(context, config_path, target_dbs, get_url_safe, force=force)
1426
+
1427
+ _run_field_map_generation(
1428
+ context, config_path, target_dbs, get_url_safe, force=force
1429
+ )
884
1430
  return
1431
+
1432
+
885
1433
  @app.command()
886
1434
  def init(
887
1435
  config_file: Optional[Path] = typer.Option(None, "--config"),
888
- db_conf: Optional[str] = typer.Option(None, help="Config object to use for introspection"),
889
- db_name: Optional[str] = typer.Option(None, "--db", help="Target specific database from params"),
1436
+ db_conf: Optional[str] = typer.Option(
1437
+ None, help="Config object to use for introspection"
1438
+ ),
1439
+ db_name: Optional[str] = typer.Option(
1440
+ None, "--db", help="Target specific database from params"
1441
+ ),
890
1442
  db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
891
1443
  env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
892
- dump_schema: Optional[Path] = typer.Option(None, "--dump-schema", help="Dump database schema"),
893
- init_whitelist: Optional[Path] = typer.Option(None, "--init-whitelist", help="Initialize whitelist from DB tables"),
894
- init_rules: Optional[Path] = typer.Option(None, "--init-rules", help="Initialize discovery rules from DB tables"),
895
- reset: bool = typer.Option(False, "--reset", help="Reset registry and config to defaults"),
1444
+ dump_schema: Optional[Path] = typer.Option(
1445
+ None, "--dump-schema", help="Dump database schema"
1446
+ ),
1447
+ init_rules: Optional[Path] = typer.Option(
1448
+ None, "--init-rules", help="Initialize discovery rules from DB tables"
1449
+ ),
1450
+ reset: bool = typer.Option(
1451
+ False, "--reset", help="Reset registry and config to defaults"
1452
+ ),
896
1453
  ) -> None:
897
1454
  """
898
- Initializes configuration, schema dumps, and whitelists.
1455
+ Initializes configuration and schema dumps.
899
1456
  """
900
1457
  params = context.params
901
-
1458
+
902
1459
  # Determine Targets
903
1460
  databases = params.get("databases", [])
904
1461
  target_dbs = []
@@ -907,203 +1464,657 @@ def init(
907
1464
  # Filter specific DB
908
1465
  target_dbs = [d for d in databases if d.get("name") == db_name]
909
1466
  if not target_dbs:
910
- console.print(f"[red]Database '{db_name}' not found in params.[/red]")
911
- raise typer.Exit(code=1)
1467
+ console.print(f"[red]Database '{db_name}' not found in params.[/red]")
1468
+ raise typer.Exit(code=1)
912
1469
  elif databases:
913
1470
  # All DBs
914
1471
  target_dbs = databases
915
1472
  else:
916
1473
  # Legacy Fallback
917
- target_db_conf = db_conf or params.get("default_connection_obj", "replica_db_conf")
918
- target_dbs = [{"name": target_db_conf, "connection_obj": target_db_conf, "whitelist_file": "discovery_whitelist.yaml"}]
1474
+ target_db_conf = db_conf or params.get(
1475
+ "default_connection_obj", "replica_db_conf"
1476
+ )
1477
+ target_dbs = [
1478
+ {
1479
+ "name": target_db_conf,
1480
+ "connection_obj": target_db_conf,
1481
+ "whitelist_file": "discovery_whitelist.yaml",
1482
+ }
1483
+ ]
919
1484
 
920
1485
  # Validate db_conf override if provided (only if single target or legacy)
921
1486
  if db_conf and not db_name and not databases:
922
- target_dbs[0]["connection_obj"] = db_conf
1487
+ target_dbs[0]["connection_obj"] = db_conf
923
1488
 
924
1489
  # Resolve env_file: CLI > Params > Default
925
1490
  if env_file:
926
1491
  env_path = env_file
927
- elif context.params and "env_file" in context.params:
928
- env_path = Path(context.params["env_file"])
1492
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
1493
+ env_path = Path(context.params.get("defaults", {})["env_file"])
929
1494
  else:
930
1495
  env_path = Path(".env.linux")
931
1496
  load_environment(env_path, logger=console.print)
932
1497
 
933
-
934
1498
  # Resolve Context (Registry/Config Paths)
935
1499
  # Be robust if config_file doesn't exist yet
936
1500
  config_path = config_file or context.default_config
937
-
1501
+
938
1502
  if not config_path:
939
1503
  console.print("[red]No config file target specified.[/red]")
940
1504
  raise typer.Exit(code=1)
941
1505
 
942
1506
  import json
1507
+
943
1508
  cli_urls = json.loads(db_url_map) if db_url_map else {}
944
1509
 
945
1510
  # Helper to resolve URL without a full registry instance if file missing
946
1511
  def resolve_url_safe(conf_name):
947
- if conf_name in cli_urls: return cli_urls[conf_name]
948
-
1512
+ if conf_name in cli_urls:
1513
+ return cli_urls[conf_name]
1514
+
949
1515
  # Check params first
950
1516
  for db in databases:
951
- if db.get("connection_obj") == conf_name:
952
- imp = db.get("global_import")
953
- if imp:
954
- url = resolve_db_url(conf_name, [imp])
955
- if url: return url
956
-
1517
+ if db.get("connection_obj") == conf_name:
1518
+ imp = db.get("global_import")
1519
+ if imp:
1520
+ url = resolve_db_url(conf_name, [imp])
1521
+ if url:
1522
+ return url
1523
+
957
1524
  # Try loading defaults if registry file exists
958
1525
  if config_path.exists():
959
- with open(config_path, 'r') as f:
1526
+ with open(config_path, "r") as f:
960
1527
  data = yaml.safe_load(f)
961
1528
  # Minimal registry just to get imports/resolution
962
1529
  reg = DatacubeRegistry(data, params=context.params)
963
1530
  url = resolve_db_url(conf_name, reg.global_imports)
964
- if url: return url
965
-
1531
+ if url:
1532
+ return url
1533
+
966
1534
  # Fallback: try raw resolve (might fail if imports missing)
967
- url = resolve_db_url(conf_name, [])
968
- if url: return url
969
- raise ValueError("Cannot resolve DB URL. Please ensure registry exists or use --db-urls.")
1535
+ url = resolve_db_url(conf_name, [])
1536
+ if url:
1537
+ return url
1538
+ raise ValueError(
1539
+ "Cannot resolve DB URL. Please ensure registry exists or use --db-urls."
1540
+ )
970
1541
 
971
1542
  # 1. Reset / Initialize Files (Global)
972
1543
  if reset:
973
1544
  if typer.confirm("Are you sure you want to reset the registry?"):
974
- # We rely on params being provided by the wrapper/context now.
975
- if not params:
976
- console.print("[yellow]Warning: No params loaded from context. Defaults may be minimal.[/yellow]")
977
-
978
- # Registry Default
979
- default_registry = {
980
- "global_imports": params.get("global_imports", []),
981
- "tables": {}
982
- }
983
- with open(config_path, "w") as f:
984
- yaml.dump(default_registry, f, sort_keys=False)
985
- console.print(f"[green]Reset {config_path}[/green]")
1545
+ # We rely on params being provided by the wrapper/context now.
1546
+ if not params:
1547
+ console.print(
1548
+ "[yellow]Warning: No params loaded from context. Defaults may be minimal.[/yellow]"
1549
+ )
1550
+
1551
+ # Registry Default
1552
+ default_registry = {
1553
+ "global_imports": params.get("global_imports", []),
1554
+ "tables": {},
1555
+ }
1556
+ with open(config_path, "w") as f:
1557
+ yaml.dump(default_registry, f, sort_keys=False)
1558
+ console.print(f"[green]Reset {config_path}[/green]")
986
1559
 
987
1560
  # Loop over targets for DB-specific actions
988
1561
  for db in target_dbs:
989
1562
  db_name = db.get("name")
990
1563
  conn_obj = db.get("connection_obj")
991
-
1564
+
992
1565
  try:
993
1566
  db_url = resolve_url_safe(conn_obj)
994
1567
  engine = sa.create_engine(db_url)
995
1568
  except Exception as e:
996
- console.print(f"[red]Skipping {db_name}: Cannot check DB connection ({e})[/red]")
1569
+ console.print(
1570
+ f"[red]Skipping {db_name}: Cannot check DB connection ({e})[/red]"
1571
+ )
997
1572
  continue
998
1573
 
999
1574
  # 2. Dump Schema
1000
1575
  if dump_schema:
1001
1576
  from sibi_flux.datacube.generator import dump_db_schema
1577
+
1002
1578
  console.print(f"[bold]Dumping schema for {db_name}...[/bold]")
1003
1579
  dump_db_schema(
1004
1580
  engine=engine,
1005
1581
  db_name=db_name,
1006
1582
  output_dir=dump_schema,
1007
- logger=console.print
1583
+ logger=console.print,
1008
1584
  )
1009
-
1010
- # 3. Initialize Whitelist
1011
- if init_whitelist:
1585
+
1586
+ # 3. Initialize Rules (Global/Merged?)
1587
+ if init_rules:
1012
1588
  insp = inspect(engine)
1013
1589
  tables = insp.get_table_names()
1014
-
1015
- # Determine path
1016
- if init_whitelist.name == "discovery_whitelist.yaml": # CLI Default value check?
1017
- # Actually Typer might pass the value even if default.
1018
- # If valid path provided, usage is ambiguous with multi-db.
1019
- # If explicit path provided, we write to IT. (Overwriting per loop? Bad).
1020
- # Convention: If explicit path provided, we assume single DB mode or user knows what they do.
1021
- # BUT here we want to use the config-defined whitelist file if available.
1022
-
1023
- wl_file = db.get("whitelist_file", f"discovery_whitelist_{db_name}.yaml")
1024
- target_path = config_path.parent / wl_file
1590
+
1591
+ target_path = init_rules
1592
+ if not target_path.is_absolute():
1593
+ # Default to Project Root anchoring for consistency
1594
+ try:
1595
+ project_root = config_path.parent.parent.parent
1596
+ except Exception:
1597
+ project_root = Path.cwd()
1598
+
1599
+ target_path = project_root / target_path
1600
+
1601
+ # Ensure parent dir exists
1602
+ if not target_path.parent.exists():
1603
+ target_path.parent.mkdir(parents=True, exist_ok=True)
1604
+
1605
+ # Load existing to append?
1606
+ existing_rules = []
1607
+ if target_path.exists():
1608
+ with open(target_path, "r") as f:
1609
+ existing_rules = yaml.safe_load(f) or []
1610
+
1611
+ console.print(f"Appending rules for {db_name} tables...")
1612
+
1613
+ new_rules = []
1614
+ for table in sorted(tables):
1615
+ # Check existence
1616
+ if any(r["pattern"] == table for r in existing_rules):
1617
+ continue
1618
+
1619
+ new_rules.append(
1620
+ {
1621
+ "pattern": table,
1622
+ "match_type": "exact",
1623
+ "domain": "common",
1624
+ "output_template": f"common/{table}_cubes.py",
1625
+ "db_conn_override": conn_obj,
1626
+ }
1627
+ )
1628
+
1629
+ all_rules = existing_rules + new_rules
1630
+
1631
+ with open(target_path, "w") as f:
1632
+ yaml.dump(all_rules, f, sort_keys=False)
1633
+ console.print(f"[green]Rules updated for {db_name}.[/green]")
1634
+
1635
+
1636
+ @app.command()
1637
+ def whitelist(
1638
+ config_file: Optional[Path] = typer.Option(None, "--config"),
1639
+ db_name: Optional[str] = typer.Option(
1640
+ None, "--db", help="Target specific database from params"
1641
+ ),
1642
+ db_url_map: Optional[str] = typer.Option(None, "--db-urls"),
1643
+ env_file: Optional[Path] = typer.Option(None, "--env-file", "-e"),
1644
+ force: bool = typer.Option(False, "--force", "-f"),
1645
+ ) -> None:
1646
+ """
1647
+ Generates whitelist files based on discovery rules and database schema.
1648
+ """
1649
+ params = context.params
1650
+ databases = params.get("databases", [])
1651
+
1652
+ # 1. Determine Targets
1653
+ target_dbs = []
1654
+ if db_name:
1655
+ target_dbs = [d for d in databases if d.get("name") == db_name]
1656
+ if not target_dbs:
1657
+ console.print(f"[red]Database '{db_name}' not found in params.[/red]")
1658
+ raise typer.Exit(code=1)
1659
+ elif databases:
1660
+ target_dbs = databases
1661
+ if not target_dbs:
1662
+ # Legacy
1663
+ target_db_conf = params.get("default_connection_obj", "replica_db_conf")
1664
+ target_dbs = [{"name": target_db_conf, "connection_obj": target_db_conf}]
1665
+
1666
+ # 2. Env Load
1667
+ if env_file:
1668
+ env_path = env_file
1669
+ elif context.params and context.params.get("defaults", {}).get("env_file"):
1670
+ env_path = Path(context.params.get("defaults", {})["env_file"])
1671
+ else:
1672
+ env_path = Path(".env.linux")
1673
+ load_environment(env_path, logger=console.print)
1674
+
1675
+ # 3. Config Path
1676
+ config_path = config_file or context.default_config
1677
+ if not config_path:
1678
+ console.print("[red]No config file target specified.[/red]")
1679
+ raise typer.Exit(code=1)
1680
+
1681
+ import json
1682
+
1683
+ cli_urls = json.loads(db_url_map) if db_url_map else {}
1684
+
1685
+ config_data = _load_and_resolve_config(
1686
+ config_path
1687
+ ) # Since we need rules/paths resolved
1688
+
1689
+ registry = DatacubeRegistry(config_data, params=context.params)
1690
+
1691
+ def resolve_url_safe(conf_name, db_imp):
1692
+ if conf_name in cli_urls:
1693
+ return cli_urls[conf_name]
1694
+ imp = [db_imp] if db_imp else registry.global_imports
1695
+ return resolve_db_url(conf_name, imp)
1696
+
1697
+ # 4. Iterate and Generate
1698
+ for db in target_dbs:
1699
+ db_name = db.get("name")
1700
+ conn_obj = db.get("connection_obj")
1701
+
1702
+ try:
1703
+ db_url = resolve_url_safe(conn_obj, db.get("global_import"))
1704
+ engine = sa.create_engine(db_url)
1705
+ except Exception as e:
1706
+ console.print(
1707
+ f"[red]Skipping {db_name}: Cannot check DB connection ({e})[/red]"
1708
+ )
1709
+ continue
1710
+
1711
+ insp = inspect(engine)
1712
+ tables = insp.get_table_names()
1713
+
1714
+ # Determine path (Config Driven)
1715
+ discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
1716
+ "discovery", {}
1717
+ )
1718
+ wl_file = (
1719
+ discovery_cfg.get("whitelist_file")
1720
+ or params.get("whitelist_file")
1721
+ or "whitelist.yaml"
1722
+ )
1723
+ target_path = Path(wl_file)
1724
+ if not target_path.is_absolute():
1725
+ try:
1726
+ project_root = config_path.parent.parent.parent
1727
+ except Exception:
1728
+ project_root = Path.cwd()
1729
+ target_path = project_root / target_path
1730
+
1731
+ # Load Rules
1732
+ discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
1733
+ "discovery", {}
1734
+ )
1735
+ rules_file = (
1736
+ discovery_cfg.get("rules_file")
1737
+ or params.get("rules_file")
1738
+ or "discovery_rules.yaml"
1739
+ )
1740
+ if Path(rules_file).is_absolute():
1741
+ rules_path = Path(rules_file)
1742
+ else:
1743
+ try:
1744
+ prj_root = config_path.parent.parent.parent
1745
+ except Exception:
1746
+ prj_root = Path.cwd()
1747
+ rules_path = prj_root / rules_file
1748
+
1749
+ filtered_tables = {} # Default to ALL if no rules? No, tables is list.
1750
+ # If no rules, we might want to default to empty dicts for all tables?
1751
+ # Let's keep logic: if rules exist, filter.
1752
+
1753
+ # 4. Use ConfigurationEngine for logic
1754
+ # Initialize engine with the resolved rules file
1755
+ from sibi_flux.datacube.config_engine import ConfigurationEngine
1756
+
1757
+ # We need to construct a lightweight 'params' dict or use existing context.params
1758
+ # But we need to ensure 'engine' uses the correct 'rules_path' for THIS connection.
1759
+
1760
+ # NOTE: ConfigEngine takes 'params' and 'rules_path'.
1761
+ # It handles scoped dictionary rules (via 'context_key') correctly.
1762
+
1763
+ eng = ConfigurationEngine(
1764
+ context.params, rules_path=str(rules_path), context_key=conn_obj
1765
+ )
1766
+
1767
+ filtered_tables = {}
1768
+
1769
+ if rules_path.exists():
1770
+ for t in tables:
1771
+ # Resolve using engine (handles prefix, regex, template logic)
1772
+ # Pass mocked db_config for domain resolution logic inside engine
1773
+ mock_db_config = {
1774
+ "db_domain": db.get("db_domain", "common"),
1775
+ "connection_obj": conn_obj,
1776
+ }
1777
+
1778
+ res = eng.resolve_table(t, db_config=mock_db_config)
1779
+
1780
+ if res:
1781
+ # Calculate relative paths
1782
+ db_dom = db.get("db_domain")
1783
+
1784
+ # 1. Datacube Path Explicit
1785
+ # Structure: datacubes_dir / [db_domain] / domain / template
1786
+
1787
+ # datacubes_dir resolved from params
1788
+ dc_root_dir = (
1789
+ context.params.get("paths", {})
1790
+ .get("target", {})
1791
+ .get("datacubes_dir", "dataobjects/gencubes")
1792
+ )
1793
+
1794
+ dc_base = Path(dc_root_dir)
1795
+ if db_dom:
1796
+ dc_base = dc_base / db_dom
1797
+
1798
+ # Rule might provide 'output_template' (e.g. 'asm_cubes.py')
1799
+ # We assume template is just the filename now per robust rules
1800
+ template_name = res.get("output_template", Path(res["path"]).name)
1801
+
1802
+ full_dc_path = dc_base / res["domain"] / template_name
1803
+
1804
+ # 2. Field Map Path Explicit
1805
+ # Structure: field_maps_dir / [db_domain] / domain / table.py
1806
+
1807
+ fields_root_dir = (
1808
+ context.params.get("paths", {})
1809
+ .get("target", {})
1810
+ .get("field_maps_dir", "dataobjects/fields")
1811
+ )
1812
+
1813
+ fm_base = Path(fields_root_dir)
1814
+ if db_dom:
1815
+ fm_base = fm_base / db_dom
1816
+
1817
+ fm_path_full = fm_base / res["domain"] / f"{t}.py"
1818
+
1819
+ # Relativize logic
1820
+ def _safe_rel(p):
1821
+ try:
1822
+ pp = Path(p)
1823
+ # Heuristic for project root if not clear
1824
+ root = (
1825
+ config_path.parent.parent.parent
1826
+ if config_path.parent.name == "datacubes"
1827
+ else Path.cwd()
1828
+ )
1829
+ if pp.is_absolute():
1830
+ try:
1831
+ return pp.relative_to(root)
1832
+ except ValueError:
1833
+ return pp.resolve().relative_to(root.resolve())
1834
+ return pp
1835
+ except Exception:
1836
+ return Path(p)
1837
+
1838
+ rel_dc_path = _safe_rel(full_dc_path)
1839
+ rel_fm_path = _safe_rel(fm_path_full)
1840
+
1841
+ filtered_tables[t] = {
1842
+ "domain": res["domain"],
1843
+ "output_template": (
1844
+ Path(res["path"]).name
1845
+ if "output_template" not in res
1846
+ else res.get("output_template", Path(res["path"]).name)
1847
+ ),
1848
+ "datacube_path": str(rel_dc_path),
1849
+ "field_map_path": str(rel_fm_path),
1850
+ }
1851
+ # Recover template logic: if rule-based, template was used.
1852
+ # But since we have the path, template is secondary.
1853
+ # We'll stick to what we have.
1854
+
1855
+ else:
1856
+ console.print(
1857
+ f"[yellow]No rules found for {conn_obj}. Whitelisting ALL tables (No Paths Calculated).[/yellow]"
1858
+ )
1859
+ filtered_tables = {t: {} for t in tables}
1860
+
1861
+ # 5. Load Current Whitelist
1862
+ current_wl = {}
1863
+ if target_path.exists():
1864
+ try:
1865
+ with open(target_path, "r") as rf:
1866
+ current_wl = yaml.safe_load(rf) or {}
1867
+ except:
1868
+ pass
1869
+
1870
+ # Existing whitelist for this connection
1871
+ existing_tables_map = {}
1872
+ if conn_obj in current_wl:
1873
+ raw_wl = current_wl[conn_obj]
1874
+ if isinstance(raw_wl, list):
1875
+ # Upgrade legacy list to dict
1876
+ existing_tables_map = {t: {} for t in raw_wl}
1025
1877
  else:
1026
- target_path = init_whitelist
1027
- if not target_path.is_absolute():
1028
- target_path = config_path.parent / target_path
1029
-
1030
- console.print(f"Initializing whitelist at {target_path} for {db_name} with {len(tables)} tables...")
1031
- dump_data = sorted(tables)
1032
- with open(target_path, 'w') as f:
1033
- yaml.dump({'tables': dump_data}, f, sort_keys=False)
1034
- console.print(f"[green]whitelist initialized for {db_name}.[/green]")
1035
-
1036
- # 4. Initialize Rules (Global/Merged?)
1037
- # Rules are usually global. Initializing from ONE db might miss others,
1038
- # or overwriting rules file repeatedly.
1039
- # We'll skip complex merging for now and just append or warn.
1040
- if init_rules:
1041
- insp = inspect(engine)
1042
- tables = insp.get_table_names()
1043
-
1044
- target_path = init_rules
1045
- if not target_path.is_absolute():
1046
- target_path = config_path.parent / target_path
1047
-
1048
- # Load existing to append?
1049
- existing_rules = []
1050
- if target_path.exists():
1051
- with open(target_path, 'r') as f:
1052
- existing_rules = yaml.safe_load(f) or []
1053
-
1054
- console.print(f"Appending rules for {db_name} tables...")
1055
-
1056
- new_rules = []
1057
- for table in sorted(tables):
1058
- # Check existence
1059
- if any(r['pattern'] == table for r in existing_rules):
1060
- continue
1061
-
1062
- new_rules.append({
1063
- "pattern": table,
1064
- "match_type": "exact",
1065
- "domain": "common",
1066
- "output_template": f"common/{table}_cubes.py",
1067
- "db_conn_override": conn_obj
1068
- })
1069
-
1070
- all_rules = existing_rules + new_rules
1071
-
1072
- with open(target_path, 'w') as f:
1073
- yaml.dump(all_rules, f, sort_keys=False)
1074
- console.print(f"[green]Rules updated for {db_name}.[/green]")
1075
-
1076
-
1077
-
1078
- def _run_field_map_generation(context, config_path, target_dbs, url_resolver, force=False):
1878
+ existing_tables_map = raw_wl.get("tables", {})
1879
+
1880
+ # Calculate sets
1881
+ current_table_names = set(existing_tables_map.keys())
1882
+ new_table_names = set(filtered_tables.keys())
1883
+
1884
+ # Sync Logic:
1885
+ # 1. Start with intersection (preserve config, but update rule-based defaults if missing?)
1886
+ # We want to keep manual overrides in existing map, but maybe refresh defaults?
1887
+ retained = current_table_names.intersection(new_table_names)
1888
+
1889
+ # 2. Add new filtered tables (additions)
1890
+ added = new_table_names - current_table_names
1891
+
1892
+ # 3. Removed (in current but not in filtered)
1893
+ removed = current_table_names - new_table_names
1894
+
1895
+ # Construct new table map
1896
+ new_table_map = {}
1897
+
1898
+ # 1. Retained: Merge existing with new rule metadata
1899
+ # Priority: Existing (Manual) > New (Rule)
1900
+ # BUT: Enforce calculated paths to avoid stale absolute paths
1901
+ for t in retained:
1902
+ existing_meta = existing_tables_map[t]
1903
+ rule_meta = filtered_tables[t]
1904
+ # Merge: update rule defaults only if not set in existing
1905
+ merged = rule_meta.copy()
1906
+ merged.update(existing_meta) # Existing overwrites rule
1907
+
1908
+ # Restore calculated paths (Enforce Relative)
1909
+ if "datacube_path" in rule_meta:
1910
+ merged["datacube_path"] = rule_meta["datacube_path"]
1911
+ if "field_map_path" in rule_meta:
1912
+ merged["field_map_path"] = rule_meta["field_map_path"]
1913
+
1914
+ new_table_map[t] = merged
1915
+
1916
+ # 2. Add new
1917
+ for t in added:
1918
+ new_table_map[t] = filtered_tables[t]
1919
+
1920
+ # 3. Removed are omitted
1921
+
1922
+ # Update structure
1923
+ # Inject Global Paths
1924
+ paths_cfg = params.get("paths", {}).get("target", {})
1925
+
1926
+ # Helper to ensure relative paths
1927
+ def _to_rel(p):
1928
+ if not p:
1929
+ return p
1930
+ try:
1931
+ pp = Path(p)
1932
+ # Heuristic for project root if not clear
1933
+ root = (
1934
+ config_path.parent.parent.parent
1935
+ if config_path.parent.name == "datacubes"
1936
+ else Path.cwd()
1937
+ )
1938
+ if pp.is_absolute():
1939
+ try:
1940
+ return str(pp.relative_to(root))
1941
+ except ValueError:
1942
+ return str(pp.resolve().relative_to(root.resolve()))
1943
+ return p
1944
+ except Exception:
1945
+ pass
1946
+ return p
1947
+
1948
+ dc_dir = _to_rel(paths_cfg.get("datacubes_dir", "dataobjects/gencubes/"))
1949
+ fm_dir = _to_rel(
1950
+ paths_cfg.get("field_maps_dir", "dataobjects/gencubes/fields/")
1951
+ )
1952
+
1953
+ # Get db_domain from config (fallback to db_name if missing)
1954
+ db_domain = db.get("db_domain") or db.get("name")
1955
+
1956
+ rich_structure = {
1957
+ "db_domain": db_domain,
1958
+ "datacubes_dir": dc_dir,
1959
+ "field_maps_dir": fm_dir,
1960
+ "tables": dict(sorted(new_table_map.items())), # Sort keys for stability
1961
+ }
1962
+
1963
+ current_wl[conn_obj] = rich_structure
1964
+
1965
+ with open(target_path, "w") as f:
1966
+ yaml.dump(current_wl, f, sort_keys=False)
1967
+
1968
+ # Report
1969
+ console.print(f"[bold underline]Sync Report for {db_name}[/bold underline]")
1970
+ if added:
1971
+ console.print(
1972
+ f"[green] + Added {len(added)} tables:[/green] {', '.join(sorted(list(added))[:5])}{'...' if len(added)>5 else ''}"
1973
+ )
1974
+ if removed:
1975
+ console.print(
1976
+ f"[red] - Removed {len(removed)} tables:[/red] {', '.join(sorted(list(removed))[:5])}{'...' if len(removed)>5 else ''}"
1977
+ )
1978
+ if not added and not removed:
1979
+ console.print("[dim] No changes.[/dim]")
1980
+
1981
+ # Count keys directly
1982
+ final_count = len(rich_structure["tables"])
1983
+ console.print(f"[blue] Total Whitelisted: {final_count}[/blue]")
1984
+
1985
+
1986
+ def _run_field_map_generation(
1987
+ context, config_path, target_dbs, url_resolver, force=False
1988
+ ):
1079
1989
  """Shared logic for generating field map files."""
1080
1990
  params = context.params
1081
- folder_prefix = params.get("folder_prefix", "solutions/dataobjects/gencubes/")
1082
- fields_suffix = params.get("fields_module_root", "fields")
1083
-
1991
+ # Logic: modern nested > legacy flat > default
1992
+ folder_prefix = (
1993
+ params.get("paths", {}).get("target", {}).get("datacubes_dir")
1994
+ or params.get("folder_prefix")
1995
+ or "solutions/dataobjects/gencubes/"
1996
+ )
1997
+
1998
+ # Priority: explicit 'field_maps_dir' > legacy suffix construction
1999
+ configured_fm_dir = params.get("paths", {}).get("target", {}).get("field_maps_dir")
2000
+
2001
+ if configured_fm_dir:
2002
+ # If fm_dir is provided, it is the root for fields.
2003
+ # We don't append suffix to it unless it's just a root base?
2004
+ # Usually 'field_maps_dir' is the full relative path e.g. 'dataobjects/fields'
2005
+ fields_root_path_base = Path(configured_fm_dir)
2006
+ use_legacy_construction = False
2007
+ else:
2008
+ fields_suffix = (
2009
+ params.get("generation", {}).get("fields_subpackage")
2010
+ or params.get("fields_module_root")
2011
+ or "fields"
2012
+ )
2013
+ fields_root_path_base = Path(folder_prefix) / fields_suffix
2014
+ use_legacy_construction = True
2015
+
1084
2016
  from sibi_flux.datacube.field_mapper import FieldTranslationManager
1085
-
2017
+
2018
+ # Initialize Manager & Load Global Repo
1086
2019
  # Initialize Manager & Load Global Repo
1087
- repo_rel_path = context.params.get("global_repo_path", "solutions/conf/global_field_repository.yaml")
1088
- global_repo_path = Path(repo_rel_path).resolve()
1089
-
2020
+ repo_rel_path = (
2021
+ params.get("paths", {})
2022
+ .get("repositories", {})
2023
+ .get("global_field_repository_file")
2024
+ or params.get("global_repo_path")
2025
+ or "solutions/conf/global_field_repository.yaml"
2026
+ )
2027
+
2028
+ # Resolve relative to project root if needed
2029
+ if Path(repo_rel_path).is_absolute():
2030
+ global_repo_path = Path(repo_rel_path)
2031
+ else:
2032
+ try:
2033
+ # Heuristic: config is in generators/datacubes, project root is 3 levels up
2034
+ # But better to check if config_path is passed
2035
+ project_root = config_path.parent.parent.parent
2036
+ except Exception:
2037
+ project_root = Path.cwd()
2038
+ global_repo_path = (project_root / repo_rel_path).resolve()
2039
+
1090
2040
  manager = FieldTranslationManager()
1091
-
2041
+
2042
+ # 1. Load Repository (Definitions)
1092
2043
  if global_repo_path.exists():
1093
2044
  with open(global_repo_path, "r") as f:
1094
2045
  repo_data = yaml.safe_load(f) or []
1095
2046
  manager.load_from_list(repo_data)
1096
- console.print(f"[green]Loaded {len(manager.fields)} fields from Global Repository.[/green]")
1097
- else:
1098
- console.print(f"[dim]Global Repository not found at {global_repo_path}, creating new...[/dim]")
2047
+ console.print(
2048
+ f"[green]Loaded {len(manager.fields)} fields from Global Repository.[/green]"
2049
+ )
2050
+
2051
+ # 2. Load Translations (Overrides)
2052
+ trans_rel_path = params.get("paths", {}).get("repositories", {}).get(
2053
+ "global_field_translations_file"
2054
+ ) or params.get("global_field_translations_file")
2055
+ if trans_rel_path:
2056
+ if Path(trans_rel_path).is_absolute():
2057
+ trans_path = Path(trans_rel_path)
2058
+ else:
2059
+ trans_path = (project_root / trans_rel_path).resolve()
2060
+
2061
+ if trans_path.exists():
2062
+ with open(trans_path, "r") as f:
2063
+ trans_data = yaml.safe_load(f) or []
2064
+ manager.load_from_list(trans_data)
2065
+ console.print(
2066
+ f"[green]Loaded translations from {trans_rel_path}.[/green]"
2067
+ )
2068
+
2069
+ # --- GLOBAL CLEAN BUILD ---
2070
+ if force:
2071
+ # Determine global field maps root to wipe
2072
+ # Logic matches default resolution used later
2073
+ tgt = params.get("paths", {}).get("target", {})
2074
+ fm_dir = tgt.get("field_maps_dir")
2075
+
2076
+ # If not set in params, check if we can infer from default
2077
+ # But wait, whitelist overrides this per DB.
2078
+ # User said "fields folder is exclusive".
2079
+ # So we should wipe the configured 'field_maps_dir' from global params.
2080
+
2081
+ if fm_dir:
2082
+ if Path(fm_dir).is_absolute():
2083
+ abs_fm_dir = Path(fm_dir)
2084
+ else:
2085
+ abs_fm_dir = (project_root / fm_dir).resolve()
2086
+
2087
+ if abs_fm_dir.exists():
2088
+ console.print(
2089
+ f"[bold red]Global Clean: Removing entire fields directory {abs_fm_dir}[/bold red]"
2090
+ )
2091
+ try:
2092
+ shutil.rmtree(abs_fm_dir)
2093
+ abs_fm_dir.mkdir(parents=True, exist_ok=True)
2094
+ (abs_fm_dir / "__init__.py").touch()
2095
+ except Exception as e:
2096
+ console.print(f"[red]Failed to clean global fields dir: {e}[/red]")
2097
+ else:
2098
+ console.print(
2099
+ "[yellow]Warning: Could not determine global field_maps_dir for clean build.[/yellow]"
2100
+ )
1099
2101
 
1100
2102
  for db in target_dbs:
2103
+ # console.print(f"DEBUG: Processing DB entry: {db} (Type: {type(db)})")
2104
+ if isinstance(db, str):
2105
+ console.print(
2106
+ f"[red]Error: Database entry is a string '{db}', expected dict. Check config.[/red]"
2107
+ )
2108
+ continue
2109
+
1101
2110
  name = db.get("id") or db.get("name")
1102
2111
  conn_obj = db.get("connection_ref") or db.get("connection_obj")
1103
-
1104
- if not db.get("enable_field_map_generation", True) and not params.get("generation", {}).get("enable_field_maps", True):
1105
- console.print(f"[dim]Skipping {name}: Field map generation disabled.[/dim]")
1106
- continue
2112
+
2113
+ if not db.get("enable_field_map_generation", True) and not params.get(
2114
+ "generation", {}
2115
+ ).get("enable_field_maps", True):
2116
+ console.print(f"[dim]Skipping {name}: Field map generation disabled.[/dim]")
2117
+ continue
1107
2118
 
1108
2119
  # Language Settings
1109
2120
  source_lang = db.get("db_source_lang", "es")
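The new `whitelist` command introduced above writes one block per connection (db_domain, datacubes_dir, field_maps_dir and a tables map) and reconciles it against the previous file: tables still matched by the rules keep their manual metadata, newly matched tables are added, and everything else is dropped, with the freshly calculated relative paths always re-applied. A compact sketch of that sync step, assuming the same entry keys as the diff (the helper name and sample tables are illustrative):

    def sync_tables(existing, discovered):
        retained = existing.keys() & discovered.keys()
        added = discovered.keys() - existing.keys()
        removed = existing.keys() - discovered.keys()

        merged = {}
        for t in retained:
            entry = {**discovered[t], **existing[t]}          # existing (manual) values win...
            for key in ("datacube_path", "field_map_path"):   # ...except the calculated paths
                if key in discovered[t]:
                    entry[key] = discovered[t][key]
            merged[t] = entry
        for t in added:
            merged[t] = dict(discovered[t])

        # Removed tables are simply omitted; keys are sorted for stable YAML output.
        return dict(sorted(merged.items())), added, removed

    existing = {"orders": {"custom_name": "OrdersCube", "datacube_path": "/old/abs/orders_cubes.py"}}
    discovered = {
        "orders": {"domain": "sales", "datacube_path": "dataobjects/gencubes/sales/orders_cubes.py"},
        "clients": {"domain": "crm", "datacube_path": "dataobjects/gencubes/crm/clients_cubes.py"},
    }
    tables, added, removed = sync_tables(existing, discovered)
    # "orders" keeps custom_name but takes the recalculated datacube_path; "clients" is added.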
@@ -1111,96 +2122,298 @@ def _run_field_map_generation(context, config_path, target_dbs, url_resolver, fo
1111
2122
 
1112
2123
  # Path Composition
1113
2124
  db_domain = db.get("db_domain")
1114
- fields_root_path = Path(folder_prefix)
1115
- if db_domain:
1116
- fields_root_path = fields_root_path / db_domain
1117
- fields_root_path = fields_root_path / fields_suffix
1118
-
1119
-
2125
+
2126
+ if use_legacy_construction:
2127
+ fields_root_path = Path(folder_prefix)
2128
+ if db_domain:
2129
+ fields_root_path = fields_root_path / db_domain
2130
+ fields_root_path = fields_root_path / fields_suffix
2131
+ else:
2132
+ # Modern Logic: configured_fm_dir is root
2133
+ fields_root_path = fields_root_path_base
2134
+ if db_domain:
2135
+ fields_root_path = fields_root_path / db_domain
2136
+
2137
+ # Resolve global whitelist path
1120
2138
  # Resolve global whitelist path
1121
2139
  # Try nested discovery param first, then legacy flat
2140
+ discovery_cfg = params.get("paths", {}).get("discovery") or params.get(
2141
+ "discovery", {}
2142
+ )
1122
2143
  global_whitelist_file = (
1123
- params.get("discovery", {}).get("whitelist_file") or
1124
- params.get("whitelist_file") or
1125
- "whitelist.yaml"
2144
+ discovery_cfg.get("whitelist_file")
2145
+ or params.get("whitelist_file")
2146
+ or "whitelist.yaml"
1126
2147
  )
1127
-
2148
+
2149
+ # If absolute, use directly (handled by gen_dc.py resolution), else resolve
1128
2150
  # If absolute, use directly (handled by gen_dc.py resolution), else resolve
1129
2151
  if Path(global_whitelist_file).is_absolute():
1130
2152
  wl_path = Path(global_whitelist_file)
1131
2153
  else:
1132
- wl_path = config_path.parent / global_whitelist_file
1133
-
2154
+ # Use Project Root anchoring
2155
+ try:
2156
+ # Heuristic: config is in generators/datacubes
2157
+ project_root = config_path.parent.parent.parent
2158
+ except Exception:
2159
+ project_root = Path.cwd()
2160
+ wl_path = project_root / global_whitelist_file
2161
+
1134
2162
  if not wl_path.exists():
1135
- console.print(f"[yellow]Skipping {name}: Whitelist {global_whitelist_file} not found.[/yellow]")
2163
+ console.print(
2164
+ f"[yellow]Skipping {name}: Whitelist {global_whitelist_file} not found.[/yellow]"
2165
+ )
1136
2166
  continue
1137
-
2167
+
1138
2168
  with open(wl_path, "r") as f:
1139
2169
  registry_data = yaml.safe_load(f) or {}
1140
2170
 
1141
- # Default to empty dict if key not found
2171
+ # Load Rules for Domain Inference
2172
+ rules_filename = db.get("rules_file")
2173
+ if not rules_filename:
2174
+ # Check deep keys
2175
+ disc = params.get("paths", {}).get("discovery", {})
2176
+ # console.print(f"DEBUG: Discovery Block: {disc}")
2177
+ rules_filename = disc.get("rules_file") or params.get("discovery", {}).get(
2178
+ "rules_file"
2179
+ )
2180
+
2181
+ if not rules_filename:
2182
+ console.print(
2183
+ f"DEBUG: Fallback for {name}. Params keys: {disc.keys() if 'disc' in locals() else 'N/A'}"
2184
+ )
2185
+ rules_filename = f"discovery_rules_{conn_obj}.yaml"
2186
+
2187
+ # Resolve path relative to project root properly
2188
+ if Path(rules_filename).is_absolute():
2189
+ rules_path = Path(rules_filename)
2190
+ else:
2191
+ try:
2192
+ # Heuristic: config is in generators/datacubes
2193
+ # Reuse project_root calculation from above
2194
+ if "project_root" not in locals():
2195
+ try:
2196
+ project_root = config_path.parent.parent.parent
2197
+ except Exception:
2198
+ project_root = Path.cwd()
2199
+
2200
+ console.print(
2201
+ f"DEBUG: ConfigRoot={config_path.parent}, ProjRoot={project_root}, RulesFile={rules_filename}"
2202
+ )
2203
+ rules_path = project_root / rules_filename
2204
+ except Exception:
2205
+ rules_path = config_path.parent / rules_filename
2206
+
2207
+ rules = []
2208
+ # console.print(f"DEBUG: Checking rules path: {rules_path}")
2209
+ if rules_path.exists():
2210
+ with open(rules_path, "r") as f:
2211
+ rules = yaml.safe_load(f) or []
2212
+
2213
+ # PROPERLY HANDLE DICT vs LIST RULES
2214
+ if isinstance(rules, dict):
2215
+ # Try to find specific rules for this connection
2216
+ candidates = [conn_obj, name]
2217
+ found = False
2218
+ for key in candidates:
2219
+ if key in rules:
2220
+ rules = rules[key]
2221
+ found = True
2222
+ break
2223
+ if not found:
2224
+ rules = []
2225
+
2226
+ # console.print(f"DEBUG: Loaded {len(rules)} rules from {rules_path} for {conn_obj}")
2227
+
2228
+ # console.print(f"DEBUG: Loaded {len(rules)} rules from {rules_path}")
2229
+
2230
+ # Support List or Dict Format
1142
2231
  scoped_data = registry_data.get(conn_obj, {})
1143
- tables = scoped_data.get("tables", {})
2232
+ if isinstance(scoped_data, list):
2233
+ tables = {t: {} for t in scoped_data}
2234
+ else:
2235
+ tables = scoped_data.get("tables", {})
2236
+
1144
2237
  if not tables:
1145
- console.print(f"[dim]No tables in whitelist for {name}.[/dim]")
1146
- continue
2238
+ console.print(f"[dim]No tables in whitelist for {name}.[/dim]")
2239
+ continue
2240
+
2241
+ # Override paths from whitelist if present
2242
+ # This allows whitelist to drive output location efficiently
2243
+ # Retrieve db_domain from whitelist entry (preferred) or registry config
2244
+ # Hoisted from loop for Clean Build capability
2245
+ wl_entry = registry_data.get(conn_obj, {})
2246
+ db_domain = wl_entry.get("db_domain") or db.get("db_domain") or db.get("name")
2247
+
2248
+ wl_fields_dir = wl_entry.get("field_maps_dir")
2249
+ if wl_fields_dir:
2250
+ # Resolve relative to project root
2251
+ if Path(wl_fields_dir).is_absolute():
2252
+ fields_root_path = Path(wl_fields_dir)
2253
+ else:
2254
+ # Use cached or recalculated project_root
2255
+ if "project_root" not in locals():
2256
+ try:
2257
+ project_root = config_path.parent.parent.parent
2258
+ except Exception:
2259
+ project_root = Path.cwd()
2260
+ fields_root_path = project_root / wl_fields_dir
2261
+ else:
2262
+ # Fallback to global setting (calculated earlier, but we need to re-calc if not in scope or just reuse?)
2263
+
2264
+ # Logic: modern nested > legacy flat > default
2265
+ folder_prefix = (
2266
+ params.get("paths", {}).get("target", {}).get("datacubes_dir")
2267
+ or params.get("folder_prefix")
2268
+ or "solutions/dataobjects/gencubes/"
2269
+ )
2270
+ fields_suffix = (
2271
+ params.get("generation", {}).get("fields_subpackage")
2272
+ or params.get("fields_module_root")
2273
+ or "fields"
2274
+ )
2275
+ # Resolve
2276
+ if not Path(folder_prefix).is_absolute():
2277
+ try:
2278
+ prj_root = config_path.parent.parent.parent
2279
+ except Exception:
2280
+ prj_root = Path.cwd()
2281
+ folder_prefix = str(prj_root / folder_prefix)
2282
+
2283
+ # Construct
2284
+ fields_root_path = fields_root_path_base
2285
+
2286
+ # Ensure Parent Chain has __init__.py (Backtrack to root)
2287
+ try:
2288
+ # Make sure fields_root_path itself exists
2289
+ fields_root_path.mkdir(parents=True, exist_ok=True)
2290
+ if not (fields_root_path / "__init__.py").exists():
2291
+ (fields_root_path / "__init__.py").touch()
2292
+
2293
+ curr = fields_root_path.parent
2294
+ # Stop at project root (.) or root (/)
2295
+ while str(curr) != "." and str(curr) != "/" and len(curr.parts) > 0:
2296
+ if not (curr / "__init__.py").exists():
2297
+ (curr / "__init__.py").touch()
2298
+ curr = curr.parent
2299
+ except Exception:
2300
+ pass
2301
+
2302
+ # This is safe because db_domain is specific to this connection.
2303
+ db_target_root = fields_root_path / db_domain
2304
+
2305
+ # Ensure Root Exists
2306
+ db_target_root.mkdir(parents=True, exist_ok=True)
2307
+ if not (db_target_root / "__init__.py").exists():
2308
+ (db_target_root / "__init__.py").touch()
2309
+
2310
+ console.print(
2311
+ f"[bold cyan]Mapping fields for {name} ({len(tables)} tables) [Src:{source_lang} -> Tgt:{target_lang}]...[/bold cyan]"
2312
+ )
1147
2313
 
1148
- console.print(f"[bold cyan]Mapping fields for {name} ({len(tables)} tables) [Src:{source_lang} -> Tgt:{target_lang}]...[/bold cyan]")
1149
-
1150
2314
  try:
1151
2315
  db_url = url_resolver(conn_obj, db.get("global_import"))
1152
2316
  engine = sa.create_engine(db_url)
1153
2317
  inspector = inspect(engine)
1154
-
2318
+
1155
2319
  for table_name, details in tables.items():
1156
- if not details.get("field_map"):
1157
- continue
2320
+ domain = details.get("domain")
2321
+
2322
+ # If domain is missing (e.g. manual entry without rule), try fallback inference
2323
+ if not domain:
2324
+ # Infer from rules
2325
+ for r in rules:
2326
+ pat = r.get("pattern")
2327
+ mtype = r.get("match_type", "exact")
2328
+ is_match = False
2329
+ if mtype == "exact" and table_name == pat:
2330
+ is_match = True
2331
+ elif mtype == "prefix" and table_name.startswith(pat):
2332
+ is_match = True
2333
+ elif mtype == "regex":
2334
+ import re
2335
+
2336
+ if re.search(pat, table_name):
2337
+ is_match = True
2338
+
2339
+ if is_match:
2340
+ domain = r.get("domain")
2341
+ # console.print(f"DEBUG: {table_name} matched rule {pat} -> {domain}")
2342
+ break
2343
+
2344
+ if not domain:
2345
+ # console.print(f"DEBUG: No match for {table_name}, defaulting to common")
2346
+ domain = "common"
2347
+
2348
+ # Target Resolution
2349
+ explicit_fm_path = details.get("field_map_path")
2350
+ if explicit_fm_path:
2351
+ # Use Explicit Path from Whitelist
2352
+ if Path(explicit_fm_path).is_absolute():
2353
+ target_file = Path(explicit_fm_path)
2354
+ else:
2355
+ # Ensure project_root is available or fallback to cwd
2356
+ if "project_root" not in locals():
2357
+ try:
2358
+ project_root = config_path.parent.parent.parent
2359
+ except:
2360
+ project_root = Path.cwd()
2361
+ target_file = project_root / explicit_fm_path
2362
+
2363
+ target_dir = target_file.parent
2364
+ else:
2365
+ # Fallback Logic
2366
+ # Target Dir: fields/{db_domain}/{domain}
2367
+ target_dir = db_target_root / domain
2368
+ target_file = target_dir / f"{table_name}.py"
1158
2369
 
1159
- domain = details.get("domain", "common")
1160
-
1161
- # Target Dir
1162
- target_dir = fields_root_path / domain
2370
+ # Output Initialization
1163
2371
  target_dir.mkdir(parents=True, exist_ok=True)
1164
2372
  if not (target_dir / "__init__.py").exists():
1165
2373
  (target_dir / "__init__.py").touch()
1166
-
1167
- # Python Target File
1168
- target_file = target_dir / f"{table_name}.py"
1169
-
2374
+
1170
2375
  # Skip if exists and not force
1171
2376
  if target_file.exists() and not force:
1172
2377
  # console.print(f"DEBUG: Skipping existing {table_name}")
1173
2378
  continue
1174
-
2379
+
1175
2380
  try:
1176
- cols = inspector.get_columns(table_name)
2381
+ # Fix for SA 2.0 / Clickhouse: inspect connection, not engine
2382
+ with engine.connect() as conn:
2383
+ cols = inspect(conn).get_columns(table_name)
1177
2384
  # console.print(f"DEBUG: Generating {table_name} with {len(cols)} columns")
1178
2385
  field_map = {}
1179
-
1180
- full_table_name = f"{domain}.{table_name}"
1181
-
2386
+
2387
+ full_table_name = f"{db_domain}.{domain}.{table_name}"
2388
+
1182
2389
  for c in cols:
1183
2390
  col_name = c["name"]
1184
2391
  col_type = str(c["type"])
1185
-
2392
+
1186
2393
  # 1. Register / Get Canonical Field
1187
- trans_msg = manager.register_field(col_name, col_type, full_table_name)
2394
+ trans_msg = manager.register_field(
2395
+ col_name, col_type, full_table_name
2396
+ )
1188
2397
  if trans_msg:
1189
2398
  console.print(f" [dim]{trans_msg}[/dim]")
1190
-
2399
+
1191
2400
  # 2. Get Field Definition
1192
2401
  fid = manager._generate_id(col_name, col_type)
1193
2402
  field_def = manager.fields.get(fid)
1194
-
2403
+
1195
2404
  if field_def:
1196
2405
  # 3. Translate if needed
1197
2406
  if target_lang not in field_def.aliases:
1198
2407
  manager.translate_alias(fid, target_lang)
1199
-
2408
+
1200
2409
  # 4. Determine Target Column
1201
- target_alias = field_def.aliases.get(target_lang) or field_def.aliases.get("en") or col_name
2410
+ target_alias = (
2411
+ field_def.aliases.get(target_lang)
2412
+ or field_def.aliases.get("en")
2413
+ or col_name
2414
+ )
1202
2415
  target_col = manager.generate_target_column(target_alias)
1203
-
2416
+
1204
2417
  if not field_def.target_column:
1205
2418
  field_def.target_column = target_col
1206
2419
  else:
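Two smaller changes in the hunk above are easy to miss: fields are now registered under a db_domain.domain.table namespace, and column introspection runs against a live connection rather than the Engine, which the inline comment attributes to SQLAlchemy 2.0 / ClickHouse behaviour. A minimal sketch of that introspection pattern, with a placeholder URL and table name:

    import sqlalchemy as sa

    def fetch_columns(db_url: str, table_name: str):
        # Inspect the connection, not the engine, so dialects that require it under
        # SQLAlchemy 2.0 (e.g. ClickHouse, per the comment in the diff) still work.
        engine = sa.create_engine(db_url)
        with engine.connect() as conn:
            return sa.inspect(conn).get_columns(table_name)

    # e.g. fetch_columns("postgresql+psycopg2://user:pass@host/dbname", "orders")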
@@ -1220,27 +2433,36 @@ def _run_field_map_generation(context, config_path, target_dbs, url_resolver, fo
1220
2433
  for col_name in field_map.keys():
1221
2434
  lines.append(f' "{col_name}",')
1222
2435
  lines.append("]")
1223
-
2436
+
1224
2437
  lines.append("")
1225
- lines.append(f'field_map: Mapping[str, str] = FieldMapFactory.create("{table_name}", COLUMNS)')
1226
-
2438
+ lines.append(
2439
+ f'field_map: Mapping[str, str] = FieldMapFactory.create("{full_table_name}", COLUMNS)'
2440
+ )
2441
+
1227
2442
  # Generate metadata call
1228
2443
  lines.append("")
1229
- lines.append(f'metadata: Mapping[str, Any] = FieldMapFactory.create_metadata("{table_name}", COLUMNS)')
1230
-
2444
+ lines.append(
2445
+ f'metadata: Mapping[str, Any] = FieldMapFactory.create_metadata("{full_table_name}", COLUMNS)'
2446
+ )
2447
+
1231
2448
  with open(target_file, "w") as f:
1232
2449
  f.write("\n".join(lines))
1233
-
2450
+
1234
2451
  except Exception as e:
1235
- console.print(f"[red]Error mapping {table_name}: {e}[/red]")
1236
- if context.params.get("debug"): raise e
1237
-
2452
+ console.print(f"[red]Error processing {table_name}: {e}[/red]")
2453
+ continue
2454
+
1238
2455
  except Exception as e:
1239
- console.print(f"[red]Error processing DB {name}: {e}[/red]")
2456
+ console.print(f"[red]Error connecting/inspecting DB {name}: {e}[/red]")
1240
2457
 
1241
- # Save Global Repo
1242
- manager.save_to_yaml(global_repo_path)
1243
- console.print("Saved Global Field Repository.")
2458
+ # Save Updates to Global Repository
2459
+ try:
2460
+ manager.save_to_yaml(global_repo_path)
2461
+ console.print(
2462
+ f"[green]Updated Global Field Repository at {global_repo_path} ({len(manager.fields)} fields)[/green]"
2463
+ )
2464
+ except Exception as e:
2465
+ console.print(f"[red]Failed to save Global Field Repository: {e}[/red]")
1244
2466
 
1245
2467
 
1246
2468
  if __name__ == "__main__":