PyPI - euler-preprocess - Versions diffs - 1.8.0__tar.gz → 2.0.0__tar.gz - Mend

euler-preprocess 1.8.0__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{euler_preprocess-1.8.0 → euler_preprocess-2.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: euler-preprocess
-Version: 1.8.0
+Version: 2.0.0
 Summary: Physics-based preprocessing (fog, etc.) for RGB+depth datasets
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown

{euler_preprocess-1.8.0 → euler_preprocess-2.0.0}/euler_preprocess/cli.py RENAMED Viewed

@@ -12,7 +12,7 @@ from pathlib import Path
 from euler_preprocess.common.dataset import build_dataset
 from euler_preprocess.common.logging import get_logger, log_dataset_info
-from euler_preprocess.common.output import prepare_output_backend
+from euler_preprocess.common.output import prepare_output_backends
 # ---------------------------------------------------------------------------
@@ -55,7 +55,8 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
     required_modalities = transform_class.REQUIRED_MODALITIES
     required_hierarchical = transform_class.REQUIRED_HIERARCHICAL_MODALITIES or None
     dataset = build_dataset(config, required_modalities, required_hierarchical)
-    output_backend = prepare_output_backend(config, dataset, transform_class)
+    output_backends = prepare_output_backends(config, dataset, transform_class)
+    primary_backend = next(iter(output_backends.values()))
     dataset_name = config.get("dataset", "dataset")
     raw_modalities = {
@@ -69,14 +70,25 @@ def _run_transform(args: argparse.Namespace, transform_class: type) -> int:
         else:
             modality_info[name] = entry
     log_dataset_info(logger, dataset_name, len(dataset), modality_info, use_gpu)
-    logger.info("Output path: %s", output_backend.root)
+    for slot, backend in output_backends.items():
+        logger.info("Output path [%s]: %s", slot, backend.root)
     transform_kwargs: dict = {
         "config_path": str(transform_config_path),
-        "out_path": str(output_backend.root),
-        "output_backend": output_backend,
+        "out_path": str(primary_backend.root),
     }
     init_params = inspect.signature(transform_class.__init__).parameters
+    if "output_backends" in init_params:
+        transform_kwargs["output_backends"] = output_backends
+    else:
+        transform_kwargs["output_backend"] = primary_backend
+        if len(output_backends) > 1:
+            extra = [s for s in output_backends if s != next(iter(output_backends))]
+            logger.warning(
+                "%s does not accept output_backends; ignoring auxiliary slots: %s",
+                transform_class.__name__,
+                extra,
+            )
     if "strict" in init_params:
         transform_kwargs["strict"] = bool(getattr(args, "strict", False))
     elif getattr(args, "strict", False):

{euler_preprocess-1.8.0 → euler_preprocess-2.0.0}/euler_preprocess/common/dataset.py RENAMED Viewed

@@ -1,11 +1,12 @@
 from __future__ import annotations
-def _parse_modality_entry(entry: str | dict) -> dict:
-    """Normalise a modality config entry to ``{path, split}``."""
+def _make_modality(entry: str | dict):
+    from euler_loading import Modality
     if isinstance(entry, str):
-        return {"path": entry}
-    return entry
+        return Modality(entry)
+    return Modality(entry["path"], split=entry.get("split"))
 def build_dataset(
@@ -15,19 +16,15 @@ def build_dataset(
 ):
     """Build a ``MultiModalDataset`` from a config dict.
-    Args:
-        config: Top-level dataset config containing ``modalities`` and
-            optionally ``hierarchical_modalities`` mappings.  Each modality
-            value may be a plain path string or a dict with ``path`` and
-            an optional ``split`` key.
-        required_modalities: Set of modality names that must be present.
-        required_hierarchical: Optional set of hierarchical modality names
-            that must be present.
-    Returns:
-        A ``MultiModalDataset`` instance.
+    Each modality entry is either a plain path string or a dict with
+    ``path`` and optional ``split``.  Loader resolution (which function to
+    call, which module to use) is handled by euler-loading via the
+    ds-crawler index at each path — point the config at a path whose
+    index declares the function you want (e.g. a ``sky_mask`` index for
+    boolean sky masks vs. a ``class_segmentation`` index for raw class
+    id maps).
     """
-    from euler_loading import Modality, MultiModalDataset
+    from euler_loading import MultiModalDataset
     raw_modalities = config.get("modalities", {})
     raw_hierarchical = config.get("hierarchical_modalities", {})
@@ -48,15 +45,10 @@ def build_dataset(
                 f"contain at least: {', '.join(sorted(required_hierarchical))}"
             )
-    modalities = {}
-    for name, entry in raw_modalities.items():
-        parsed = _parse_modality_entry(entry)
-        modalities[name] = Modality(parsed["path"], split=parsed.get("split"))
-    hierarchical_modalities = {}
-    for name, entry in raw_hierarchical.items():
-        parsed = _parse_modality_entry(entry)
-        hierarchical_modalities[name] = Modality(parsed["path"], split=parsed.get("split"))
+    modalities = {name: _make_modality(entry) for name, entry in raw_modalities.items()}
+    hierarchical_modalities = {
+        name: _make_modality(entry) for name, entry in raw_hierarchical.items()
+    }
     return MultiModalDataset(
         modalities=modalities,

{euler_preprocess-1.8.0 → euler_preprocess-2.0.0}/euler_preprocess/common/output.py RENAMED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 import json
 import tempfile
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
@@ -19,6 +20,39 @@ _PIPELINE_OUTPUT_STORAGE_KINDS = {"directory", "zip", "file"}
 _IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff"}
+@dataclass(frozen=True)
+class OutputSlotSpec:
+    """Auxiliary-slot spec for transforms producing more than one modality.
+    Auxiliary slots reuse the source modality's hierarchy/indexing so the
+    written files line up with the input dataset, but supply their own writer
+    and ds-crawler metadata so the resulting on-disk dataset advertises the
+    correct modality type and loader.
+    Attributes:
+        source_modality: Name of the input modality whose hierarchy and
+            per-sample basenames are mirrored when writing auxiliary outputs.
+        writer: Writer callable invoked as ``writer(target, value, meta)``.
+            ``target`` is either a filesystem path (``str``/``PathLike``) or a
+            binary stream (when the writer is marked stream-supported and the
+            output is a zip).
+        index_overlay: Mapping merged on top of the source modality's
+            ``index_output`` to produce the ds-crawler head metadata for this
+            slot.  Use this to override ``name``/``type``/``euler_train``/
+            ``euler_loading``/``meta`` while inheriting indexing/hierarchy.
+        output_extension: When set (e.g. ``".npy"``), source basenames are
+            rewritten with this extension before writing.
+        meta: Optional ``modality_meta`` passed to the writer.  Defaults to
+            the ``meta`` field from the merged ``index_overlay`` when set there.
+    """
+    source_modality: str
+    writer: Callable[..., None]
+    index_overlay: dict[str, Any]
+    output_extension: str | None = None
+    meta: dict[str, Any] | None = None
 @dataclass(frozen=True)
 class PipelineOutputTargetConfig:
     """Runtime-resolved pipeline output target for a single transform output."""
@@ -218,6 +252,7 @@ class SourceBackedOutputBackend:
         dataset_writer: DatasetWriter | ZipDatasetWriter,
         modality_writer: Any,
         modality_meta: dict[str, Any] | None,
+        output_extension: str | None = None,
         pipeline_manifest_path: Path | None = None,
         pipeline_manifest_targets: list[PipelineOutputTargetConfig] | None = None,
     ) -> None:
@@ -226,6 +261,7 @@ class SourceBackedOutputBackend:
         self.dataset_writer = dataset_writer
         self.modality_writer = modality_writer
         self.modality_meta = modality_meta
+        self.output_extension = output_extension
         self.pipeline_manifest_path = pipeline_manifest_path
         self.pipeline_manifest_targets = pipeline_manifest_targets or []
@@ -254,8 +290,13 @@ class SourceBackedOutputBackend:
                 "requires sample['meta'][source_modality]['path']."
             )
-        basename = Path(str(source_meta["path"])).name
-        relative_path = str(source_meta["path"])
+        source_path = Path(str(source_meta["path"]))
+        if self.output_extension is not None:
+            basename = source_path.stem + self.output_extension
+            relative_path = str(source_path.with_suffix(self.output_extension))
+        else:
+            basename = source_path.name
+            relative_path = str(source_path)
         source_meta_copy = dict(source_meta)
         if isinstance(self.dataset_writer, ZipDatasetWriter):
@@ -380,7 +421,11 @@ def prepare_output_backend(
     dataset: MultiModalDataset,
     transform_class: type,
 ) -> SourceBackedOutputBackend:
-    """Create a source-backed output backend for a transform run."""
+    """Create a source-backed output backend for a transform run.
+    Used for the *primary* output slot.  Transforms with auxiliary outputs
+    should use :func:`prepare_output_backends` (plural).
+    """
     source_modality = getattr(transform_class, "SOURCE_MODALITY", None)
     if not isinstance(source_modality, str) or not source_modality:
@@ -436,3 +481,184 @@ def prepare_output_backend(
         pipeline_manifest_path=pipeline_manifest_path,
         pipeline_manifest_targets=[pipeline_target] if pipeline_target else [],
     )
+def _build_auxiliary_backend(
+    *,
+    spec: OutputSlotSpec,
+    pipeline_target: PipelineOutputTargetConfig,
+    dataset: MultiModalDataset,
+) -> SourceBackedOutputBackend:
+    """Create a backend for an auxiliary slot using its OutputSlotSpec.
+    The auxiliary backend does not own the pipeline manifest — that is
+    aggregated and written by the primary backend.
+    """
+    if pipeline_target.storage == "file":
+        raise ValueError(
+            f"Pipeline output target '{pipeline_target.slot}' uses "
+            "unsupported storage='file'"
+        )
+    root = Path(pipeline_target.path)
+    zip_mode = pipeline_target.storage == "zip"
+    base_index = dataset.get_modality_index(spec.source_modality)
+    index_output = _build_auxiliary_index(base_index, spec)
+    dataset_writer = create_dataset_writer_from_index(
+        index_output=index_output,
+        root=root,
+        zip=zip_mode,
+    )
+    modality_meta = spec.meta
+    if modality_meta is None:
+        head = index_output.get("head") or {}
+        modality_meta = (head.get("modality") or {}).get("meta")
+        if modality_meta is None:
+            modality_meta = index_output.get("meta")
+    return SourceBackedOutputBackend(
+        source_modality=spec.source_modality,
+        root=root,
+        dataset_writer=dataset_writer,
+        modality_writer=spec.writer,
+        modality_meta=modality_meta,
+        output_extension=spec.output_extension,
+        pipeline_manifest_path=None,
+        pipeline_manifest_targets=[],
+    )
+def _build_auxiliary_index(
+    base_index: dict[str, Any], spec: OutputSlotSpec
+) -> dict[str, Any]:
+    """Apply ``spec.index_overlay`` to a copy of ``base_index``.
+    The overlay's recognised keys map to fields used by ds-crawler's writer
+    construction.  ``name`` / ``type`` rewrite the dataset id+name and
+    modality key on both the contract head and the legacy top-level fields;
+    ``meta`` overrides the modality's meta dict; ``euler_train`` /
+    ``euler_loading`` replace those addon entries.  Any other overlay keys
+    are passed through as top-level fields for the legacy writer path.
+    """
+    overlay = dict(spec.index_overlay)
+    index_output: dict[str, Any] = {**base_index}
+    head = base_index.get("head")
+    if isinstance(head, dict):
+        new_head = json.loads(json.dumps(head))  # deep copy via JSON
+        new_head.setdefault("dataset", {})
+        new_head.setdefault("modality", {})
+        new_head.setdefault("addons", {})
+        if "name" in overlay:
+            name = overlay["name"]
+            new_head["dataset"]["id"] = name
+            new_head["dataset"]["name"] = name
+        if "type" in overlay:
+            new_head["modality"]["key"] = overlay["type"]
+        if "meta" in overlay:
+            new_head["modality"]["meta"] = dict(overlay["meta"])
+        if "euler_train" in overlay:
+            new_head["addons"]["euler_train"] = dict(overlay["euler_train"])
+        if "euler_loading" in overlay:
+            new_head["addons"]["euler_loading"] = dict(overlay["euler_loading"])
+        index_output["head"] = new_head
+    # Legacy top-level fields used by the non-contract writer construction
+    # path.  Preserved alongside the head for compatibility.
+    for key, value in overlay.items():
+        if isinstance(value, dict):
+            index_output[key] = dict(value)
+        else:
+            index_output[key] = value
+    return index_output
+def _resolve_primary_slot(transform_class: type) -> str:
+    """Return the *primary* slot name declared by *transform_class*."""
+    output_slots = getattr(transform_class, "OUTPUT_SLOTS", None)
+    if output_slots:
+        return output_slots[0]
+    output_slot = getattr(transform_class, "OUTPUT_SLOT", None)
+    if isinstance(output_slot, str) and output_slot:
+        return output_slot
+    source_modality = getattr(transform_class, "SOURCE_MODALITY", None)
+    if isinstance(source_modality, str) and source_modality:
+        return source_modality
+    raise ValueError(
+        f"{transform_class.__name__} declares no output slot "
+        "(set OUTPUT_SLOT, OUTPUT_SLOTS, or SOURCE_MODALITY)"
+    )
+def prepare_output_backends(
+    config: dict[str, Any],
+    dataset: MultiModalDataset,
+    transform_class: type,
+) -> dict[str, SourceBackedOutputBackend]:
+    """Create per-slot output backends for *transform_class*.
+    Returns ``{slot_name: backend}``.  The primary slot (the first entry of
+    ``OUTPUT_SLOTS``, falling back to ``OUTPUT_SLOT`` / ``SOURCE_MODALITY``) is
+    always present.  Auxiliary slots declared in
+    :attr:`Transform.OUTPUT_SLOT_SPECS` are included only when the dataset
+    config's ``pipeline.output_targets`` contains a matching entry; otherwise
+    the slot is silently omitted (auxiliary outputs are opt-in).
+    The returned dict's iteration order matches the declared
+    ``OUTPUT_SLOTS`` order.
+    """
+    primary_slot = _resolve_primary_slot(transform_class)
+    primary_backend = prepare_output_backend(config, dataset, transform_class)
+    backends: dict[str, SourceBackedOutputBackend] = {primary_slot: primary_backend}
+    slot_specs = getattr(transform_class, "OUTPUT_SLOT_SPECS", None) or {}
+    pipeline = parse_pipeline_config(config)
+    declared_slots = getattr(transform_class, "OUTPUT_SLOTS", ()) or ()
+    aux_slots = [s for s in declared_slots if s != primary_slot]
+    if pipeline is not None and slot_specs:
+        for slot in aux_slots:
+            spec = slot_specs.get(slot)
+            if spec is None:
+                continue
+            target = pipeline.get_output_target(slot)
+            if target is None:
+                continue
+            backends[slot] = _build_auxiliary_backend(
+                spec=spec,
+                pipeline_target=target,
+                dataset=dataset,
+            )
+    # Aggregate every slot we actually wrote into the manifest the primary
+    # backend will emit, so a single manifest documents the full set.
+    if (
+        pipeline is not None
+        and pipeline.outputs_manifest_path
+        and len(backends) > 1
+    ):
+        manifest_targets: list[PipelineOutputTargetConfig] = list(
+            primary_backend.pipeline_manifest_targets
+        )
+        for slot in aux_slots:
+            if slot not in backends:
+                continue
+            target = pipeline.get_output_target(slot)
+            if target is not None:
+                manifest_targets.append(target)
+        primary_backend.pipeline_manifest_targets = manifest_targets
+    return backends

{euler_preprocess-1.8.0 → euler_preprocess-2.0.0}/euler_preprocess/common/transform.py RENAMED Viewed

@@ -3,7 +3,7 @@ from __future__ import annotations
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
 from pathlib import Path
-from typing import ClassVar
+from typing import Any, ClassVar
 class Transform(ABC):
@@ -11,12 +11,22 @@ class Transform(ABC):
     Subclasses declare the modalities they need via class variables and
     implement :meth:`run` to process samples.
+    Output slots:
+        Most transforms produce a single output (the *primary* slot, declared
+        via :attr:`OUTPUT_SLOT`).  Transforms that produce additional auxiliary
+        outputs (e.g. fog β / L_s maps) declare them in :attr:`OUTPUT_SLOTS`
+        together with per-slot specs in :attr:`OUTPUT_SLOT_SPECS`.  Auxiliary
+        slots are opt-in: they are only written when the dataset config's
+        ``pipeline.output_targets`` includes a matching entry.
     """
     REQUIRED_MODALITIES: ClassVar[set[str]] = set()
     REQUIRED_HIERARCHICAL_MODALITIES: ClassVar[set[str]] = set()
     SOURCE_MODALITY: ClassVar[str | None] = None
     OUTPUT_SLOT: ClassVar[str | None] = None
+    OUTPUT_SLOTS: ClassVar[tuple[str, ...]] = ()
+    OUTPUT_SLOT_SPECS: ClassVar[dict[str, Any]] = {}
     OUTPUT_INDEX_META_OVERRIDES: ClassVar[dict[str, object]] = {}
     @abstractmethod

{euler_preprocess-1.8.0 → euler_preprocess-2.0.0}/euler_preprocess/fog/models.py RENAMED Viewed

@@ -243,6 +243,33 @@ def uses_estimated_airlight(al_spec) -> bool:
     return al_spec is None or al_spec in AIRLIGHT_METHODS
+def broadcast_k_field(k_field: Any, height: int, width: int) -> np.ndarray:
+    """Return ``k_field`` as a ``(H, W)`` float32 map (broadcasting if scalar)."""
+    arr = np.asarray(k_field, dtype=np.float32)
+    if arr.ndim == 0:
+        return np.broadcast_to(arr, (height, width)).astype(np.float32, copy=True)
+    if arr.shape == (height, width):
+        return arr.astype(np.float32, copy=False)
+    raise ValueError(
+        f"k_field must be scalar or shape ({height}, {width}); got {arr.shape}"
+    )
+def broadcast_ls_field(ls_field: Any, height: int, width: int) -> np.ndarray:
+    """Return ``ls_field`` as a ``(H, W, 3)`` float32 map (broadcasting if needed)."""
+    arr = np.asarray(ls_field, dtype=np.float32)
+    if arr.shape == (3,):
+        return np.broadcast_to(arr, (height, width, 3)).astype(np.float32, copy=True)
+    if arr.shape == (1, 1, 3):
+        return np.broadcast_to(arr, (height, width, 3)).astype(np.float32, copy=True)
+    if arr.shape == (height, width, 3):
+        return arr.astype(np.float32, copy=False)
+    raise ValueError(
+        f"ls_field must have shape (3,), (1, 1, 3), or "
+        f"({height}, {width}, 3); got {arr.shape}"
+    )
 def apply_model(
     rgb: np.ndarray,
     depth_m: np.ndarray,
@@ -251,7 +278,18 @@ def apply_model(
     rng: np.random.Generator,
     contrast_threshold_default: float,
     estimated_airlight: np.ndarray,
-) -> tuple[np.ndarray, float, np.ndarray]:
+) -> tuple[np.ndarray, float, np.ndarray, np.ndarray, np.ndarray]:
+    """Apply a fog model to ``rgb``.
+    Returns:
+        Tuple ``(foggy, k_mean, ls_base, k_map, ls_map)``:
+        * ``foggy``: ``(H, W, 3)`` foggy RGB image.
+        * ``k_mean``: scalar mean scattering coefficient (for filenames/logs).
+        * ``ls_base``: ``(3,)`` base atmospheric light (for filenames/logs).
+        * ``k_map``: ``(H, W)`` β-field actually used (broadcast for uniform).
+        * ``ls_map``: ``(H, W, 3)`` L_s-field actually used (broadcast for uniform).
+    """
     if model_name not in DEFAULT_MODEL_CONFIGS:
         raise ValueError(f"Unsupported fog model: {model_name}")
     visibility = float(sample_value(model_cfg.get("visibility_m"), rng))
@@ -269,14 +307,19 @@ def apply_model(
         sampled_al = sample_value(al_spec, rng)
         ls_base = normalize_atmospheric_light(np.asarray(sampled_al))
+    height, width = depth_m.shape
     if model_name == "uniform":
         ls_field = ls_base.reshape(1, 1, 3)
-        return apply_fog(rgb, depth_m, k_mean, ls_field), k_mean, ls_base
+        foggy = apply_fog(rgb, depth_m, k_mean, ls_field)
+        k_map = broadcast_k_field(k_mean, height, width)
+        ls_map = broadcast_ls_field(ls_base, height, width)
+        return foggy, k_mean, ls_base, k_map, ls_map
     if model_name in ("heterogeneous_k", "heterogeneous_k_ls"):
         k_cfg = model_cfg.get("k_hetero", {})
-        k_scales = resolve_scales(k_cfg, depth_m.shape[0], depth_m.shape[1], rng)
-        k_noise = perlin_fbm(depth_m.shape[0], depth_m.shape[1], k_scales, rng)
+        k_scales = resolve_scales(k_cfg, height, width, rng)
+        k_noise = perlin_fbm(height, width, k_scales, rng)
         min_factor = float(sample_value(k_cfg.get("min_factor", 1.0), rng))
         max_factor = float(sample_value(k_cfg.get("max_factor", 1.0), rng))
         k_field = modulate_with_noise(
@@ -291,8 +334,8 @@ def apply_model(
     if model_name in ("heterogeneous_ls", "heterogeneous_k_ls"):
         ls_cfg = model_cfg.get("ls_hetero", {})
-        ls_scales = resolve_scales(ls_cfg, depth_m.shape[0], depth_m.shape[1], rng)
-        ls_noise = perlin_fbm(depth_m.shape[0], depth_m.shape[1], ls_scales, rng)
+        ls_scales = resolve_scales(ls_cfg, height, width, rng)
+        ls_noise = perlin_fbm(height, width, ls_scales, rng)
         min_factor = float(sample_value(ls_cfg.get("min_factor", 1.0), rng))
         max_factor = float(sample_value(ls_cfg.get("max_factor", 1.0), rng))
         ls_field = modulate_with_noise(
@@ -306,4 +349,7 @@ def apply_model(
     else:
         ls_field = ls_base.reshape(1, 1, 3)
-    return apply_fog(rgb, depth_m, k_field, ls_field), k_mean, ls_base
+    foggy = apply_fog(rgb, depth_m, k_field, ls_field)
+    k_map = broadcast_k_field(k_field, height, width)
+    ls_map = broadcast_ls_field(ls_field, height, width)
+    return foggy, k_mean, ls_base, k_map, ls_map

euler-preprocess 1.8.0__tar.gz → 2.0.0__tar.gz

euler-preprocess 1.8.0__tar.gz → 2.0.0__tar.gz