nerfstudio-pixelnerf 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PixelNeRFConfig.py ADDED
@@ -0,0 +1,37 @@
+ """nerfstudio-pixelnerf/PixelNeRFConfig.py"""
+
+ from PixelNeRFDataManager import PixelNeRFDataManagerConfig
+ from PixelNeRFModel import PixelNeRFModelConfig
+ from nerfstudio.configs.base_config import ViewerConfig
+ from nerfstudio.engine.optimizers import AdamOptimizerConfig
+ from nerfstudio.engine.schedulers import ExponentialDecaySchedulerConfig
+ from nerfstudio.engine.trainer import TrainerConfig
+ from nerfstudio.plugins.types import MethodSpecification
+ from PixelNerfPipeline import PixelNerfPipelineConfig
+
+
+ PixelNeRF = MethodSpecification(
+     config=TrainerConfig(
+         method_name="pixel-nerf",
+         steps_per_eval_batch=500,
+         steps_per_save=2000,
+         max_num_iterations=300000,
+         mixed_precision=True,
+         pipeline=PixelNerfPipelineConfig(
+             datamanager=PixelNeRFDataManagerConfig(),
+             model=PixelNeRFModelConfig(),
+         ),
+         optimizers={
+             "network": {
+                 "optimizer": AdamOptimizerConfig(lr=1e-3, eps=1e-15),
+                 "scheduler": ExponentialDecaySchedulerConfig(
+                     lr_final=5e-5,
+                     max_steps=300000,
+                 ),
+             },
+         },
+         viewer=ViewerConfig(num_rays_per_chunk=1 << 11),
+         vis="tensorboard",
+     ),
+     description="Configuration for the PixelNeRF method",
+ )
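Note: the entry point registered later in this diff under [nerfstudio.method_configs] exposes the specification above to nerfstudio's CLI, so once the wheel is installed the method should be trainable with `ns-train pixel-nerf --data <path>`. A minimal sketch of driving the same TrainerConfig programmatically instead, assuming nerfstudio's standard config workflow (set_timestamp and setup are nerfstudio APIs; the dataset path is hypothetical):

    from pathlib import Path
    from PixelNeRFConfig import PixelNeRF

    trainer_config = PixelNeRF.config                                 # the TrainerConfig defined above
    trainer_config.pipeline.datamanager.data = Path("data/my_scene")  # hypothetical dataset path (normally given via --data)
    trainer_config.set_timestamp()                                    # nerfstudio names the run by timestamp
    trainer = trainer_config.setup()                                  # instantiates the Trainer
    trainer.setup()                                                   # builds pipeline, datamanager, and model
    trainer.train()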
PixelNeRFDataManager.py ADDED
@@ -0,0 +1,86 @@
+ from dataclasses import dataclass, field
+ from typing import Dict, Literal, Tuple, Type, Union
+ from nerfstudio.cameras.rays import RayBundle
+ from nerfstudio.data.datamanagers.base_datamanager import (
+     VanillaDataManager,
+     VanillaDataManagerConfig,
+ )
+
+ import random
+ import torch
+
+
+ @dataclass
+ class PixelNeRFDataManagerConfig(VanillaDataManagerConfig):
+     """Configuration for the PixelNeRF data manager.
+
+     Args:
+         _target: The target class to instantiate, in this case PixelNeRFDataManager.
+     """
+     _target: Type = field(default_factory=lambda: PixelNeRFDataManager, init=False)
+     num_source_views: int = 3
+     """Number of source views sampled from the dataset to condition the pixelNeRF model. The paper typically uses 3 views, but this number can be experimented with."""
+
+
+ class PixelNeRFDataManager(VanillaDataManager):
+     config: PixelNeRFDataManagerConfig
+
+     def __init__(
+         self,
+         config: PixelNeRFDataManagerConfig,
+         device: Union[torch.device, str] = "cpu",
+         test_mode: Literal["test", "val", "inference"] = "val",
+         **kwargs,
+     ):
+         # Call super() to inherit all of Nerfstudio's dataset loading.
+         super().__init__(
+             config=config, device=device, test_mode=test_mode, **kwargs
+         )
+
+     def _sample_source_views(self, num_views: int) -> Dict[str, torch.Tensor]:
+         """
+         Randomly samples N images from the dataset to act as context (source views)
+         and formats them for pixelNeRF.
+         """
+         dataset = self.train_dataset
+         # A smarter sampling strategy could be used here; for now, N views are drawn
+         # uniformly at random from the dataset (a sketch of one alternative follows this file).
+         indices = random.sample(range(len(dataset)), num_views)
+
+         src_rgbs = []
+         src_poses = []
+         focals = []
+         cs = []
+
+         for idx in indices:
+             data = dataset[idx]
+             src_rgbs.append(data["image"])
+             camera = dataset.cameras[idx]
+             c2w_3x4 = camera.camera_to_worlds
+             c2w_4x4 = torch.cat([
+                 c2w_3x4,
+                 torch.tensor([[0.0, 0.0, 0.0, 1.0]], device=c2w_3x4.device)
+             ], dim=0)
+             src_poses.append(c2w_4x4)
+
+             focals.append(torch.tensor([camera.fx.item(), camera.fy.item()]))
+             cs.append(torch.tensor([camera.cx.item(), camera.cy.item()]))
+
+         return {
+             "src_rgbs": torch.stack(src_rgbs).unsqueeze(0),
+             "src_cameras": torch.stack(src_poses).unsqueeze(0),
+             "focal": torch.stack(focals),
+             "c": torch.stack(cs),
+         }
+
+     def next_train(self, step: int) -> Tuple[RayBundle, Dict]:
+         # Let the parent class sample a ray bundle and pixel batch, then attach the
+         # source-view context that pixelNeRF needs as ray metadata.
+         ray_bundle, batch = super().next_train(step)
+         source_data = self._sample_source_views(self.config.num_source_views)
+         ray_bundle.metadata.update(source_data)
+         return ray_bundle, batch
+
+     def next_eval(self, step: int) -> Tuple[RayBundle, Dict]:
+         """Same logic as next_train, but drawing from the eval sampler."""
+         ray_bundle, batch = super().next_eval(step)
+         source_data = self._sample_source_views(self.config.num_source_views)
+         ray_bundle.metadata.update(source_data)
+         return ray_bundle, batch
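Note: as flagged in _sample_source_views above, uniform random sampling of source views is a placeholder. A minimal sketch of one alternative, picking the views whose camera centers are nearest to a reference camera (the helper below is hypothetical and not part of this package; it only relies on the nerfstudio Cameras fields already used above):

    import torch

    def sample_nearest_source_views(dataset, ref_idx: int, num_views: int) -> list[int]:
        # Camera centers are the translation column of the (N, 3, 4) camera-to-world matrices.
        centers = dataset.cameras.camera_to_worlds[:, :3, 3]        # (N, 3)
        dists = torch.linalg.norm(centers - centers[ref_idx], dim=-1)
        dists[ref_idx] = float("inf")                               # never pick the reference view itself
        return torch.topk(dists, k=num_views, largest=False).indices.tolist()

Such a helper could replace random.sample in _sample_source_views, with ref_idx taken from the target view being rendered.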
PixelNeRFModel.py ADDED
@@ -0,0 +1,199 @@
+ import sys
+ import os
+ from pathlib import Path
+
+ # Make the vendored pixelnerf sources importable before the pixelnerf imports below.
+ pixelnerf_src_root = str(Path(__file__).parent / "pixelnerf" / "src")
+
+ if pixelnerf_src_root not in sys.path:
+     sys.path.insert(0, pixelnerf_src_root)
+
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional, Type, Tuple, cast
+ from nerfstudio.cameras.cameras import Cameras
+ from nerfstudio.cameras.rays import RayBundle
+ from nerfstudio.data.scene_box import SceneBox
+ from nerfstudio.engine.callbacks import TrainingCallback, TrainingCallbackAttributes
+ from nerfstudio.models.base_model import Model, ModelConfig
+ from torch.nn import Parameter
+ from pixelnerf.src.model.models import PixelNeRFNet
+ from pixelnerf.src.render import NeRFRenderer
+ from dotmap import DotMap
+
+ import torch
+ import nerfstudio.utils.profiler as profiler
+
+
+ @dataclass
+ class PixelNeRFModelConfig(ModelConfig):
+     _target: Type = field(default_factory=lambda: PixelNeRFModel, init=False)
+     ckpt_path: Optional[str] = None
+     """Path to a .pth checkpoint file to load the network weights from. If not provided, will look for .pth files in the output directory and load the latest one."""
+     no_reload: bool = False
+     """If True, will not attempt to load from any checkpoint and will always train from scratch."""
+     out_dir: str = "outputs"
+     """Directory to scan for .pth checkpoints and to write checkpoints and logs to."""
+     exp_name: str = "default_exp"
+     """Name of the experiment, used as a subdirectory under out_dir for checkpoints and logs."""
+     encoder: Dict[str, Any] = field(
+         default_factory=lambda: {
+             "backbone": "resnet34",
+             "pretrained": True,
+             "num_layers": 4,
+         },
+         metadata={
+             "help": "Configuration for the pixelNeRF encoder. Currently the paper's default configuration."
+         },
+     )
+     renderer: Dict[str, Any] = field(
+         default_factory=lambda: {
+             "n_coarse": 64,
+             "n_fine": 32,
+             "n_fine_depth": 64,
+             "depth_std": 0.01,
+             "white_bkgd": False,
+         },
+         metadata={
+             "help": "Configuration for the pixelNeRF renderer. Currently the paper's default configuration."
+         },
+     )
+     lindisp: bool = False
+     """Whether to sample linearly in disparity (inverse depth) rather than in depth. The paper handles this during dataset preprocessing, so it is kept as a config option but defaults to False since it is not commonly used in NeRF implementations."""
+
+
+ class PixelNeRFModel(Model):
+     config: PixelNeRFModelConfig
+
+     def __init__(self, config, scene_box=None, num_train_data=0, **kwargs):
+         if scene_box is None:
+             scene_box = SceneBox(
+                 aabb=torch.tensor([[-1, -1, -1], [1, 1, 1]], dtype=torch.float32)
+             )
+         super().__init__(
+             config=config, scene_box=scene_box, num_train_data=num_train_data, **kwargs
+         )
+
+     def populate_modules(self):
+         super().populate_modules()
+         self.net = PixelNeRFNet(self.config)
+
+         self.renderer = NeRFRenderer.from_conf(
+             self.config.renderer,
+             lindisp=self.config.lindisp,
+         )
+
+         gpus = None
+         if torch.cuda.is_available():
+             print(f"Using {torch.cuda.device_count()} GPUs for parallelization")
+             gpus = list(range(torch.cuda.device_count()))
+         # Bind the network to the renderer; without CUDA it is bound without
+         # DataParallel so the renderer can still be called on CPU.
+         self.renderer = self.renderer.bind_parallel(self.net, gpus=gpus).eval()
+
+         if self.config.no_reload:
+             print("Not loading from ckpt, training from scratch...")
+         else:
+             self.load_from_ckpt(self.config.out_dir, force_latest=False)
+
+     def get_param_groups(self) -> Dict[str, List[Parameter]]:
+         return {"network": list(self.net.parameters())}
+
+     def get_training_callbacks(
+         self, training_callback_attributes: TrainingCallbackAttributes
+     ) -> List[TrainingCallback]:
+         return []
+
+     def get_loss_dict(
+         self, outputs, batch, metrics_dict=None
+     ) -> Dict[str, torch.Tensor]:
+         """MSE loss on the coarse pass plus, when present, the fine pass, as in the paper."""
+         loss = torch.nn.functional.mse_loss(outputs["rgb_coarse"], batch["rgb"])
+         if "rgb_fine" in outputs:
+             loss = loss + torch.nn.functional.mse_loss(
+                 outputs["rgb_fine"], batch["rgb"]
+             )
+         return {"rgb_loss": loss}
+
+     def get_metrics(
+         self, outputs, batch
+     ) -> Dict[str, torch.Tensor]:
+         """The paper only reports PSNR; additional metrics can be added here."""
+         pred = outputs.get("rgb_fine", outputs["rgb_coarse"])
+         gt = batch["rgb"].to(pred.device)
+         psnr = -10.0 * torch.log10(torch.mean((pred - gt) ** 2).clamp_min(1e-10))
+         return {"psnr": psnr}
+
+     def get_image_metrics_and_images(
+         self, outputs, batch
+     ) -> Tuple[Dict[str, float], Dict[str, torch.Tensor]]:
+         pred = outputs.get("rgb_fine", outputs["rgb_coarse"])
+         gt = batch["rgb"].to(pred.device)
+         psnr = -10.0 * torch.log10(torch.mean((pred - gt) ** 2).clamp_min(1e-10))
+         return {"psnr": float(psnr.item())}, {"rgb": pred, "rgb_gt": gt}
+
+     def load_from_ckpt(self, out_folder, force_latest=False):
+         if not os.path.exists(out_folder):
+             print("No ckpts found, training from scratch...")
+             return 0
+         ckpts = sorted(
+             [
+                 os.path.join(out_folder, f)
+                 for f in os.listdir(out_folder)
+                 if f.endswith(".pth")
+             ]
+         )
+         if self.config.ckpt_path and not force_latest:
+             if os.path.isfile(self.config.ckpt_path):
+                 ckpts = [self.config.ckpt_path]
+         if ckpts and not self.config.no_reload:
+             fpath = ckpts[-1]
+             self.net.load_state_dict(torch.load(fpath, map_location="cpu"))
+             print(f"Reloading from {fpath}")
+             # Assumes checkpoint names end in a six-digit step count, e.g. *_000123.pth.
+             return int(fpath[-10:-4])
+         print("No ckpts found, training from scratch...")
+         return 0
+
+     @profiler.time_function
+     def get_outputs(self, ray_bundle: RayBundle | Cameras) -> Dict[str, torch.Tensor | List]:
+         assert isinstance(ray_bundle, RayBundle)
+         device = next(self.net.parameters()).device
+         metadata = ray_bundle.metadata or {}
+
+         for key in ("src_rgbs", "src_cameras", "focal", "c"):
+             if key not in metadata:
+                 raise KeyError(f"Missing metadata key '{key}': the pipeline must inject source views")
+
+         src_images = metadata["src_rgbs"].squeeze(0).permute(0, 3, 1, 2).to(device)  # (NS, 3, H, W)
+         src_poses = metadata["src_cameras"].squeeze(0).to(device)  # (NS, 4, 4)
+         focal = metadata["focal"].to(device)  # (NS, 2)
+         c = metadata["c"].to(device)  # (NS, 2)
+
+         # Condition the pixelNeRF network on the source views before rendering.
+         self.net.encode(
+             src_images.unsqueeze(0),
+             src_poses.unsqueeze(0),
+             focal,
+             c=c,
+         )
+
+         rays = torch.cat([
+             ray_bundle.origins.to(device),
+             ray_bundle.directions.to(device),
+             ray_bundle.nears.to(device),
+             ray_bundle.fars.to(device),
+         ], dim=-1).unsqueeze(0)
+
+         render_dict = DotMap(self.renderer(rays, want_weights=True))
+
+         outputs: Dict[str, torch.Tensor | List] = {
+             "rgb_coarse": render_dict.coarse.rgb.squeeze(0),
+             "depth_coarse": render_dict.coarse.depth.squeeze(0),
+             "weights_coarse": render_dict.coarse.weights.squeeze(0),
+         }
+         if len(render_dict.fine) > 0:
+             outputs["rgb_fine"] = render_dict.fine.rgb.squeeze(0)
+             outputs["depth_fine"] = render_dict.fine.depth.squeeze(0)
+             outputs["weights_fine"] = render_dict.fine.weights.squeeze(0)
+
+         outputs["rgb"] = outputs.get("rgb_fine", outputs["rgb_coarse"])
+         outputs["accumulation"] = outputs.get("weights_fine", outputs["weights_coarse"]).sum(dim=-1)
+         outputs["depth"] = outputs.get("depth_fine", outputs["depth_coarse"])
+
+         return cast(Dict[str, torch.Tensor | List], outputs)
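Note: a minimal sketch (field names taken from PixelNeRFModelConfig above; the checkpoint path is hypothetical) of pointing the model at existing pixelNeRF weights instead of training the network from scratch:

    model = PixelNeRFModelConfig(
        ckpt_path="outputs/pixel_nerf_000500.pth",  # hypothetical checkpoint; load_from_ckpt expects a six-digit step suffix
        no_reload=False,                            # lets populate_modules call load_from_ckpt
        out_dir="outputs",                          # directory scanned for *.pth files when ckpt_path is unset
    )

This PixelNeRFModelConfig instance would be passed to PixelNerfPipelineConfig in PixelNeRFConfig.py in place of the default.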
nerfstudio_pixelnerf-0.0.1.dist-info/METADATA ADDED
@@ -0,0 +1,10 @@
+ Metadata-Version: 2.4
+ Name: nerfstudio-pixelnerf
+ Version: 0.0.1
+ Summary: Unofficial Implementation of `pixelNeRF: Neural Radiance Fields from One or Few Images` [Yu et al.] for NeRFStudio
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ Requires-Dist: dotmap>=1.3.30
+ Requires-Dist: lpips>=0.1.4
+ Requires-Dist: nerfstudio
+ Requires-Dist: pyhocon>=0.3.63
nerfstudio_pixelnerf-0.0.1.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
+ PixelNeRFConfig.py,sha256=_kGrr4nNlV8xaMN8s7xoJW7pbevM8laYaGZWU_BblRU,1320
+ PixelNeRFDataManager.py,sha256=9GA6_mQ9TcfR-CLITTQKvm-f77CgfgG9dWusxttW7U4,3300
+ PixelNeRFModel.py,sha256=oEyq9RJuiHdtTJ-CqKttdy6dR9rcmH7J8g2UMTNUYr4,8184
+ nerfstudio_pixelnerf-0.0.1.dist-info/METADATA,sha256=Ihw9SP-o0AmoHR-Ekma2cz_R2DSB6jPH6E0Q19AKFSQ,367
+ nerfstudio_pixelnerf-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+ nerfstudio_pixelnerf-0.0.1.dist-info/entry_points.txt,sha256=DUIH2xvV26nSkjyi9DswfxnhEcc9njfxCsqo-qUBcmk,66
+ nerfstudio_pixelnerf-0.0.1.dist-info/top_level.txt,sha256=UZJrWmW6hLBUIi4B78LvwwrgHuNZ0AJRAnTysU9RpcA,70
+ nerfstudio_pixelnerf-0.0.1.dist-info/RECORD,,
nerfstudio_pixelnerf-0.0.1.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (82.0.1)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
nerfstudio_pixelnerf-0.0.1.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [nerfstudio.method_configs]
+ pixelnerf = PixelNeRFConfig:PixelNeRF
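The entry above registers the MethodSpecification under nerfstudio's plugin group, which is how ns-train discovers the pixel-nerf method. A minimal sketch (standard-library importlib.metadata; the printed result is the expected outcome, not a verified one) of checking the registration once the wheel is installed:

    from importlib.metadata import entry_points

    methods = entry_points(group="nerfstudio.method_configs")
    print(sorted(ep.name for ep in methods))  # should include "pixelnerf"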
nerfstudio_pixelnerf-0.0.1.dist-info/top_level.txt ADDED
@@ -0,0 +1,4 @@
+ PixelNeRFConfig
+ PixelNeRFDataManager
+ PixelNeRFModel
+ PixelNeRFPipeline