pms-nvidia-processor 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.1
2
+ Name: pms-nvidia-processor
3
+ Version: 0.1.1
4
+ Summary: lib for pms nvidia processors
5
+ Author: HyeongSeok Kim
6
+ Author-email: tiryul@gmail.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: loguru (>=0.7.2,<0.8.0)
13
+ Requires-Dist: numpy (>=1.26.4,<2.0.0)
14
+ Requires-Dist: pms-inference-engine (==1.0)
15
+ Requires-Dist: pms-tensorrt (==1.1)
16
+ Description-Content-Type: text/markdown
17
+
18
+ # pms-nvidia-processor
19
+ repos for pms-nvidia-processor
20
+
@@ -0,0 +1,2 @@
1
+ # pms-nvidia-processor
2
+ repos for pms-nvidia-processor
@@ -0,0 +1,3 @@
1
+ from ._dpir_processor import DPIRProcessor
2
+
3
+ __version__ = "0.1.1"
@@ -0,0 +1,33 @@
1
+ from ._dependency import *
2
+
3
+
4
class DPIRConfig:
    """Static configuration for the DPIR patch-based TensorRT processor.

    NOTE(review): "OVERLAB" looks like a typo of "OVERLAP", but these names
    are referenced from other modules, so they are kept unchanged.
    """

    # 4 input channels vs 3 output channels — presumably RGB plus one extra
    # input map (DPIR-style noise level); TODO confirm against the TRT model.
    NUMBER_OF_INPUT_CHANNELS: int = 4
    NUMBER_OF_OUTPUT_CHANNELS: int = 3
    # Denoising only: output resolution equals input resolution.
    UPSCALE_RATIO: int = 1
    # Square patch edge length (pixels) fed to the engine.
    PATCH_SIZE = 256
    # Batch-size bounds; OPT is half of MAX.
    MAX_BATCH_SIZE = 8
    OPT_BATCH_SIZE = MAX_BATCH_SIZE // 2
    MIN_BATCH_SIZE = 1
    # Overlap (pixels) shared between neighbouring patches, same on both sides.
    INPUT_OVERLAB_LENGTH = 5
    OUTPUT_OVERLAB_LENGTH = INPUT_OVERLAB_LENGTH

    # Channel-first shape with free spatial dims (-1, -1).
    INPUT_INIT_VECTOR_SHAPE = [NUMBER_OF_INPUT_CHANNELS, -1, -1]

    # Per-patch TRT input shape: C x H x W.
    TRT_INPUT_PATCH_SHAPE = [NUMBER_OF_INPUT_CHANNELS, PATCH_SIZE, PATCH_SIZE]

    # Per-patch TRT output shape: C x H x W, scaled by the upscale ratio.
    TRT_OUTPUT_PATCH_SHAPE = [
        NUMBER_OF_OUTPUT_CHANNELS,
        PATCH_SIZE * UPSCALE_RATIO,
        PATCH_SIZE * UPSCALE_RATIO,
    ]

    # Output-side overlap expressed in output-pixel units.
    PATCHER_OUTPUT_OVERLAB_LENGTH = INPUT_OVERLAB_LENGTH * UPSCALE_RATIO

    # Patcher shapes are channel-last (H x W x C), unlike the TRT shapes above.
    # NOTE(review): uses NUMBER_OF_OUTPUT_CHANNELS (3), presumably because
    # patches are RGB before any extra input channel is added — TODO confirm.
    PATCHER_INPUT_PATCH_SHAPE = (PATCH_SIZE, PATCH_SIZE, NUMBER_OF_OUTPUT_CHANNELS)

    # Output patch size after trimming the overlap border from both sides.
    PATCHER_OUTPUT_PATCH_SHAPE = (
        (PATCH_SIZE - PATCHER_OUTPUT_OVERLAB_LENGTH * 2) * UPSCALE_RATIO,
        (PATCH_SIZE - PATCHER_OUTPUT_OVERLAB_LENGTH * 2) * UPSCALE_RATIO,
        NUMBER_OF_OUTPUT_CHANNELS,
    )
@@ -0,0 +1,10 @@
1
+ from typing import List, Optional, Tuple, Iterable, Literal, Dict
2
+ from dataclasses import dataclass
3
+ import os
4
+ import time
5
+ import uvloop
6
+ import asyncio
7
+ import numpy as np
8
+ from loguru import logger
9
+ import pms_tensorrt as TRT
10
+ from pms_inference_engine import IEngineProcessor, EngineIOData, register
@@ -0,0 +1,177 @@
1
+ from ._dependency import *
2
+ from ._const import *
3
+ from . import patcher
4
+
5
+
6
+ def _pre_processing(
7
+ batch_input_images: List[np.ndarray],
8
+ input_buffer: np.ndarray,
9
+ ) -> None:
10
+ b = len(batch_input_images)
11
+ for batch_idx in range(b):
12
+ image = batch_input_images[batch_idx]
13
+ h, w, c = image.shape
14
+ for channel_idx in range(c):
15
+ np.divide(
16
+ image[:, :, channel_idx],
17
+ 255,
18
+ out=input_buffer[batch_idx, channel_idx, :h, :w],
19
+ )
20
+
21
+
22
+ def _post_processing(
23
+ output_buffer: np.ndarray, # BxCxHxW
24
+ output_image: np.ndarray, # BxHxWxC
25
+ ) -> None:
26
+ b, h, w, c = output_image.shape
27
+ denoise_pred = np.clip(
28
+ np.multiply(output_buffer[:b, :, :h, :w], 255), 0, 255
29
+ ).astype(np.uint8)
30
+ for i in range(3):
31
+ np.copyto(src=denoise_pred[:, i, :, :], dst=output_image[:, :, :, i])
32
+
33
+
34
@register
class DPIRProcessor(IEngineProcessor[EngineIOData, EngineIOData]):
    """Engine processor that runs the DPIR denoiser through TensorRT.

    Frames are padded, sliced into overlapping patches, pushed through the
    TRT session in batches, and merged back into a full-resolution frame.
    ``_bind_io`` must run (with a representative frame) before ``inference``.
    """

    def __init__(
        self,
        concurrency: int,
        index: int,
        model_path: str,
        device_name: str = "cuda",
    ):
        # index: processor instance index (also selects the CUDA device);
        # model_path: path to the serialized TRT model;
        # device_name: stored but not otherwise used in this class.

        # set member var
        self.index = index
        self.model_path = model_path
        self.device_name = device_name
        self._concurrency = concurrency

        # set loop policy
        # NOTE(review): installs uvloop process-wide as a side effect of
        # constructing a single processor instance.
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        # set device_id
        # Round-robin processor instances across available CUDA devices.
        device_count = TRT.get_device_count()
        device_id = index % device_count
        self.device_id = device_id

        # start init
        logger.info(f"{self.__class__}[{index}]>> Init Start")

        # super
        super().__init__(
            concurrency=concurrency,
            index=index,
        )

        # end init
        logger.info(f"{self.__class__}[{index}]>> Init END")

    async def inference(self, batch_input_data: List[np.ndarray]) -> List[np.ndarray]:
        """Run one TRT batch: pre-process into the session's input buffer,
        execute, post-process into per-patch uint8 images, and unpack."""
        session = self.session
        patch_size = DPIRConfig.PATCH_SIZE
        batch = len(batch_input_data)
        batch_output_data: np.ndarray = np.zeros(
            (batch, patch_size, patch_size, 3), np.uint8
        )
        # NOTE(review): TACT only records a start timestamp; elapsed times are
        # never computed, and the commented-out log below would print the
        # negated absolute timestamp, not a duration.
        TACT = {}
        TACT["_pre_processing"] = time.time()
        _pre_processing(
            batch_input_images=batch_input_data,
            input_buffer=self.input_buffer,
        )
        session.run()
        _post_processing(
            output_buffer=self.output_buffer,
            output_image=batch_output_data,
        )
        # for key, value in TACT.items():
        #     logger.debug(f"TACT[{key}] : {-value*1000:.3f} ms")
        return [output_data for output_data in batch_output_data]  # unpack

    async def _run(self, input_data: EngineIOData) -> EngineIOData:
        """Process one frame: pad, slice into patches, batch-infer, merge."""
        max_batch_size = self.io_shapes["input"][0][0]
        # patching happens here
        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            input_image, overlap_length=DPIRConfig.INPUT_OVERLAB_LENGTH
        )
        output_image: np.ndarray = np.zeros_like(input_image)

        # slice
        input_patches = self.patcher.slice(input_vector=padded_input_image)

        # batch inference
        output_patches = []
        for batch_items in TRT.batch(input_patches, max_batch_size):
            ops = await self.inference(batch_input_data=batch_items)
            output_patches += ops

        self.patcher.merge(output_vector=output_image, patches=output_patches)
        return EngineIOData(frame_id=input_data.frame_id, frame=output_image)

    def _ready_processor(self) -> bool:
        # No readiness work beyond what _bind_io performs.
        return True

    def _bind_io(self, input_data: EngineIOData):
        """Build the patcher and TRT session sized from one sample frame.

        The sample frame fixes the patch grid and the engine's host I/O
        buffer shapes; all later frames are assumed to share its shape.
        """
        model_path = self.model_path
        device_id = self.device_id

        # set patcher

        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            input_image, overlap_length=DPIRConfig.INPUT_OVERLAB_LENGTH
        )
        output_image: np.ndarray = np.zeros_like(input_image)
        self.input_vector_shape = padded_input_image.shape
        self.output_vector_shape = output_image.shape
        self.patcher = patcher.Patcher(
            input_vector_shape=self.input_vector_shape,  # type: ignore
            input_patch_shape=DPIRConfig.PATCHER_INPUT_PATCH_SHAPE,
            input_overlap_length=DPIRConfig.INPUT_OVERLAB_LENGTH,
            output_vector_shape=self.output_vector_shape,  # type: ignore
            output_patch_shape=DPIRConfig.PATCHER_OUTPUT_PATCH_SHAPE,
            output_overlap_length=DPIRConfig.OUTPUT_OVERLAB_LENGTH,
        )
        n_patches = len(self.patcher.slice(input_vector=padded_input_image))

        # set io shape
        # NOTE(review): buffers are allocated for MAX_BATCH_SIZE even when
        # fewer patches exist; self.batch_size is computed here but not read
        # anywhere in this class.
        self.batch_size = min(n_patches, DPIRConfig.MAX_BATCH_SIZE)
        self.io_shapes = {
            "input": (
                [DPIRConfig.MAX_BATCH_SIZE, *DPIRConfig.TRT_INPUT_PATCH_SHAPE],
                np.float32,
            ),
            "output": (
                [DPIRConfig.MAX_BATCH_SIZE, *DPIRConfig.TRT_OUTPUT_PATCH_SHAPE],
                np.float32,
            ),
        }

        # init trt engine
        self.session = TRT.TRTSession(
            model_path=model_path,
            device_id=device_id,
            io_shapes=self.io_shapes,
        )

        # warm up
        self.session.run()

        # set io buffer
        # Reshape the session's flat host buffers into batch-shaped views.
        self.input_buffer = self.session._input_bindings[0].host_buffer.reshape(
            self.io_shapes["input"][0]
        )
        # Pre-fill with 1/255 — presumably so regions not overwritten by
        # smaller images hold a small non-zero value; TODO confirm intent.
        self.input_buffer.fill(1.0 / 255.0)
        self.output_buffer = self.session._output_bindings[0].host_buffer.reshape(
            *self.io_shapes["output"][0]
        )

        return True

    def _get_live(self) -> bool:
        # Always reported alive; no health probing is performed.
        return True

    def _get_concurrency(self) -> int:
        return self._concurrency
@@ -0,0 +1 @@
1
+ from ._patcher import Patcher, pad_vector
@@ -0,0 +1,103 @@
1
+ from pms_nvidia_processor._const import *
2
+ from pms_nvidia_processor.patcher._patch_position import (
3
+ PatchPositionXY,
4
+ PatchPosition,
5
+ )
6
+
7
+
8
class PatchPosXYCollection:
    """Row-major grid of patch positions over an HxWxC vector.

    Provides flat iteration/indexing over the 2-D grid plus patch
    extraction (``get_patch``) and write-back (``set_patch``).
    """

    def __init__(self, patch_pos_list: "List[List[PatchPositionXY]]"):
        # Routes through the property setter below, which validates the
        # grid and caches rows/cols/size.
        self.patch_pos_list = patch_pos_list

    def __iter__(self):
        # Flat, row-major traversal of all positions.
        for row in self.__patch_pos_list:
            yield from row

    def __len__(self):
        return self.__size

    def __getitem__(self, idx):
        # Map a flat row-major index onto the 2-D grid.
        row, col = divmod(idx, self.__cols)
        return self.__patch_pos_list[row][col]

    def get_patch(
        self,
        vector: np.ndarray,
    ) -> List[np.ndarray]:
        """Return one patch per position as views of ``vector`` (no copies)."""
        return [vector[pos.y.range, pos.x.range] for pos in self]

    def set_patch(
        self,
        vector: np.ndarray,
        patches: List[np.ndarray],
        overlab_length: int,
    ):
        """Copy each patch's trimmed region back into ``vector`` in place.

        ``overlab_length`` pixels are skipped at the top/left of every
        patch; ``patches`` must match the grid size (strict zip).
        """
        for pos, patch in zip(self, patches, strict=True):  # inplace copy
            vector[pos.y.range, pos.x.range] = patch[
                overlab_length : overlab_length + pos.y.dp,
                overlab_length : overlab_length + pos.x.dp,
            ]

    @property
    def patch_pos_list(self) -> "List[List[PatchPositionXY]]":
        return self.__patch_pos_list

    @patch_pos_list.setter
    def patch_pos_list(self, patch_pos_list: "List[List[PatchPositionXY]]"):
        # Every row must have the same number of columns.
        self.__rows = len(patch_pos_list)
        self.__cols = len(patch_pos_list[0])
        assert all(len(row) == self.__cols for row in patch_pos_list)
        self.__size = self.__rows * self.__cols
        self.__patch_pos_list = patch_pos_list

    @property
    def rows(self):
        return self.__rows

    @property
    def cols(self):
        return self.__cols

    @property
    def size(self):
        return self.__size

    @property
    def shape(self):
        return (self.rows, self.cols)

    @staticmethod
    def create(
        vector_shape: Tuple[int, int, int],
        patch_shape: Tuple[int, int, int],
        overlap_length: int,
    ):
        """Tile ``vector_shape`` (HxWxC) with patches of ``patch_shape``.

        Consecutive patches advance by the patch size minus twice the
        overlap, so neighbours share ``2 * overlap_length`` pixels per axis.
        The original kept dead ``patch_rows``/``patch_cols`` counters and a
        no-op ``overlap_length`` self-assignment; both are removed here.
        """
        vector_height, vector_width, _ = vector_shape
        shape_height, shape_width, _ = patch_shape
        stride_y = shape_height - overlap_length * 2
        stride_x = shape_width - overlap_length * 2
        pos_list: "List[List[PatchPositionXY]]" = []
        pos_y = 0
        # loop for y
        while pos_y < vector_height - overlap_length * 2:
            row_positions: "List[PatchPositionXY]" = []
            pos_x = 0
            # loop for x
            while pos_x < vector_width - overlap_length * 2:
                row_positions.append(
                    PatchPositionXY(
                        PatchPosition(pos_x, vector_width, shape_width),
                        PatchPosition(pos_y, vector_height, shape_height),
                    )
                )
                pos_x += stride_x
            pos_list.append(row_positions)
            pos_y += stride_y
        return PatchPosXYCollection(patch_pos_list=pos_list)
@@ -0,0 +1,42 @@
1
+ from pms_nvidia_processor._const import *
2
+
3
+
4
@dataclass
class PatchPosition:
    """One axis of a patch placement.

    ``target_pos`` is the patch start on the axis, ``target_length`` the
    axis length, ``patch_length`` the nominal patch extent; ``p2`` is
    clamped to the axis end.
    """

    target_pos: int
    target_length: int
    patch_length: int

    def __iter__(self):
        # Supports tuple unpacking: start, end = position
        yield self.p1
        yield self.p2

    @property
    def p1(self) -> int:
        """Inclusive start index; must lie inside the axis."""
        assert (
            self.target_pos < self.target_length
        ), f"ERROR, assert self.target_pos < self.target_length"
        return self.target_pos

    @property
    def p2(self) -> int:
        """Exclusive end index, clamped to the axis length."""
        pos = min(self.target_pos + self.patch_length, self.target_length)
        assert pos != self.p1, f"ERROR, pos != self.p1"
        return pos

    @property
    def dp(self) -> int:
        """Patch extent along this axis (``p2 - p1``), always positive."""
        dp = self.p2 - self.p1
        assert dp > 0, f"ERROR, p1 and p2 are same. p1: {self.p1}, p2: {self.p2}"
        return dp

    @property
    def range(self) -> slice:
        """Half-open slice ``[p1, p2)`` for indexing."""
        return slice(self.p1, self.p2)
37
+
38
+
39
@dataclass
class PatchPositionXY:
    """Full 2-D placement of one patch: one PatchPosition per axis."""

    # x: horizontal-axis placement; y: vertical-axis placement.
    x: PatchPosition
    y: PatchPosition
@@ -0,0 +1,81 @@
1
+ from pms_nvidia_processor._const import *
2
+ from pms_nvidia_processor.patcher._patch_collection import PatchPosXYCollection
3
+
4
+
5
def pad_vector(
    vector: np.ndarray,
    overlap_length: int,
    mode: Literal[
        "edge",
        "mean",
        "median",
        "reflect",
        "symmetric",
    ] = "edge",
) -> np.ndarray:
    """Pad an HxWxC vector by ``overlap_length`` pixels on every spatial edge.

    The channel axis is left untouched; ``mode`` is forwarded to ``np.pad``
    (default replicates the border row/column).
    """
    spatial_pad = (overlap_length, overlap_length)
    return np.pad(
        vector,
        pad_width=(spatial_pad, spatial_pad, (0, 0)),
        mode=mode,
    )
27
+
28
+
29
class Patcher:
    """Slice an HxWxC input vector into overlapping patches and merge
    processed patches back into an output vector.

    The input and output position grids must have the same rows x cols
    shape; this is asserted at construction time.
    """

    def __init__(
        self,
        input_vector_shape: Tuple[int, int, int],
        input_patch_shape: Tuple[int, int, int],
        input_overlap_length: int,
        output_vector_shape: Tuple[int, int, int],
        output_patch_shape: Tuple[int, int, int],
        output_overlap_length: int,
    ) -> None:
        # Validate arguments; each assert message echoes its condition.
        # NOTE(review): asserts are stripped under `python -O`.
        assert input_overlap_length > -1, "assert input_overlap_length > -1"
        assert output_overlap_length > -1, "assert output_overlap_length > -1"
        assert all(
            [e > 0 for e in input_patch_shape]
        ), "assert all([e > 0 for e in input_patch_shape])"
        assert all(
            [e > 0 for e in output_patch_shape]
        ), "assert all([e > 0 for e in output_patch_shape])"
        assert (
            len(input_patch_shape) == 3
        ), "assert len(input_patch_shape) == 3"  # only allow image-like vector
        assert (
            len(output_patch_shape) == 3
        ), "assert len(output_patch_shape) == 3"  # only allow image-like vector

        # The input grid uses the configured overlap; the output grid is laid
        # out without overlap (patches are trimmed before write-back instead).
        input_pos_collection = PatchPosXYCollection.create(
            vector_shape=input_vector_shape,
            patch_shape=input_patch_shape,
            overlap_length=input_overlap_length,
        )
        output_pos_collection = PatchPosXYCollection.create(
            vector_shape=output_vector_shape,
            patch_shape=output_patch_shape,
            overlap_length=0,
        )
        assert (
            input_pos_collection.shape == output_pos_collection.shape
        ), f"assert input_pos_collection.shape == output_pos_collection.shape | {input_pos_collection.shape} != {output_pos_collection.shape}"
        self._input_pos_collection = input_pos_collection
        self._output_pos_collection = output_pos_collection
        self._input_overlap_length = input_overlap_length
        self._output_overlap_length = output_overlap_length

    def slice(self, input_vector: np.ndarray):  # -> List[ndarray[Any, Any]]:
        """Return the list of (view) patches covering ``input_vector``."""
        return self._input_pos_collection.get_patch(input_vector)

    def merge(self, output_vector: np.ndarray, patches: List[np.ndarray]):
        """Write ``patches`` back into ``output_vector`` in place, trimming
        the configured output overlap from each patch's top/left edges."""
        self._output_pos_collection.set_patch(
            vector=output_vector,
            patches=patches,
            overlab_length=self._output_overlap_length,
        )
@@ -0,0 +1,33 @@
1
+ [tool.poetry]
2
+ name = "pms-nvidia-processor"
3
+ version = "0.1.1"
4
+ description = "lib for pms nvidia processors"
5
+ authors = ["HyeongSeok Kim <tiryul@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.10"
10
+ loguru = "^0.7.2"
11
+ numpy = "^1.26.4"
12
+ pms-inference-engine = "1.0"
13
+ pms-tensorrt = "1.1"
14
+
15
+
16
+ [tool.poetry.group.dev.dependencies]
17
+ pytest = "^8.1.1"
18
+ pytest-order = "^1.2.0"
19
+ pycuda = "^2024.1"
20
+ nvidia-cudnn-cu12 = "8.9.7.29"
21
+ tensorrt = {version = "8.6.1.post1", source = "nvidia"}
22
+ tensorrt-libs = {version = "8.6.1", source = "nvidia"}
23
+ tensorrt-bindings = {version = "8.6.1", source = "nvidia"}
24
+
25
+
26
+ [[tool.poetry.source]]
27
+ name = "nvidia"
28
+ url = "https://pypi.nvidia.com/"
29
+ priority = "supplemental"
30
+
31
+ [build-system]
32
+ requires = ["poetry-core"]
33
+ build-backend = "poetry.core.masonry.api"