pms-nvidia-processor 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pms-nvidia-processor
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: lib for pms nvidia processors
5
5
  Author: HyeongSeok Kim
6
6
  Author-email: tiryul@gmail.com
@@ -1,5 +1,6 @@
1
1
  from ._dpir_processor import DPIRProcessor
2
2
  from ._dru_rbpn_sr_f3_processor import DRURBPNSRF3Processor
3
3
  from ._dru_rbpn_sr_f5_processor import DRURBPNSRF5Processor
4
+ from ._dru_asm_sr_f3_processor import DRUASMSRF3Processor
4
5
 
5
- __version__ = "0.2.0"
6
+ __version__ = "0.3.0"
@@ -144,3 +144,29 @@ class DRURBPNSRF5Config:
144
144
  number_of_input_channels=NUMBER_OF_INPUT_CHANNELS,
145
145
  number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
146
146
  )
147
+
148
+
149
class DRUASMSRF3Config:
    """Static configuration for the DRU-ASM SR x2 processor (3-frame input).

    Mirrors the layout of the other DRU*Config classes in this module:
    channel/patch constants plus pre-built patcher and TensorRT IO configs.
    """

    # Three consecutive BGR frames are stacked along the channel axis.
    NUMBER_OF_FRAMES = 3
    NUMBER_OF_INPUT_CHANNELS: int = 3 * NUMBER_OF_FRAMES
    NUMBER_OF_OUTPUT_CHANNELS: int = 3
    UPSCALE_RATIO: int = 2
    PATCH_SIZE = 512
    MAX_BATCH_SIZE = 8
    MIN_BATCH_SIZE = 1
    OPT_BATCH_SIZE = MAX_BATCH_SIZE // 2
    # Overlap (sic: "overlab") between neighbouring patches, in pixels.
    INPUT_OVERLAB_LENGTH = 16

    PATCHER_CONFIG = PatcherIOConfig(
        patch_size=PATCH_SIZE,
        upscale_ratio=UPSCALE_RATIO,
        number_of_input_channels=NUMBER_OF_INPUT_CHANNELS,
        number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
        input_overlab_length=INPUT_OVERLAB_LENGTH,
    )
    # The TRT engine consumes one extra input channel: the processor's
    # pre-processing appends a grayscale USM edge map as the last channel,
    # hence "+ 1" relative to the patcher's channel count.
    TRT_CONFIG = TRTIOConfig(
        patch_size=PATCH_SIZE,
        upscale_ratio=UPSCALE_RATIO,
        number_of_input_channels=NUMBER_OF_INPUT_CHANNELS + 1,
        number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
    )
@@ -6,5 +6,6 @@ import uvloop
6
6
  import asyncio
7
7
  import numpy as np
8
8
  from loguru import logger
9
+ import cv2
9
10
  import pms_tensorrt as TRT
10
11
  from pms_inference_engine import IEngineProcessor, EngineIOData, register
@@ -0,0 +1,212 @@
1
+ import subprocess
2
+ from ._dependency import *
3
+ from ._const import *
4
+ from . import patcher
5
+
6
+
7
def _usm_sharp(img: np.ndarray, radius=15) -> np.ndarray:
    """Return the unsharp-mask residual of *img*: the image minus its Gaussian blur.

    Despite the name, this returns only the high-frequency residual, not a
    sharpened image. NOTE(review): if *img* is uint8, ``img - blur`` wraps
    around instead of going negative — confirm this is the intended behaviour
    for the edge-map channel.
    """
    # GaussianBlur requires an odd kernel size.
    kernel = radius + 1 if radius % 2 == 0 else radius
    blurred = cv2.GaussianBlur(img, ksize=(kernel, kernel), sigmaX=0)
    return img - blurred
13
+
14
+
15
def _pre_processing(
    batch_input_images: List[np.ndarray],
    input_buffer: np.ndarray,
) -> None:
    """Fill the TRT host input buffer from a batch of stacked-frame images.

    For each image (H x W x C, three stacked BGR frames):
      * buffer channels 0..C-1 receive the image scaled to [0, 1],
        converted from HWC to CHW layout in place;
      * the last buffer channel receives a grayscale USM edge map computed
        from channels 3:6 (the centre frame), also scaled to [0, 1].

    *input_buffer* layout is B x C x H x W. Only the top-left h x w region
    of each slot is written, so the buffer's pre-initialised contents are
    relied upon outside that region.
    """
    # Compute all edge maps up front, before the per-channel copy loop.
    usm_edge_images = [
        cv2.cvtColor(_usm_sharp(img=img[:, :, 3:6]), cv2.COLOR_BGR2GRAY) / 255.0
        for img in batch_input_images
    ]

    for batch_idx, image in enumerate(batch_input_images):
        h, w, c = image.shape
        for channel_idx in range(c):
            # Normalise straight into the buffer slice (no temporary array).
            np.divide(
                image[:, :, channel_idx],
                255,
                out=input_buffer[batch_idx, channel_idx, :h, :w],
            )
        input_buffer[batch_idx, -1, :h, :w] = usm_edge_images[batch_idx][:h, :w]
38
+
39
+
40
+ def _post_processing(
41
+ output_buffer: np.ndarray, # BxCxHxW
42
+ output_image: np.ndarray, # BxHxWxC
43
+ ) -> None:
44
+
45
+ b, h, w, c = output_image.shape
46
+ pred = np.clip(np.multiply(output_buffer[:b, :, :h, :w], 255), 0, 255).astype(
47
+ np.uint8
48
+ )
49
+ for i in range(3):
50
+ np.copyto(src=pred[:, i, :, :], dst=output_image[:, :, :, i])
51
+
52
+
53
@register
class DRUASMSRF3Processor(IEngineProcessor[EngineIOData, EngineIOData]):
    """TensorRT-backed x2 super-resolution processor for 3-frame DRU-ASM input.

    Lifecycle: ``_bind_io`` (called with a representative frame) builds the
    patcher, sizes the IO shapes, creates the TRT session and binds host
    buffers; ``_run`` pads and slices each incoming frame into patches,
    runs batched ``inference`` and merges patches into the output frame.
    """

    def __init__(
        self,
        concurrency: int,
        index: int,
        model_path: str,
        device_name: str = "cuda",
    ):
        # set member var
        self.index = index
        self.model_path = model_path
        self.device_name = device_name
        self._concurrency = concurrency

        # set loop policy
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        # set device_id: processors are spread round-robin across GPUs
        device_count = TRT.get_device_count()
        device_id = index % device_count
        self.device_id = device_id
        # start init
        logger.info(f"{self.__class__}[{index}]>> Init Start")

        # super
        super().__init__(
            concurrency=concurrency,
            index=index,
        )

        # end init
        logger.info(f"{self.__class__}[{index}]>> Init END")

        self.config = DRUASMSRF3Config

    async def inference(self, batch_input_data: List[np.ndarray]) -> List[np.ndarray]:
        """Run one engine pass over a batch of patches; returns uint8 HWC patches."""
        session = self.session
        patch_size = self.config.PATCH_SIZE  # NOTE(review): unused local
        batch = len(batch_input_data)

        # Output patches are fixed-size: (patch_size * ratio)^2 x C, uint8.
        batch_output_data: np.ndarray = np.zeros(
            (
                batch,
                self.config.PATCHER_CONFIG.patch_size * self.config.UPSCALE_RATIO,
                self.config.PATCHER_CONFIG.patch_size * self.config.UPSCALE_RATIO,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            ),
            np.uint8,
        )
        # Write normalised patches (+ USM edge channel) into the bound host buffer.
        _pre_processing(
            batch_input_images=batch_input_data,
            input_buffer=self.input_buffer,
        )
        session.run()
        # Read the engine's output buffer back into uint8 HWC patches.
        _post_processing(
            output_buffer=self.output_buffer,
            output_image=batch_output_data,
        )
        return [output_data for output_data in batch_output_data]  # unpack

    async def _run(self, input_data: EngineIOData) -> EngineIOData:
        """Process one frame: pad, slice into patches, batch-infer, merge."""
        # Batch capacity was fixed at bind time as the first input dim.
        max_batch_size = self.io_shapes["input"][0][0]
        # patching happens here
        input_image: np.ndarray = input_data.frame  # type: ignore
        patcher_config = self.config.PATCHER_CONFIG
        padded_input_image = patcher.pad_vector(
            vector=input_image,
            overlap_length=patcher_config.input_overlab_length,
        )
        output_image: np.ndarray = np.zeros(
            (
                input_image.shape[0] * self.config.UPSCALE_RATIO,
                input_image.shape[1] * self.config.UPSCALE_RATIO,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            ),
            np.uint8,
        )

        # slice
        input_patches = self.patcher.slice(input_vector=padded_input_image)

        # batch inference
        output_patches = []
        for batch_items in TRT.batch(input_patches, max_batch_size):
            ops = await self.inference(batch_input_data=batch_items)
            output_patches += ops

        self.patcher.merge(output_vector=output_image, patches=output_patches)
        return EngineIOData(frame_id=input_data.frame_id, frame=output_image)

    def _ready_processor(self) -> bool:
        # No extra readiness work beyond _bind_io.
        return True

    def _bind_io(self, input_data: EngineIOData):
        """Build patcher + TRT session and bind host IO buffers for *input_data*'s shape."""
        model_path = self.model_path
        device_id = self.device_id
        patcher_config = self.config.PATCHER_CONFIG
        trt_config = self.config.TRT_CONFIG

        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            input_image,
            overlap_length=patcher_config.input_overlab_length,
        )
        # NOTE(review): no dtype here (defaults to float64) unlike _run's
        # uint8 — appears to be used only for its shape in
        # build_patcher_params; confirm.
        output_image: np.ndarray = np.zeros(
            (
                input_image.shape[0] * self.config.UPSCALE_RATIO,
                input_image.shape[1] * self.config.UPSCALE_RATIO,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            )
        )
        self.patcher = patcher.Patcher(
            **patcher_config.build_patcher_params(
                input_vector=padded_input_image,
                output_vector=output_image,
            )
        )
        n_patches = len(self.patcher.slice(input_vector=padded_input_image))

        # set io shape: batch dim capped by config, shrunk if few patches
        self.batch_size = min(n_patches, self.config.MAX_BATCH_SIZE)
        self.io_shapes = {
            "input": (
                [self.batch_size, *trt_config.input_shape],
                np.float32,
            ),
            "output": (
                [self.batch_size, *trt_config.output_shape],
                np.float32,
            ),
        }

        # init trt engine
        self.session = TRT.TRTSession(
            model_path=model_path,
            device_id=device_id,
            io_shapes=self.io_shapes,
        )

        # warm up
        self.session.run()

        # set io buffer: views over the session's private host bindings
        self.input_buffer = self.session._input_bindings[0].host_buffer.reshape(
            self.io_shapes["input"][0]
        )
        # NOTE(review): the whole input buffer is pre-filled with 1/255 —
        # presumably a neutral value for regions edge/short patches don't
        # overwrite; confirm against the model's training.
        self.input_buffer.fill(1.0 / 255.0)
        self.output_buffer = self.session._output_bindings[0].host_buffer.reshape(
            *self.io_shapes["output"][0]
        )

        return True

    def _get_live(self) -> bool:
        # Processor is always considered live once constructed.
        return True

    def _get_concurrency(self) -> int:
        return self._concurrency
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pms-nvidia-processor"
3
- version = "0.2.0"
3
+ version = "0.3.0"
4
4
  description = "lib for pms nvidia processors"
5
5
  authors = ["HyeongSeok Kim <tiryul@gmail.com>"]
6
6
  readme = "README.md"