pms-nvidia-processor 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.1
2
+ Name: pms-nvidia-processor
3
+ Version: 0.1.1
4
+ Summary: lib for pms nvidia processors
5
+ Author: HyeongSeok Kim
6
+ Author-email: tiryul@gmail.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: loguru (>=0.7.2,<0.8.0)
13
+ Requires-Dist: numpy (>=1.26.4,<2.0.0)
14
+ Requires-Dist: pms-inference-engine (==1.0)
15
+ Requires-Dist: pms-tensorrt (==1.1)
16
+ Description-Content-Type: text/markdown
17
+
18
+ # pms-nvidia-processor
19
+ repos for pms-nvidia-processor
20
+
@@ -0,0 +1,2 @@
1
+ # pms-nvidia-processor
2
+ repos for pms-nvidia-processor
@@ -0,0 +1,3 @@
1
+ from ._dpir_processor import DPIRProcessor
2
+
3
+ __version__ = "0.1.1"
@@ -0,0 +1,33 @@
1
+ from ._dependency import *
2
+
3
+
4
class DPIRConfig:
    """Static configuration for the DPIR patch-based TensorRT processor.

    NOTE(review): "OVERLAB" looks like a typo of "OVERLAP", but these names
    are referenced from other modules, so they are kept unchanged.
    """

    # 4 input channels vs 3 output channels — presumably RGB plus one extra
    # input map (DPIR-style noise level); TODO confirm against the TRT model.
    NUMBER_OF_INPUT_CHANNELS: int = 4
    NUMBER_OF_OUTPUT_CHANNELS: int = 3
    # Denoising only: output resolution equals input resolution.
    UPSCALE_RATIO: int = 1
    # Square patch edge length (pixels) fed to the engine.
    PATCH_SIZE = 256
    # Batch-size bounds; OPT is half of MAX.
    MAX_BATCH_SIZE = 8
    OPT_BATCH_SIZE = MAX_BATCH_SIZE // 2
    MIN_BATCH_SIZE = 1
    # Overlap (pixels) shared between neighbouring patches, same on both sides.
    INPUT_OVERLAB_LENGTH = 5
    OUTPUT_OVERLAB_LENGTH = INPUT_OVERLAB_LENGTH

    # Channel-first shape with free spatial dims (-1, -1).
    INPUT_INIT_VECTOR_SHAPE = [NUMBER_OF_INPUT_CHANNELS, -1, -1]

    # Per-patch TRT input shape: C x H x W.
    TRT_INPUT_PATCH_SHAPE = [NUMBER_OF_INPUT_CHANNELS, PATCH_SIZE, PATCH_SIZE]

    # Per-patch TRT output shape: C x H x W, scaled by the upscale ratio.
    TRT_OUTPUT_PATCH_SHAPE = [
        NUMBER_OF_OUTPUT_CHANNELS,
        PATCH_SIZE * UPSCALE_RATIO,
        PATCH_SIZE * UPSCALE_RATIO,
    ]

    # Output-side overlap expressed in output-pixel units.
    PATCHER_OUTPUT_OVERLAB_LENGTH = INPUT_OVERLAB_LENGTH * UPSCALE_RATIO

    # Patcher shapes are channel-last (H x W x C), unlike the TRT shapes above.
    # NOTE(review): uses NUMBER_OF_OUTPUT_CHANNELS (3), presumably because
    # patches are RGB before any extra input channel is added — TODO confirm.
    PATCHER_INPUT_PATCH_SHAPE = (PATCH_SIZE, PATCH_SIZE, NUMBER_OF_OUTPUT_CHANNELS)

    # Output patch size after trimming the overlap border from both sides.
    PATCHER_OUTPUT_PATCH_SHAPE = (
        (PATCH_SIZE - PATCHER_OUTPUT_OVERLAB_LENGTH * 2) * UPSCALE_RATIO,
        (PATCH_SIZE - PATCHER_OUTPUT_OVERLAB_LENGTH * 2) * UPSCALE_RATIO,
        NUMBER_OF_OUTPUT_CHANNELS,
    )
@@ -0,0 +1,10 @@
1
+ from typing import List, Optional, Tuple, Iterable, Literal, Dict
2
+ from dataclasses import dataclass
3
+ import os
4
+ import time
5
+ import uvloop
6
+ import asyncio
7
+ import numpy as np
8
+ from loguru import logger
9
+ import pms_tensorrt as TRT
10
+ from pms_inference_engine import IEngineProcessor, EngineIOData, register
@@ -0,0 +1,177 @@
1
+ from ._dependency import *
2
+ from ._const import *
3
+ from . import patcher
4
+
5
+
6
+ def _pre_processing(
7
+ batch_input_images: List[np.ndarray],
8
+ input_buffer: np.ndarray,
9
+ ) -> None:
10
+ b = len(batch_input_images)
11
+ for batch_idx in range(b):
12
+ image = batch_input_images[batch_idx]
13
+ h, w, c = image.shape
14
+ for channel_idx in range(c):
15
+ np.divide(
16
+ image[:, :, channel_idx],
17
+ 255,
18
+ out=input_buffer[batch_idx, channel_idx, :h, :w],
19
+ )
20
+
21
+
22
+ def _post_processing(
23
+ output_buffer: np.ndarray, # BxCxHxW
24
+ output_image: np.ndarray, # BxHxWxC
25
+ ) -> None:
26
+ b, h, w, c = output_image.shape
27
+ denoise_pred = np.clip(
28
+ np.multiply(output_buffer[:b, :, :h, :w], 255), 0, 255
29
+ ).astype(np.uint8)
30
+ for i in range(3):
31
+ np.copyto(src=denoise_pred[:, i, :, :], dst=output_image[:, :, :, i])
32
+
33
+
34
@register
class DPIRProcessor(IEngineProcessor[EngineIOData, EngineIOData]):
    """Engine processor that runs the DPIR denoiser through TensorRT.

    Frames are padded, sliced into overlapping patches, pushed through the
    TRT session in batches, and merged back into a full-resolution frame.
    ``_bind_io`` must run (with a representative frame) before ``inference``.
    """

    def __init__(
        self,
        concurrency: int,
        index: int,
        model_path: str,
        device_name: str = "cuda",
    ):
        # index: processor instance index (also selects the CUDA device);
        # model_path: path to the serialized TRT model;
        # device_name: stored but not otherwise used in this class.

        # set member var
        self.index = index
        self.model_path = model_path
        self.device_name = device_name
        self._concurrency = concurrency

        # set loop policy
        # NOTE(review): installs uvloop process-wide as a side effect of
        # constructing a single processor instance.
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        # set device_id
        # Round-robin processor instances across available CUDA devices.
        device_count = TRT.get_device_count()
        device_id = index % device_count
        self.device_id = device_id

        # start init
        logger.info(f"{self.__class__}[{index}]>> Init Start")

        # super
        super().__init__(
            concurrency=concurrency,
            index=index,
        )

        # end init
        logger.info(f"{self.__class__}[{index}]>> Init END")

    async def inference(self, batch_input_data: List[np.ndarray]) -> List[np.ndarray]:
        """Run one TRT batch: pre-process into the session's input buffer,
        execute, post-process into per-patch uint8 images, and unpack."""
        session = self.session
        patch_size = DPIRConfig.PATCH_SIZE
        batch = len(batch_input_data)
        batch_output_data: np.ndarray = np.zeros(
            (batch, patch_size, patch_size, 3), np.uint8
        )
        # NOTE(review): TACT only records a start timestamp; elapsed times are
        # never computed, and the commented-out log below would print the
        # negated absolute timestamp, not a duration.
        TACT = {}
        TACT["_pre_processing"] = time.time()
        _pre_processing(
            batch_input_images=batch_input_data,
            input_buffer=self.input_buffer,
        )
        session.run()
        _post_processing(
            output_buffer=self.output_buffer,
            output_image=batch_output_data,
        )
        # for key, value in TACT.items():
        #     logger.debug(f"TACT[{key}] : {-value*1000:.3f} ms")
        return [output_data for output_data in batch_output_data]  # unpack

    async def _run(self, input_data: EngineIOData) -> EngineIOData:
        """Process one frame: pad, slice into patches, batch-infer, merge."""
        max_batch_size = self.io_shapes["input"][0][0]
        # patching happens here
        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            input_image, overlap_length=DPIRConfig.INPUT_OVERLAB_LENGTH
        )
        output_image: np.ndarray = np.zeros_like(input_image)

        # slice
        input_patches = self.patcher.slice(input_vector=padded_input_image)

        # batch inference
        output_patches = []
        for batch_items in TRT.batch(input_patches, max_batch_size):
            ops = await self.inference(batch_input_data=batch_items)
            output_patches += ops

        self.patcher.merge(output_vector=output_image, patches=output_patches)
        return EngineIOData(frame_id=input_data.frame_id, frame=output_image)

    def _ready_processor(self) -> bool:
        # No readiness work beyond what _bind_io performs.
        return True

    def _bind_io(self, input_data: EngineIOData):
        """Build the patcher and TRT session sized from one sample frame.

        The sample frame fixes the patch grid and the engine's host I/O
        buffer shapes; all later frames are assumed to share its shape.
        """
        model_path = self.model_path
        device_id = self.device_id

        # set patcher

        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            input_image, overlap_length=DPIRConfig.INPUT_OVERLAB_LENGTH
        )
        output_image: np.ndarray = np.zeros_like(input_image)
        self.input_vector_shape = padded_input_image.shape
        self.output_vector_shape = output_image.shape
        self.patcher = patcher.Patcher(
            input_vector_shape=self.input_vector_shape,  # type: ignore
            input_patch_shape=DPIRConfig.PATCHER_INPUT_PATCH_SHAPE,
            input_overlap_length=DPIRConfig.INPUT_OVERLAB_LENGTH,
            output_vector_shape=self.output_vector_shape,  # type: ignore
            output_patch_shape=DPIRConfig.PATCHER_OUTPUT_PATCH_SHAPE,
            output_overlap_length=DPIRConfig.OUTPUT_OVERLAB_LENGTH,
        )
        n_patches = len(self.patcher.slice(input_vector=padded_input_image))

        # set io shape
        # NOTE(review): buffers are allocated for MAX_BATCH_SIZE even when
        # fewer patches exist; self.batch_size is computed here but not read
        # anywhere in this class.
        self.batch_size = min(n_patches, DPIRConfig.MAX_BATCH_SIZE)
        self.io_shapes = {
            "input": (
                [DPIRConfig.MAX_BATCH_SIZE, *DPIRConfig.TRT_INPUT_PATCH_SHAPE],
                np.float32,
            ),
            "output": (
                [DPIRConfig.MAX_BATCH_SIZE, *DPIRConfig.TRT_OUTPUT_PATCH_SHAPE],
                np.float32,
            ),
        }

        # init trt engine
        self.session = TRT.TRTSession(
            model_path=model_path,
            device_id=device_id,
            io_shapes=self.io_shapes,
        )

        # warm up
        self.session.run()

        # set io buffer
        # Reshape the session's flat host buffers into batch-shaped views.
        self.input_buffer = self.session._input_bindings[0].host_buffer.reshape(
            self.io_shapes["input"][0]
        )
        # Pre-fill with 1/255 — presumably so regions not overwritten by
        # smaller images hold a small non-zero value; TODO confirm intent.
        self.input_buffer.fill(1.0 / 255.0)
        self.output_buffer = self.session._output_bindings[0].host_buffer.reshape(
            *self.io_shapes["output"][0]
        )

        return True

    def _get_live(self) -> bool:
        # Always reported alive; no health probing is performed.
        return True

    def _get_concurrency(self) -> int:
        return self._concurrency
@@ -0,0 +1 @@
1
+ from ._patcher import Patcher, pad_vector
@@ -0,0 +1,103 @@
1
+ from pms_nvidia_processor._const import *
2
+ from pms_nvidia_processor.patcher._patch_position import (
3
+ PatchPositionXY,
4
+ PatchPosition,
5
+ )
6
+
7
+
8
class PatchPosXYCollection:
    """Row-major grid of patch positions over an HxWxC vector.

    Provides flat iteration/indexing over the 2-D grid plus patch
    extraction (``get_patch``) and write-back (``set_patch``).
    """

    def __init__(self, patch_pos_list: "List[List[PatchPositionXY]]"):
        # Routes through the property setter below, which validates the
        # grid and caches rows/cols/size.
        self.patch_pos_list = patch_pos_list

    def __iter__(self):
        # Flat, row-major traversal of all positions.
        for row in self.__patch_pos_list:
            yield from row

    def __len__(self):
        return self.__size

    def __getitem__(self, idx):
        # Map a flat row-major index onto the 2-D grid.
        row, col = divmod(idx, self.__cols)
        return self.__patch_pos_list[row][col]

    def get_patch(
        self,
        vector: np.ndarray,
    ) -> List[np.ndarray]:
        """Return one patch per position as views of ``vector`` (no copies)."""
        return [vector[pos.y.range, pos.x.range] for pos in self]

    def set_patch(
        self,
        vector: np.ndarray,
        patches: List[np.ndarray],
        overlab_length: int,
    ):
        """Copy each patch's trimmed region back into ``vector`` in place.

        ``overlab_length`` pixels are skipped at the top/left of every
        patch; ``patches`` must match the grid size (strict zip).
        """
        for pos, patch in zip(self, patches, strict=True):  # inplace copy
            vector[pos.y.range, pos.x.range] = patch[
                overlab_length : overlab_length + pos.y.dp,
                overlab_length : overlab_length + pos.x.dp,
            ]

    @property
    def patch_pos_list(self) -> "List[List[PatchPositionXY]]":
        return self.__patch_pos_list

    @patch_pos_list.setter
    def patch_pos_list(self, patch_pos_list: "List[List[PatchPositionXY]]"):
        # Every row must have the same number of columns.
        self.__rows = len(patch_pos_list)
        self.__cols = len(patch_pos_list[0])
        assert all(len(row) == self.__cols for row in patch_pos_list)
        self.__size = self.__rows * self.__cols
        self.__patch_pos_list = patch_pos_list

    @property
    def rows(self):
        return self.__rows

    @property
    def cols(self):
        return self.__cols

    @property
    def size(self):
        return self.__size

    @property
    def shape(self):
        return (self.rows, self.cols)

    @staticmethod
    def create(
        vector_shape: Tuple[int, int, int],
        patch_shape: Tuple[int, int, int],
        overlap_length: int,
    ):
        """Tile ``vector_shape`` (HxWxC) with patches of ``patch_shape``.

        Consecutive patches advance by the patch size minus twice the
        overlap, so neighbours share ``2 * overlap_length`` pixels per axis.
        The original kept dead ``patch_rows``/``patch_cols`` counters and a
        no-op ``overlap_length`` self-assignment; both are removed here.
        """
        vector_height, vector_width, _ = vector_shape
        shape_height, shape_width, _ = patch_shape
        stride_y = shape_height - overlap_length * 2
        stride_x = shape_width - overlap_length * 2
        pos_list: "List[List[PatchPositionXY]]" = []
        pos_y = 0
        # loop for y
        while pos_y < vector_height - overlap_length * 2:
            row_positions: "List[PatchPositionXY]" = []
            pos_x = 0
            # loop for x
            while pos_x < vector_width - overlap_length * 2:
                row_positions.append(
                    PatchPositionXY(
                        PatchPosition(pos_x, vector_width, shape_width),
                        PatchPosition(pos_y, vector_height, shape_height),
                    )
                )
                pos_x += stride_x
            pos_list.append(row_positions)
            pos_y += stride_y
        return PatchPosXYCollection(patch_pos_list=pos_list)
@@ -0,0 +1,42 @@
1
+ from pms_nvidia_processor._const import *
2
+
3
+
4
@dataclass
class PatchPosition:
    """One axis of a patch placement.

    ``target_pos`` is the patch start on the axis, ``target_length`` the
    axis length, ``patch_length`` the nominal patch extent; ``p2`` is
    clamped to the axis end.
    """

    target_pos: int
    target_length: int
    patch_length: int

    def __iter__(self):
        # Supports tuple unpacking: start, end = position
        yield self.p1
        yield self.p2

    @property
    def p1(self) -> int:
        """Inclusive start index; must lie inside the axis."""
        assert (
            self.target_pos < self.target_length
        ), f"ERROR, assert self.target_pos < self.target_length"
        return self.target_pos

    @property
    def p2(self) -> int:
        """Exclusive end index, clamped to the axis length."""
        pos = min(self.target_pos + self.patch_length, self.target_length)
        assert pos != self.p1, f"ERROR, pos != self.p1"
        return pos

    @property
    def dp(self) -> int:
        """Patch extent along this axis (``p2 - p1``), always positive."""
        dp = self.p2 - self.p1
        assert dp > 0, f"ERROR, p1 and p2 are same. p1: {self.p1}, p2: {self.p2}"
        return dp

    @property
    def range(self) -> slice:
        """Half-open slice ``[p1, p2)`` for indexing."""
        return slice(self.p1, self.p2)
37
+
38
+
39
@dataclass
class PatchPositionXY:
    """Full 2-D placement of one patch: one PatchPosition per axis."""

    # x: horizontal-axis placement; y: vertical-axis placement.
    x: PatchPosition
    y: PatchPosition
@@ -0,0 +1,81 @@
1
+ from pms_nvidia_processor._const import *
2
+ from pms_nvidia_processor.patcher._patch_collection import PatchPosXYCollection
3
+
4
+
5
def pad_vector(
    vector: np.ndarray,
    overlap_length: int,
    mode: Literal[
        "edge",
        "mean",
        "median",
        "reflect",
        "symmetric",
    ] = "edge",
) -> np.ndarray:
    """Pad an HxWxC vector by ``overlap_length`` pixels on every spatial edge.

    The channel axis is left untouched; ``mode`` is forwarded to ``np.pad``
    (default replicates the border row/column).
    """
    spatial_pad = (overlap_length, overlap_length)
    return np.pad(
        vector,
        pad_width=(spatial_pad, spatial_pad, (0, 0)),
        mode=mode,
    )
27
+
28
+
29
class Patcher:
    """Slice an HxWxC input vector into overlapping patches and merge
    processed patches back into an output vector.

    The input and output position grids must have the same rows x cols
    shape; this is asserted at construction time.
    """

    def __init__(
        self,
        input_vector_shape: Tuple[int, int, int],
        input_patch_shape: Tuple[int, int, int],
        input_overlap_length: int,
        output_vector_shape: Tuple[int, int, int],
        output_patch_shape: Tuple[int, int, int],
        output_overlap_length: int,
    ) -> None:
        # Validate arguments; each assert message echoes its condition.
        # NOTE(review): asserts are stripped under `python -O`.
        assert input_overlap_length > -1, "assert input_overlap_length > -1"
        assert output_overlap_length > -1, "assert output_overlap_length > -1"
        assert all(
            [e > 0 for e in input_patch_shape]
        ), "assert all([e > 0 for e in input_patch_shape])"
        assert all(
            [e > 0 for e in output_patch_shape]
        ), "assert all([e > 0 for e in output_patch_shape])"
        assert (
            len(input_patch_shape) == 3
        ), "assert len(input_patch_shape) == 3"  # only allow image-like vector
        assert (
            len(output_patch_shape) == 3
        ), "assert len(output_patch_shape) == 3"  # only allow image-like vector

        # The input grid uses the configured overlap; the output grid is laid
        # out without overlap (patches are trimmed before write-back instead).
        input_pos_collection = PatchPosXYCollection.create(
            vector_shape=input_vector_shape,
            patch_shape=input_patch_shape,
            overlap_length=input_overlap_length,
        )
        output_pos_collection = PatchPosXYCollection.create(
            vector_shape=output_vector_shape,
            patch_shape=output_patch_shape,
            overlap_length=0,
        )
        assert (
            input_pos_collection.shape == output_pos_collection.shape
        ), f"assert input_pos_collection.shape == output_pos_collection.shape | {input_pos_collection.shape} != {output_pos_collection.shape}"
        self._input_pos_collection = input_pos_collection
        self._output_pos_collection = output_pos_collection
        self._input_overlap_length = input_overlap_length
        self._output_overlap_length = output_overlap_length

    def slice(self, input_vector: np.ndarray):  # -> List[ndarray[Any, Any]]:
        """Return the list of (view) patches covering ``input_vector``."""
        return self._input_pos_collection.get_patch(input_vector)

    def merge(self, output_vector: np.ndarray, patches: List[np.ndarray]):
        """Write ``patches`` back into ``output_vector`` in place, trimming
        the configured output overlap from each patch's top/left edges."""
        self._output_pos_collection.set_patch(
            vector=output_vector,
            patches=patches,
            overlab_length=self._output_overlap_length,
        )
@@ -0,0 +1,33 @@
1
+ [tool.poetry]
2
+ name = "pms-nvidia-processor"
3
+ version = "0.1.1"
4
+ description = "lib for pms nvidia processors"
5
+ authors = ["HyeongSeok Kim <tiryul@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.10"
10
+ loguru = "^0.7.2"
11
+ numpy = "^1.26.4"
12
+ pms-inference-engine = "1.0"
13
+ pms-tensorrt = "1.1"
14
+
15
+
16
+ [tool.poetry.group.dev.dependencies]
17
+ pytest = "^8.1.1"
18
+ pytest-order = "^1.2.0"
19
+ pycuda = "^2024.1"
20
+ nvidia-cudnn-cu12 = "8.9.7.29"
21
+ tensorrt = {version = "8.6.1.post1", source = "nvidia"}
22
+ tensorrt-libs = {version = "8.6.1", source = "nvidia"}
23
+ tensorrt-bindings = {version = "8.6.1", source = "nvidia"}
24
+
25
+
26
+ [[tool.poetry.source]]
27
+ name = "nvidia"
28
+ url = "https://pypi.nvidia.com/"
29
+ priority = "supplemental"
30
+
31
+ [build-system]
32
+ requires = ["poetry-core"]
33
+ build-backend = "poetry.core.masonry.api"