pms-nvidia-processor 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pms-nvidia-processor
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: lib for pms nvidia processors
5
5
  Author: HyeongSeok Kim
6
6
  Author-email: tiryul@gmail.com
@@ -1,5 +1,6 @@
1
1
  from ._dpir_processor import DPIRProcessor
2
2
  from ._dru_rbpn_sr_f3_processor import DRURBPNSRF3Processor
3
3
  from ._dru_rbpn_sr_f5_processor import DRURBPNSRF5Processor
4
+ from ._dru_asm_sr_f3_processor import DRUASMSRF3Processor
4
5
 
5
- __version__ = "0.2.0"
6
+ __version__ = "0.3.0"
@@ -144,3 +144,29 @@ class DRURBPNSRF5Config:
144
144
  number_of_input_channels=NUMBER_OF_INPUT_CHANNELS,
145
145
  number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
146
146
  )
147
+
148
+
149
class DRUASMSRF3Config:
    """Static configuration for the DRU-ASM SR x2 processor (3-frame input).

    Mirrors the layout of the other DRU*Config classes in this module:
    channel/patch constants plus pre-built patcher and TensorRT IO configs.
    """

    # Three consecutive BGR frames are stacked along the channel axis.
    NUMBER_OF_FRAMES = 3
    NUMBER_OF_INPUT_CHANNELS: int = 3 * NUMBER_OF_FRAMES
    NUMBER_OF_OUTPUT_CHANNELS: int = 3
    UPSCALE_RATIO: int = 2
    PATCH_SIZE = 512
    MAX_BATCH_SIZE = 8
    MIN_BATCH_SIZE = 1
    OPT_BATCH_SIZE = MAX_BATCH_SIZE // 2
    # Overlap (sic: "overlab") between neighbouring patches, in pixels.
    INPUT_OVERLAB_LENGTH = 16

    PATCHER_CONFIG = PatcherIOConfig(
        patch_size=PATCH_SIZE,
        upscale_ratio=UPSCALE_RATIO,
        number_of_input_channels=NUMBER_OF_INPUT_CHANNELS,
        number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
        input_overlab_length=INPUT_OVERLAB_LENGTH,
    )
    # The TRT engine consumes one extra input channel: the processor's
    # pre-processing appends a grayscale USM edge map as the last channel,
    # hence "+ 1" relative to the patcher's channel count.
    TRT_CONFIG = TRTIOConfig(
        patch_size=PATCH_SIZE,
        upscale_ratio=UPSCALE_RATIO,
        number_of_input_channels=NUMBER_OF_INPUT_CHANNELS + 1,
        number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
    )
@@ -6,5 +6,6 @@ import uvloop
6
6
  import asyncio
7
7
  import numpy as np
8
8
  from loguru import logger
9
+ import cv2
9
10
  import pms_tensorrt as TRT
10
11
  from pms_inference_engine import IEngineProcessor, EngineIOData, register
@@ -0,0 +1,212 @@
1
+ import subprocess
2
+ from ._dependency import *
3
+ from ._const import *
4
+ from . import patcher
5
+
6
+
7
def _usm_sharp(img: np.ndarray, radius=15) -> np.ndarray:
    """Return the unsharp-mask residual of *img*: the image minus its Gaussian blur.

    Despite the name, this returns only the high-frequency residual, not a
    sharpened image. NOTE(review): if *img* is uint8, ``img - blur`` wraps
    around instead of going negative — confirm this is the intended behaviour
    for the edge-map channel.
    """
    # GaussianBlur requires an odd kernel size.
    kernel = radius + 1 if radius % 2 == 0 else radius
    blurred = cv2.GaussianBlur(img, ksize=(kernel, kernel), sigmaX=0)
    return img - blurred
13
+
14
+
15
def _pre_processing(
    batch_input_images: List[np.ndarray],
    input_buffer: np.ndarray,
) -> None:
    """Fill the TRT host input buffer from a batch of stacked-frame images.

    For each image (H x W x C, three stacked BGR frames):
      * buffer channels 0..C-1 receive the image scaled to [0, 1],
        converted from HWC to CHW layout in place;
      * the last buffer channel receives a grayscale USM edge map computed
        from channels 3:6 (the centre frame), also scaled to [0, 1].

    *input_buffer* layout is B x C x H x W. Only the top-left h x w region
    of each slot is written, so the buffer's pre-initialised contents are
    relied upon outside that region.
    """
    # Compute all edge maps up front, before the per-channel copy loop.
    usm_edge_images = [
        cv2.cvtColor(_usm_sharp(img=img[:, :, 3:6]), cv2.COLOR_BGR2GRAY) / 255.0
        for img in batch_input_images
    ]

    for batch_idx, image in enumerate(batch_input_images):
        h, w, c = image.shape
        for channel_idx in range(c):
            # Normalise straight into the buffer slice (no temporary array).
            np.divide(
                image[:, :, channel_idx],
                255,
                out=input_buffer[batch_idx, channel_idx, :h, :w],
            )
        input_buffer[batch_idx, -1, :h, :w] = usm_edge_images[batch_idx][:h, :w]
38
+
39
+
40
+ def _post_processing(
41
+ output_buffer: np.ndarray, # BxCxHxW
42
+ output_image: np.ndarray, # BxHxWxC
43
+ ) -> None:
44
+
45
+ b, h, w, c = output_image.shape
46
+ pred = np.clip(np.multiply(output_buffer[:b, :, :h, :w], 255), 0, 255).astype(
47
+ np.uint8
48
+ )
49
+ for i in range(3):
50
+ np.copyto(src=pred[:, i, :, :], dst=output_image[:, :, :, i])
51
+
52
+
53
@register
class DRUASMSRF3Processor(IEngineProcessor[EngineIOData, EngineIOData]):
    """TensorRT-backed x2 super-resolution processor for 3-frame DRU-ASM input.

    Lifecycle: ``_bind_io`` (called with a representative frame) builds the
    patcher, sizes the IO shapes, creates the TRT session and binds host
    buffers; ``_run`` pads and slices each incoming frame into patches,
    runs batched ``inference`` and merges patches into the output frame.
    """

    def __init__(
        self,
        concurrency: int,
        index: int,
        model_path: str,
        device_name: str = "cuda",
    ):
        # set member var
        self.index = index
        self.model_path = model_path
        self.device_name = device_name
        self._concurrency = concurrency

        # set loop policy
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        # set device_id: processors are spread round-robin across GPUs
        device_count = TRT.get_device_count()
        device_id = index % device_count
        self.device_id = device_id
        # start init
        logger.info(f"{self.__class__}[{index}]>> Init Start")

        # super
        super().__init__(
            concurrency=concurrency,
            index=index,
        )

        # end init
        logger.info(f"{self.__class__}[{index}]>> Init END")

        self.config = DRUASMSRF3Config

    async def inference(self, batch_input_data: List[np.ndarray]) -> List[np.ndarray]:
        """Run one engine pass over a batch of patches; returns uint8 HWC patches."""
        session = self.session
        patch_size = self.config.PATCH_SIZE  # NOTE(review): unused local
        batch = len(batch_input_data)

        # Output patches are fixed-size: (patch_size * ratio)^2 x C, uint8.
        batch_output_data: np.ndarray = np.zeros(
            (
                batch,
                self.config.PATCHER_CONFIG.patch_size * self.config.UPSCALE_RATIO,
                self.config.PATCHER_CONFIG.patch_size * self.config.UPSCALE_RATIO,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            ),
            np.uint8,
        )
        # Write normalised patches (+ USM edge channel) into the bound host buffer.
        _pre_processing(
            batch_input_images=batch_input_data,
            input_buffer=self.input_buffer,
        )
        session.run()
        # Read the engine's output buffer back into uint8 HWC patches.
        _post_processing(
            output_buffer=self.output_buffer,
            output_image=batch_output_data,
        )
        return [output_data for output_data in batch_output_data]  # unpack

    async def _run(self, input_data: EngineIOData) -> EngineIOData:
        """Process one frame: pad, slice into patches, batch-infer, merge."""
        # Batch capacity was fixed at bind time as the first input dim.
        max_batch_size = self.io_shapes["input"][0][0]
        # patching happens here
        input_image: np.ndarray = input_data.frame  # type: ignore
        patcher_config = self.config.PATCHER_CONFIG
        padded_input_image = patcher.pad_vector(
            vector=input_image,
            overlap_length=patcher_config.input_overlab_length,
        )
        output_image: np.ndarray = np.zeros(
            (
                input_image.shape[0] * self.config.UPSCALE_RATIO,
                input_image.shape[1] * self.config.UPSCALE_RATIO,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            ),
            np.uint8,
        )

        # slice
        input_patches = self.patcher.slice(input_vector=padded_input_image)

        # batch inference
        output_patches = []
        for batch_items in TRT.batch(input_patches, max_batch_size):
            ops = await self.inference(batch_input_data=batch_items)
            output_patches += ops

        self.patcher.merge(output_vector=output_image, patches=output_patches)
        return EngineIOData(frame_id=input_data.frame_id, frame=output_image)

    def _ready_processor(self) -> bool:
        # No extra readiness work beyond _bind_io.
        return True

    def _bind_io(self, input_data: EngineIOData):
        """Build patcher + TRT session and bind host IO buffers for *input_data*'s shape."""
        model_path = self.model_path
        device_id = self.device_id
        patcher_config = self.config.PATCHER_CONFIG
        trt_config = self.config.TRT_CONFIG

        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            input_image,
            overlap_length=patcher_config.input_overlab_length,
        )
        # NOTE(review): no dtype here (defaults to float64) unlike _run's
        # uint8 — appears to be used only for its shape in
        # build_patcher_params; confirm.
        output_image: np.ndarray = np.zeros(
            (
                input_image.shape[0] * self.config.UPSCALE_RATIO,
                input_image.shape[1] * self.config.UPSCALE_RATIO,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            )
        )
        self.patcher = patcher.Patcher(
            **patcher_config.build_patcher_params(
                input_vector=padded_input_image,
                output_vector=output_image,
            )
        )
        n_patches = len(self.patcher.slice(input_vector=padded_input_image))

        # set io shape: batch dim capped by config, shrunk if few patches
        self.batch_size = min(n_patches, self.config.MAX_BATCH_SIZE)
        self.io_shapes = {
            "input": (
                [self.batch_size, *trt_config.input_shape],
                np.float32,
            ),
            "output": (
                [self.batch_size, *trt_config.output_shape],
                np.float32,
            ),
        }

        # init trt engine
        self.session = TRT.TRTSession(
            model_path=model_path,
            device_id=device_id,
            io_shapes=self.io_shapes,
        )

        # warm up
        self.session.run()

        # set io buffer: views over the session's private host bindings
        self.input_buffer = self.session._input_bindings[0].host_buffer.reshape(
            self.io_shapes["input"][0]
        )
        # NOTE(review): the whole input buffer is pre-filled with 1/255 —
        # presumably a neutral value for regions edge/short patches don't
        # overwrite; confirm against the model's training.
        self.input_buffer.fill(1.0 / 255.0)
        self.output_buffer = self.session._output_bindings[0].host_buffer.reshape(
            *self.io_shapes["output"][0]
        )

        return True

    def _get_live(self) -> bool:
        # Processor is always considered live once constructed.
        return True

    def _get_concurrency(self) -> int:
        return self._concurrency
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pms-nvidia-processor"
3
- version = "0.2.0"
3
+ version = "0.3.0"
4
4
  description = "lib for pms nvidia processors"
5
5
  authors = ["HyeongSeok Kim <tiryul@gmail.com>"]
6
6
  readme = "README.md"