pms-nvidia-processor 0.2.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pms-nvidia-processor
3
- Version: 0.2.0
3
+ Version: 0.3.1
4
4
  Summary: lib for pms nvidia processors
5
5
  Author: HyeongSeok Kim
6
6
  Author-email: tiryul@gmail.com
@@ -1,5 +1,6 @@
1
1
  from ._dpir_processor import DPIRProcessor
2
2
  from ._dru_rbpn_sr_f3_processor import DRURBPNSRF3Processor
3
3
  from ._dru_rbpn_sr_f5_processor import DRURBPNSRF5Processor
4
+ from ._dru_asm_sr_f3_processor import DRUASMSRF3Processor
4
5
 
5
- __version__ = "0.2.0"
6
+ __version__ = "0.3.1"
@@ -144,3 +144,29 @@ class DRURBPNSRF5Config:
144
144
  number_of_input_channels=NUMBER_OF_INPUT_CHANNELS,
145
145
  number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
146
146
  )
147
+
148
+
149
class DRUASMSRF3Config:
    """Static I/O constants for ``DRUASMSRF3Processor``.

    The class is used as a plain namespace (it is never instantiated in this
    package); ``PATCHER_CONFIG`` and ``TRT_CONFIG`` are built once at class
    creation time from the scalar constants above them.
    """

    NUMBER_OF_FRAMES = 3
    # Three colour channels for each stacked input frame.
    NUMBER_OF_INPUT_CHANNELS: int = 3 * NUMBER_OF_FRAMES
    # One extra plane fed to the TensorRT engine in addition to the image
    # channels: the pre-processing step writes an edge map into the last
    # input channel.
    NUMBER_OF_EDGE_CHANNELS: int = 1
    NUMBER_OF_OUTPUT_CHANNELS: int = 3
    UPSCALE_RATIO: int = 2
    PATCH_SIZE = 512
    MAX_BATCH_SIZE = 8
    MIN_BATCH_SIZE = 1
    OPT_BATCH_SIZE = MAX_BATCH_SIZE // 2
    # NOTE: "OVERLAB" (sic) is kept because it mirrors the keyword name
    # expected by PatcherIOConfig.
    INPUT_OVERLAB_LENGTH = 16

    # Patch slicing/merging works on the image channels only.
    PATCHER_CONFIG = PatcherIOConfig(
        patch_size=PATCH_SIZE,
        upscale_ratio=UPSCALE_RATIO,
        number_of_input_channels=NUMBER_OF_INPUT_CHANNELS,
        number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
        input_overlab_length=INPUT_OVERLAB_LENGTH,
    )
    # The engine input additionally carries the edge plane.
    TRT_CONFIG = TRTIOConfig(
        patch_size=PATCH_SIZE,
        upscale_ratio=UPSCALE_RATIO,
        number_of_input_channels=NUMBER_OF_INPUT_CHANNELS + NUMBER_OF_EDGE_CHANNELS,
        number_of_output_channels=NUMBER_OF_OUTPUT_CHANNELS,
    )
@@ -6,5 +6,6 @@ import uvloop
6
6
  import asyncio
7
7
  import numpy as np
8
8
  from loguru import logger
9
+ import cv2
9
10
  import pms_tensorrt as TRT
10
11
  from pms_inference_engine import IEngineProcessor, EngineIOData, register
@@ -0,0 +1,214 @@
1
+ import subprocess
2
+ from ._dependency import *
3
+ from ._const import *
4
+ from . import patcher
5
+
6
+
7
def _usm_sharp(img: np.ndarray, radius=15) -> np.ndarray:
    """Return the high-frequency residual ``img - GaussianBlur(img)``.

    Despite the name, this does not return a sharpened image: it returns only
    the unsharp-mask residual (the part a sharpening step would add back).

    Args:
        img: Image array accepted by ``cv2.GaussianBlur``.
        radius: Gaussian kernel side length. OpenCV requires an odd kernel
            size, so an even value is bumped to the next odd number.

    Returns:
        The residual, same shape as ``img``. Integer input is promoted to
        ``float32`` first; floating-point input keeps its dtype.
    """
    if radius % 2 == 0:
        radius += 1
    # The residual is signed. Subtracting in an unsigned integer dtype would
    # wrap negative values around (e.g. 3 - 5 -> 254 for uint8), so integer
    # images are promoted to float before blurring and subtracting.
    if np.issubdtype(img.dtype, np.integer):
        img = img.astype(np.float32)
    blur = cv2.GaussianBlur(img, ksize=(radius, radius), sigmaX=0)
    return img - blur
13
+
14
+
15
def _pre_processing(
    batch_input_images: List[np.ndarray],
    input_buffer: np.ndarray,
) -> None:
    """Fill ``input_buffer`` in place from a batch of HxWxC images.

    For every image, each channel is divided by 255 and written channel-first
    into ``input_buffer[batch_idx, :C, :H, :W]``. The last channel of the
    buffer entry then receives an edge map: the channel-wise mean of the
    unsharp-mask residual of image channels 3:6.

    Args:
        batch_input_images: Images indexed as ``[row, col, channel]``.
        input_buffer: Destination array indexed as
            ``[batch, channel, row, col]``; it must be at least as large as
            the batch and each image. Regions outside ``:H, :W`` are left
            untouched.
    """
    for batch_idx, image in enumerate(batch_input_images):
        h, w, c = image.shape

        # HWC -> CHW view, divided by 255 straight into the buffer slice.
        np.divide(
            np.moveaxis(image, -1, 0),
            255,
            out=input_buffer[batch_idx, :c, :h, :w],
        )

        # Channels 3:6 are presumably the middle frame of the 3-frame stack
        # -- TODO confirm against the model's expected channel layout.
        edge = np.mean(
            _usm_sharp(img=(image[:, :, 3:6] / 255.0)),
            axis=-1,
            dtype=np.float32,
        )
        # Written after the image channels so the edge plane always ends up
        # in the last buffer channel.
        input_buffer[batch_idx, -1, :h, :w] = edge
40
+
41
+
42
+ def _post_processing(
43
+ output_buffer: np.ndarray, # BxCxHxW
44
+ output_image: np.ndarray, # BxHxWxC
45
+ ) -> None:
46
+
47
+ b, h, w, c = output_image.shape
48
+ pred = np.clip(np.multiply(output_buffer[:b, :, :h, :w], 255), 0, 255).astype(
49
+ np.uint8
50
+ )
51
+ for i in range(3):
52
+ np.copyto(src=pred[:, i, :, :], dst=output_image[:, :, :, i])
53
+
54
+
55
@register
class DRUASMSRF3Processor(IEngineProcessor[EngineIOData, EngineIOData]):
    """Patch-based TensorRT processor configured by ``DRUASMSRF3Config``.

    A frame is padded, sliced into patches, run through the TensorRT session
    in batches and merged back into an output frame that is
    ``UPSCALE_RATIO`` times larger in both spatial dimensions.
    """

    def __init__(
        self,
        concurrency: int,
        index: int,
        model_path: str,
        device_name: str = "cuda",
    ):
        """Store settings, pick a device and initialise the base processor.

        Args:
            concurrency: Forwarded to the base class and reported by
                ``_get_concurrency``.
            index: Processor index; also selects the device via
                ``index % device_count``.
            model_path: Path handed to ``TRT.TRTSession`` in ``_bind_io``.
            device_name: Stored on the instance; not otherwise used here.
        """
        # set member var
        self.index = index
        self.model_path = model_path
        self.device_name = device_name
        self._concurrency = concurrency

        # set loop policy (process-wide side effect)
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

        # set device_id: processors are spread round-robin over the devices
        device_count = TRT.get_device_count()
        self.device_id = index % device_count

        # start init
        logger.info(f"{self.__class__}[{index}]>> Init Start")

        # super
        super().__init__(
            concurrency=concurrency,
            index=index,
        )

        # end init
        logger.info(f"{self.__class__}[{index}]>> Init END")

        self.config = DRUASMSRF3Config

    def _new_output_image(self, input_image: np.ndarray) -> np.ndarray:
        """Allocate a zeroed uint8 output frame for ``input_image``."""
        ratio = self.config.UPSCALE_RATIO
        return np.zeros(
            (
                input_image.shape[0] * ratio,
                input_image.shape[1] * ratio,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            ),
            np.uint8,
        )

    async def inference(self, batch_input_data: List[np.ndarray]) -> List[np.ndarray]:
        """Run one batch of patches through the session.

        Args:
            batch_input_data: Input patches; written into ``self.input_buffer``
                by ``_pre_processing``.

        Returns:
            One uint8 output patch per input patch, each of side
            ``patch_size * UPSCALE_RATIO``.
        """
        out_side = self.config.PATCHER_CONFIG.patch_size * self.config.UPSCALE_RATIO
        batch_output_data: np.ndarray = np.zeros(
            (
                len(batch_input_data),
                out_side,
                out_side,
                self.config.NUMBER_OF_OUTPUT_CHANNELS,
            ),
            np.uint8,
        )
        _pre_processing(
            batch_input_images=batch_input_data,
            input_buffer=self.input_buffer,
        )
        # The session works on the bound host buffers, so no arguments here.
        self.session.run()
        _post_processing(
            output_buffer=self.output_buffer,
            output_image=batch_output_data,
        )
        return list(batch_output_data)  # unpack along the batch axis

    async def _run(self, input_data: EngineIOData) -> EngineIOData:
        """Process one frame: pad, slice, batch-infer and merge the patches."""
        max_batch_size = self.io_shapes["input"][0][0]
        # patching happens here
        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            vector=input_image,
            overlap_length=self.config.PATCHER_CONFIG.input_overlab_length,
        )
        output_image = self._new_output_image(input_image)

        # slice
        input_patches = self.patcher.slice(input_vector=padded_input_image)

        # batch inference
        output_patches = []
        for batch_items in TRT.batch(input_patches, max_batch_size):
            output_patches += await self.inference(batch_input_data=batch_items)

        self.patcher.merge(output_vector=output_image, patches=output_patches)
        return EngineIOData(frame_id=input_data.frame_id, frame=output_image)

    def _ready_processor(self) -> bool:
        return True

    def _bind_io(self, input_data: EngineIOData) -> bool:
        """Build the patcher, TensorRT session and I/O buffers for a frame.

        The frame in ``input_data`` is used only for its shape: it determines
        the patch layout and therefore the session batch size.
        """
        patcher_config = self.config.PATCHER_CONFIG
        trt_config = self.config.TRT_CONFIG

        input_image: np.ndarray = input_data.frame  # type: ignore
        padded_input_image = patcher.pad_vector(
            input_image,
            overlap_length=patcher_config.input_overlab_length,
        )
        # Scratch frame used only to derive patcher parameters; uint8 matches
        # what _run later passes to the patcher and avoids a float64 buffer.
        output_image = self._new_output_image(input_image)
        self.patcher = patcher.Patcher(
            **patcher_config.build_patcher_params(
                input_vector=padded_input_image,
                output_vector=output_image,
            )
        )
        n_patches = len(self.patcher.slice(input_vector=padded_input_image))

        # set io shape: never bind more batch slots than there are patches
        self.batch_size = min(n_patches, self.config.MAX_BATCH_SIZE)
        self.io_shapes = {
            "input": (
                [self.batch_size, *trt_config.input_shape],
                np.float32,
            ),
            "output": (
                [self.batch_size, *trt_config.output_shape],
                np.float32,
            ),
        }

        # init trt engine
        self.session = TRT.TRTSession(
            model_path=self.model_path,
            device_id=self.device_id,
            io_shapes=self.io_shapes,
        )

        # warm up
        self.session.run()

        # set io buffer: reshape the session's host buffers to the bound shapes
        self.input_buffer = self.session._input_bindings[0].host_buffer.reshape(
            self.io_shapes["input"][0]
        )
        self.input_buffer.fill(1.0 / 255.0)
        self.output_buffer = self.session._output_bindings[0].host_buffer.reshape(
            self.io_shapes["output"][0]
        )

        return True

    def _get_live(self) -> bool:
        return True

    def _get_concurrency(self) -> int:
        return self._concurrency
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pms-nvidia-processor"
3
- version = "0.2.0"
3
+ version = "0.3.1"
4
4
  description = "lib for pms nvidia processors"
5
5
  authors = ["HyeongSeok Kim <tiryul@gmail.com>"]
6
6
  readme = "README.md"