rapidata 2.4.0__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rapidata/rapidata_client/assets/_media_asset.py +118 -17
- {rapidata-2.4.0.dist-info → rapidata-2.4.1.dist-info}/METADATA +1 -1
- {rapidata-2.4.0.dist-info → rapidata-2.4.1.dist-info}/RECORD +5 -5
- {rapidata-2.4.0.dist-info → rapidata-2.4.1.dist-info}/LICENSE +0 -0
- {rapidata-2.4.0.dist-info → rapidata-2.4.1.dist-info}/WHEEL +0 -0
|
@@ -12,6 +12,8 @@ from PIL import Image
|
|
|
12
12
|
from tinytag import TinyTag
|
|
13
13
|
import tempfile
|
|
14
14
|
from pydantic import StrictStr, StrictBytes
|
|
15
|
+
from typing import Optional
|
|
16
|
+
import logging
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class MediaAsset(BaseAsset):
|
|
@@ -26,6 +28,7 @@ class MediaAsset(BaseAsset):
|
|
|
26
28
|
Raises:
|
|
27
29
|
FileNotFoundError: If the provided file path does not exist.
|
|
28
30
|
"""
|
|
31
|
+
_logger = logging.getLogger(__name__ + '.MediaAsset')
|
|
29
32
|
|
|
30
33
|
ALLOWED_TYPES = [
|
|
31
34
|
'image/',
|
|
@@ -33,6 +36,28 @@ class MediaAsset(BaseAsset):
|
|
|
33
36
|
'video/mp4', # MP4
|
|
34
37
|
]
|
|
35
38
|
|
|
39
|
+
MIME_TYPES = {
|
|
40
|
+
'jpg': 'image/jpeg',
|
|
41
|
+
'jpeg': 'image/jpeg',
|
|
42
|
+
'png': 'image/png',
|
|
43
|
+
'gif': 'image/gif',
|
|
44
|
+
'webp': 'image/webp',
|
|
45
|
+
'mp3': 'audio/mp3',
|
|
46
|
+
'mp4': 'video/mp4'
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
FILE_SIGNATURES = {
|
|
50
|
+
b'\xFF\xD8\xFF': 'image/jpeg',
|
|
51
|
+
b'\x89PNG\r\n\x1a\n': 'image/png',
|
|
52
|
+
b'GIF87a': 'image/gif',
|
|
53
|
+
b'GIF89a': 'image/gif',
|
|
54
|
+
b'RIFF': 'image/webp',
|
|
55
|
+
b'ID3': 'audio/mp3',
|
|
56
|
+
b'\xFF\xFB': 'audio/mp3',
|
|
57
|
+
b'\xFF\xF3': 'audio/mp3',
|
|
58
|
+
b'ftyp': 'video/mp4',
|
|
59
|
+
}
|
|
60
|
+
|
|
36
61
|
def __init__(self, path: str):
|
|
37
62
|
"""
|
|
38
63
|
Initialize a MediaAsset instance.
|
|
@@ -134,35 +159,111 @@ class MediaAsset(BaseAsset):
|
|
|
134
159
|
name = name + '.jpg'
|
|
135
160
|
return name
|
|
136
161
|
|
|
162
|
+
def __get_media_type_from_extension(self, url: str) -> Optional[str]:
    """
    Determine media type from URL file extension.

    Only the path component of the URL is inspected, so neither a query
    string (``?name=x.mp4``) nor a fragment (``#t=10``) can leak into the
    extension. The extension is whatever follows the last ``.`` of the
    last path segment, compared case-insensitively against ``MIME_TYPES``.

    Args:
        url: The URL to check

    Returns:
        Optional[str]: MIME type registered in ``MIME_TYPES`` for the
        extension; None if the URL cannot be parsed, the last path segment
        has no ``.``, or the extension is not registered
    """
    # Function-scope import so the method does not depend on a file-level import.
    from urllib.parse import urlsplit

    try:
        path = urlsplit(url).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. an unbalanced IPv6 bracket).
        return None

    filename = path.rsplit('/', 1)[-1]
    if '.' not in filename:
        # Without a dot there is no extension; do not treat the whole
        # segment as one.
        return None

    ext = filename.rsplit('.', 1)[-1].lower()
    return self.MIME_TYPES.get(ext)
|
|
177
|
+
|
|
178
|
+
def __validate_image_content(self, content: bytes) -> bool:
    """
    Check whether the given bytes can be opened and verified as an image by PIL.

    Args:
        content: Raw bytes expected to hold an image

    Returns:
        bool: True when PIL opens and verifies the data, False when any
        exception is raised along the way (the reason is logged at debug level)
    """
    try:
        Image.open(BytesIO(content)).verify()
    except Exception as e:
        # Any failure simply means "not a valid image" to the caller.
        self._logger.debug(f"Image validation failed: {str(e)}")
        return False
    return True
|
|
195
|
+
|
|
196
|
+
def __get_media_type_from_signature(self, content: bytes) -> Optional[str]:
    """
    Determine media type from file signature.

    A signature from ``FILE_SIGNATURES`` matches when the content starts
    with it or when it occurs anywhere in the first 10 bytes (the latter
    lets a marker that is not at offset 0, such as ``ftyp`` at offset 4,
    match). ``RIFF`` is a generic container marker that is also used by
    non-WebP formats (e.g. WAV, AVI), so a ``RIFF`` match additionally
    requires the ``WEBP`` form type at bytes 8-11.

    Args:
        content: File content bytes

    Returns:
        Optional[str]: MIME type if valid signature found, None otherwise
    """
    # Only the header is needed; slicing also makes empty content safe.
    file_start = content[:32]
    for signature, mime_type in self.FILE_SIGNATURES.items():
        if not (file_start.startswith(signature) or signature in file_start[:10]):
            continue
        if signature == b'RIFF' and file_start[8:12] != b'WEBP':
            # RIFF container holding something other than WebP.
            continue
        return mime_type
    return None
|
|
211
|
+
|
|
137
212
|
def __get_media_bytes(self, url: str) -> bytes:
    """
    Downloads and validates media files from URL.

    The downloaded body is accepted as soon as one of the following checks
    succeeds, tried in this order:

    1. the response ``content-type`` header starts with an allowed type,
    2. the URL file extension maps to an allowed MIME type,
    3. the file signature maps to an allowed MIME type,
    4. the body can be opened and verified as an image.

    Args:
        url: URL of the media file

    Returns:
        bytes: Validated media content

    Raises:
        ValueError: If media type is unsupported or content validation fails
        requests.exceptions.RequestException: If download fails (including
            connect/read timeouts)
    """
    # (connect, read) timeout in seconds. Without a timeout, requests waits
    # indefinitely on an unresponsive server. The read timeout limits the
    # wait between received bytes, not the total download time.
    request_timeout = (10, 60)

    try:
        response = requests.get(url, stream=False, timeout=request_timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        self._logger.error(f"Failed to download media from {url}: {str(e)}")
        raise

    content = response.content
    content_type = response.headers.get('content-type', '').lower()

    # Case 1: Content-type is already allowed
    if any(content_type.startswith(t) for t in self.ALLOWED_TYPES):
        self._logger.debug(f"Content-type {content_type} is allowed")
        return content

    # Case 2: Try to validate based on extension
    # NOTE(review): this case trusts the URL alone; the downloaded body is
    # not inspected here.
    mime_type = self.__get_media_type_from_extension(url)
    if mime_type and mime_type.startswith(tuple(self.ALLOWED_TYPES)):
        self._logger.debug(f"Found valid mime type from extension: {mime_type}")
        return content

    # Case 3: Try to validate based on file signature
    mime_type = self.__get_media_type_from_signature(content)
    if mime_type and mime_type.startswith(tuple(self.ALLOWED_TYPES)):
        self._logger.debug(f"Found valid mime type from signature: {mime_type}")
        return content

    # Case 4: Last resort - try direct image validation
    if self.__validate_image_content(content):
        self._logger.debug("Content validated as image through direct validation")
        return content

    # If we get here, validation failed
    error_msg = (
        f'Could not validate media type from content.\n'
        f'Content-Type: {content_type}\n'
        f'URL extension: {url.split("?")[0].split(".")[-1]}\n'
        f'Allowed types: {self.ALLOWED_TYPES}'
    )
    self._logger.error(error_msg)
    raise ValueError(error_msg)
|
|
166
267
|
|
|
167
268
|
def to_file(self) -> StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes: # types for autogenerated models
|
|
168
269
|
if isinstance(self.path, str):
|
|
@@ -349,7 +349,7 @@ rapidata/api_client_README.md,sha256=TwdMWXKwwnzD5CyLjXOqAwKVyMxTJcKewudONv31p40
|
|
|
349
349
|
rapidata/rapidata_client/__init__.py,sha256=kkT6FMU4P8rTdYKTJgZNcyptr-Rq1iZmsyLIyRCwpYA,896
|
|
350
350
|
rapidata/rapidata_client/assets/__init__.py,sha256=hKgrOSn8gJcBSULaf4auYhH1S1N5AfcwIhBSq1BOKwQ,323
|
|
351
351
|
rapidata/rapidata_client/assets/_base_asset.py,sha256=B2YWH1NgaeYUYHDW3OPpHM_bqawHbH4EjnRCE2BYwiM,298
|
|
352
|
-
rapidata/rapidata_client/assets/_media_asset.py,sha256=
|
|
352
|
+
rapidata/rapidata_client/assets/_media_asset.py,sha256=s7_bfhvYEFLIvpZvECsjtVSLnvZmi19xLJoOsDShzgk,9391
|
|
353
353
|
rapidata/rapidata_client/assets/_multi_asset.py,sha256=o4_-OvovADSVpl0tr6fPzRR_gHCcIQKfeZKcoFDFBLo,1667
|
|
354
354
|
rapidata/rapidata_client/assets/_text_asset.py,sha256=itoe3vicn41LbdJ7UeydImORUo9iDL0SZu-ptOlbMRM,618
|
|
355
355
|
rapidata/rapidata_client/assets/data_type_enum.py,sha256=ELC-ymeKnQlfNAzfqsI7MmUuRiGYamCHVcTc0qR6Fm4,185
|
|
@@ -427,7 +427,7 @@ rapidata/service/credential_manager.py,sha256=Of0BQs_V1T7rkrWX9groLX790nOknaARwn
|
|
|
427
427
|
rapidata/service/local_file_service.py,sha256=pgorvlWcx52Uh3cEG6VrdMK_t__7dacQ_5AnfY14BW8,877
|
|
428
428
|
rapidata/service/openapi_service.py,sha256=Z4NrAuilLlIWBdGOv6otz36tHS_vvU36w5jmvOUTmqo,3198
|
|
429
429
|
rapidata/service/token_manager.py,sha256=JZ5YbR5Di8dO3H4kK11d0kzWlrXxjgCmeNkHA4AapCM,6425
|
|
430
|
-
rapidata-2.4.
|
|
431
|
-
rapidata-2.4.
|
|
432
|
-
rapidata-2.4.
|
|
433
|
-
rapidata-2.4.
|
|
430
|
+
rapidata-2.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
431
|
+
rapidata-2.4.1.dist-info/METADATA,sha256=nOQUJTI2cPvoUj7WS7N1Y2d5L_ZW1Tx81tb8SawafqQ,1107
|
|
432
|
+
rapidata-2.4.1.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
433
|
+
rapidata-2.4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|