media-toolkit 0.1.1.dev1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/PKG-INFO +2 -2
  2. media_toolkit-0.1.2/media_toolkit/core/file_content_buffer.py +114 -0
  3. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/core/image_file.py +29 -6
  4. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/core/media_file.py +78 -22
  5. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/core/video/video_file.py +20 -6
  6. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/utils/file_conversion.py +5 -2
  7. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit.egg-info/PKG-INFO +2 -2
  8. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit.egg-info/SOURCES.txt +1 -0
  9. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/pyproject.toml +1 -1
  10. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/LICENSE +0 -0
  11. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/README.md +0 -0
  12. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/__init__.py +0 -0
  13. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/core/__init__.py +0 -0
  14. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/core/audio_file.py +0 -0
  15. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/core/video/__init__.py +0 -0
  16. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/core/video/video_utils.py +0 -0
  17. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/utils/__init__.py +0 -0
  18. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/utils/dependency_requirements.py +0 -0
  19. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit/utils/generator_wrapper.py +0 -0
  20. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit.egg-info/dependency_links.txt +0 -0
  21. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit.egg-info/requires.txt +0 -0
  22. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/media_toolkit.egg-info/top_level.txt +0 -0
  23. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/setup.cfg +0 -0
  24. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/test/test_image_file.py +0 -0
  25. {media_toolkit-0.1.1.dev1 → media_toolkit-0.1.2}/test/test_video_file.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: media-toolkit
3
- Version: 0.1.1.dev1
3
+ Version: 0.1.2
4
4
  Summary: Web-ready standardized file processing and serialization. Read, load and convert to standard file types with a common interface.
5
5
  Author: SocAIty
6
6
  License: GPLv3
@@ -0,0 +1,114 @@
1
+ import io
2
+ import os
3
+ import tempfile
4
+
5
+
6
class FileContentBuffer:
    """Store file content either in memory or in a temporary file on disk.

    In memory mode the content lives in an ``io.BytesIO``. In temp-file mode it
    lives in a ``tempfile.NamedTemporaryFile`` created with ``delete=False``;
    that file is removed by :meth:`close`, which is also invoked when the
    object is garbage collected.
    """

    def __init__(
            self,
            use_temp_file: bool = False,
            temp_dir: str = None
    ):
        """Initialize buffer with given configuration.

        :param use_temp_file: If True, store content in a temporary file instead of keeping it in memory.
        :param temp_dir: Directory to store temporary files in. If None, the system default is used.
        """
        self._use_temp_file = use_temp_file
        self._temp_dir = temp_dir

        # Both attributes exist before _initialize_buffer runs so that
        # close()/__del__ stay safe even if creating the temp file fails.
        self._temp_file = None
        self._memory_buffer = None
        self._initialize_buffer()

    @property
    def name(self) -> str:
        """Get the path of the temporary file, if one is used.

        Pending writes are flushed first, so that code opening the returned
        path (for example an external tool) sees everything written so far.

        :return: Path to the temporary file, or None in memory mode or after close().
        """
        if self._temp_file is None:
            return None
        if not self._temp_file.closed:
            self._temp_file.flush()
        return self._temp_file.name

    def _initialize_buffer(self):
        """Create the backing storage that matches the configuration."""
        if self._use_temp_file:
            # delete=False: the file must survive being reopened by path;
            # close() is responsible for removing it.
            self._temp_file = tempfile.NamedTemporaryFile(
                delete=False,
                dir=self._temp_dir
            )
            self._memory_buffer = None
        else:
            self._memory_buffer = io.BytesIO()
            self._temp_file = None

    def _stream(self):
        """Return the active binary stream.

        :raises ValueError: If the buffer has already been closed.
        """
        stream = self._temp_file if self._use_temp_file else self._memory_buffer
        if stream is None:
            raise ValueError("I/O operation on closed FileContentBuffer.")
        return stream

    def write(self, data: bytes):
        """Write data to the buffer at the current position.

        :param data: Bytes to write.
        """
        self._stream().write(data)

    def read(self) -> bytes:
        """Read the complete content of the buffer.

        The stream is rewound to the start first, so the whole content is
        returned regardless of the current position.

        :return: All bytes stored in the buffer.
        """
        stream = self._stream()
        stream.seek(0)
        return stream.read()

    def seek(self, offset: int, whence: int = 0):
        """Move the stream position.

        :param offset: Position to seek to, interpreted relative to ``whence``.
        :param whence: Reference point as in ``io.IOBase.seek`` (default: start of stream).
        :return: The new absolute position.
        """
        return self._stream().seek(offset, whence)

    def truncate(self, size: int):
        """Resize the buffer to the given size.

        :param size: Size in bytes to truncate to.
        """
        self._stream().truncate(size)

    def getbuffer(self) -> memoryview:
        """Get a memoryview of the buffer content.

        Note: in temp-file mode the whole file is read into memory to build
        the view; in memory mode the view references the BytesIO storage.

        :return: View of the buffer content.
        """
        if self._use_temp_file:
            return memoryview(self.read())
        return self._memory_buffer.getbuffer()

    def close(self):
        """Close and delete the temporary file, if any. Safe to call repeatedly."""
        temp_file = getattr(self, "_temp_file", None)
        if temp_file is None:
            return
        self._temp_file = None
        try:
            temp_file.close()
        finally:
            # Remove the file even if closing the handle failed.
            try:
                os.remove(temp_file.name)
            except FileNotFoundError:
                pass

    def __del__(self):
        """Clean up the temporary file on garbage collection (best effort)."""
        try:
            self.close()
        except Exception:
            # A finalizer must never raise; unlike a bare ``except`` this
            # still lets KeyboardInterrupt/SystemExit propagate.
            pass
@@ -1,3 +1,5 @@
1
+ import os.path
2
+
1
3
  from media_toolkit.utils.dependency_requirements import requires_numpy, requires_cv2, requires
2
4
  from media_toolkit.core.media_file import MediaFile
3
5
 
@@ -44,15 +46,34 @@ class ImageFile(MediaFile):
44
46
 
45
47
  @requires_cv2()
46
48
  def save(self, path: str):
49
+ # set to working directory if path is None
50
+ if path is None:
51
+ path = os.path.curdir
52
+ # create folder if not exists
53
+ elif os.path.dirname(path) != "" and not os.path.exists(os.path.dirname(path)):
54
+ os.makedirs(os.path.dirname(path))
55
+
56
+ # check if path contains a file name add default if not given
57
+ if os.path.isdir(path):
58
+ if self.file_name is None:
59
+ self.file_name = "media_toolkit_output"
60
+ print(f"No file name given. Using {self.file_name}")
61
+ path = os.path.join(path, self.file_name)
47
62
  cv2.imwrite(path, self.to_np_array())
48
63
 
49
64
  def _file_info(self):
50
65
  super()._file_info()
51
66
  np_array = self.to_np_array()
67
+ if self.file_size() > 0:
68
+ try:
69
+ img_type, self._channels = self.detect_image_type_and_channels(np_array)
70
+ except Exception as e:
71
+ print(f"Could not detect image type and channels. Error: {e}")
72
+ img_type = None
73
+ self._channels = None
52
74
 
53
- img_type, self._channels = self.detect_image_type_and_channels(np_array)
54
- if img_type is not None:
55
- self.content_type = f"image/{img_type}"
75
+ if img_type is not None:
76
+ self.content_type = f"image/{img_type}"
56
77
 
57
78
 
58
79
  @staticmethod
@@ -61,14 +82,16 @@ class ImageFile(MediaFile):
61
82
  if isinstance(image, list):
62
83
  image = np.array(image)
63
84
 
64
- # Check the number of _channels
85
+ if not hasattr(image, 'shape'):
86
+ raise ValueError("Unsupported image type")
87
+
65
88
  if len(image.shape) == 2:
66
89
  channels = 1 # Grayscale
67
90
  elif len(image.shape) == 3:
68
91
  channels = image.shape[2]
69
92
  else:
70
- #raise ValueError("Unsupported image shape: {}".format(image.shape))
71
- return None, None
93
+ raise ValueError("Unsupported image shape: {}".format(image.shape))
94
+ # return None, None
72
95
 
73
96
  # Detect image type by checking for specific markers
74
97
  image_type = None
@@ -2,10 +2,11 @@ import base64
2
2
  import io
3
3
  import mimetypes
4
4
 
5
- from typing import Union, BinaryIO
5
+ from typing import Union, BinaryIO, Tuple, Optional
6
6
  import os
7
7
  from urllib.parse import urlparse
8
8
 
9
+ from media_toolkit.core.file_content_buffer import FileContentBuffer
9
10
  from media_toolkit.utils.dependency_requirements import requires_numpy
10
11
 
11
12
  import re
@@ -22,15 +23,25 @@ class MediaFile:
22
23
  Works natively with bytesio, base64 and binary data.
23
24
  """
24
25
 
25
- def __init__(self, file_name: str = "file", content_type: str = "application/octet-stream"):
26
+ def __init__(
27
+ self,
28
+ file_name: str = "file",
29
+ content_type: str = "application/octet-stream",
30
+ use_temp_file: bool = False,
31
+ temp_dir: str = None
32
+ ):
26
33
  """
27
34
  :param file_name: The name of the file. Note it is overwritten if you use from_file/from_starlette.
28
35
  :param content_type: The content type of the file. Note it is overwritten if you use from_file/from_starlette.
36
+ :param use_temp_file: If True, the file is saved to a temporary file. This is useful for large files.
37
+ :param max_file_size: The maximum file size in bytes. If the file is larger lib will throw an error.
38
+ :param temp_dir: The directory where the temporary file is saved. If None, the system temp dir is used.
29
39
  """
30
40
  self.content_type = content_type
31
41
  self.file_name = file_name # the name of the file also when specified in bytesio
32
42
  self.path = None # the path of the file if it was provided. Is also indicator if file was loaded from file.
33
- self._content_buffer = io.BytesIO()
43
+
44
+ self._content_buffer = FileContentBuffer(use_temp_file=use_temp_file, temp_dir=temp_dir)
34
45
 
35
46
  def from_any(self, data):
36
47
  """
@@ -71,7 +82,7 @@ class MediaFile:
71
82
  Set the content of the file from a BytesIO or a file handle.
72
83
  :params buffer: The buffer to read from.
73
84
  :params copy: If true, the buffer is completely read to bytes and the bytes copied to this file.
74
- If false file works with the provided buffer. Danger -- The buffer is kept open.
85
+ If false file works with the provided buffer. Danger -- The buffer is kept open (not thread safe).
75
86
  """
76
87
  if not type(buffer) in [io.BytesIO, io.BufferedReader]:
77
88
  raise ValueError(f"Buffer must be of type BytesIO or BufferedReader. Got {type(buffer)}")
@@ -141,10 +152,9 @@ class MediaFile:
141
152
  """
142
153
  Load a file which was encoded as a base64 string.
143
154
  """
144
-
145
- decoded = self._decode_base_64_if_is(base64_str)
155
+ decoded, media_type = self._decode_base_64_if_is(base64_str)
146
156
  if decoded is not None:
147
- return self.from_bytes(base64.b64decode(base64_str))
157
+ return self.from_bytes(decoded)
148
158
  else:
149
159
  err_str = base64_str if len(base64_str) <= 50 else base64_str[:50] + "..."
150
160
  raise ValueError(f"Decoding from base64 like string {err_str} was not possible. Check your data.")
@@ -169,14 +179,15 @@ class MediaFile:
169
179
  self.from_base64(file_result_json["content"])
170
180
  return self
171
181
 
172
- def from_url(self, url: str):
182
+ def from_url(self, url: str, headers: dict = None):
173
183
  """
174
184
  Download a file from an url.
175
185
  """
176
186
  # code inspired by: https://github.com/runpod/runpod-python/blob/main/runpod/serverless/utils/rp_download.py
177
187
  import requests
178
- HEADERS = {"User-Agent": "runpod-python/0.0.0 (https://runpod.io; support@runpod.io)"}
179
- with requests.get(url, headers=HEADERS, stream=True, timeout=5) as response:
188
+
189
+ headers = headers or {"User-Agent": "runpod-python/0.0.0 (https://runpod.io; support@runpod.io)"}
190
+ with requests.get(url, headers=headers, stream=True, timeout=5) as response:
180
191
  response.raise_for_status()
181
192
 
182
193
  # get orig file name or create new
@@ -210,9 +221,6 @@ class MediaFile:
210
221
  file.write(chunk)
211
222
  file.name = original_file_name
212
223
  self.file_name = original_file_name
213
-
214
- # self.url = url
215
-
216
224
  return self.from_bytesio_or_handle(file, copy=False)
217
225
 
218
226
  @requires_numpy()
@@ -280,10 +288,6 @@ class MediaFile:
280
288
  print(f"No file name given. Using {self.file_name}")
281
289
  path = os.path.join(path, self.file_name)
282
290
 
283
- # check if has extension
284
- # if os.path.splitext(path)[1] == "":
285
- # path += ".mp4"
286
-
287
291
  with open(path, 'wb') as file:
288
292
  file.write(self.read())
289
293
 
@@ -313,7 +317,7 @@ class MediaFile:
313
317
 
314
318
  def file_size(self, unit="bytes") -> int:
315
319
  """
316
- :param unit:
320
+ :param unit: bytes, kb, mb or gb
317
321
  """
318
322
  size_in_ = self._content_buffer.getbuffer().nbytes
319
323
  if unit == "bytes":
@@ -326,6 +330,26 @@ class MediaFile:
326
330
  size_in_ = size_in_ / 1000000000
327
331
  return size_in_
328
332
 
333
@property
def extension(self) -> Union[str, None]:
    """
    Guess the file extension.
    The detected mimetype (content_type) is tried first; the generic
    "application/octet-stream" is ignored because it carries no type information.
    If the mimetype yields nothing, the extension is taken from the file name.
    :return: the guessed file extension without '.', or None if it cannot be determined.
    """
    if self.content_type and self.content_type != "application/octet-stream":
        guessed_ext = mimetypes.guess_extension(self.content_type)
        if guessed_ext:
            return guessed_ext.replace(".", "").lower()

    # Fall back to the file name; without a name there is nothing left to guess from.
    if not self.file_name:
        return None

    # splitext returns "" when the name has no extension.
    suffix = os.path.splitext(self.file_name)[1]
    if not suffix:
        return None
    return suffix[1:]
352
+
329
353
  def __bytes__(self):
330
354
  return self.to_bytes()
331
355
 
@@ -344,13 +368,42 @@ class MediaFile:
344
368
  }
345
369
 
346
370
  @staticmethod
347
- def _decode_base_64_if_is(data: Union[bytes, str]):
371
+ def _parse_base64_uri(data: str) -> Tuple[str, Optional[str]]:
348
372
  """
349
- Checks if a string is base64. If it is, it returns the base64 string as bytes; else returns None.
373
+ Parse base64 string, handling data URI format and extracting content.
374
+ Args:
375
+ data (str): Base64 encoded string, potentially with data URI prefix
376
+ Returns:
377
+ Tuple of (base64 content, optional media_type)
350
378
  """
379
+ # Regex to match data URI format: data:[<media type>][;base64],<data>
380
+ data_uri_pattern = r'^data:(?P<mediatype>[\w/\-\.]+)?(?:;base64)?,(?P<base64>.*)'
381
+
382
+ # Check if the string matches data URI format
383
+ match = re.match(data_uri_pattern, data)
384
+ if match:
385
+ # Extract media type and base64 content
386
+ media_type = match.group('mediatype')
387
+ base64_content = match.group('base64')
388
+ return base64_content, media_type
389
+
390
+ # If no data URI prefix, return the original string
391
+ return data, None
392
+
393
+ @staticmethod
394
+ def _decode_base_64_if_is(data: Union[bytes, str]) -> [Union[str, None], Union[str, None]]:
395
+ """
396
+ Checks if a string is base64 (or base64uri).
397
+ :param data: The data to decode.
398
+ :return: If is base64 (decoded base64 data as bytes, optional media_type) else None, None
399
+ """
400
+ media_type = None
351
401
  if isinstance(data, str):
402
+ # check if is uri format and parse it
403
+ data, media_type = MediaFile._parse_base64_uri(data)
352
404
  data = data.encode()
353
405
 
406
+ # Decode and Re-encode the data to check if it is valid base64
354
407
  try:
355
408
  # Decode the data
356
409
  decoded = base64.b64decode(data, validate=True)
@@ -358,11 +411,11 @@ class MediaFile:
358
411
  back_encoded = base64.b64encode(decoded)
359
412
  # Compare with the original encoded data
360
413
  if back_encoded == data:
361
- return decoded
414
+ return decoded, media_type
362
415
  except Exception:
363
416
  pass
364
417
 
365
- return None
418
+ return None, None
366
419
 
367
420
  @staticmethod
368
421
  def _is_valid_file_path(path: str):
@@ -374,4 +427,7 @@ class MediaFile:
374
427
 
375
428
  @staticmethod
376
429
  def _is_url(url: str):
430
+ if not isinstance(url, str):
431
+ return False
432
+
377
433
  return urlparse(url).scheme in ['http', 'https']
@@ -43,6 +43,7 @@ class VideoFile(MediaFile):
43
43
  self.shape = None
44
44
  self.duration = None
45
45
  self.audio_sample_rate = None
46
+ self._temp_file_path = None # if to_temp_file is called, the path is stored here. Needed clean deletion
46
47
 
47
48
  def from_files(self, image_files: Union[List[str], list], frame_rate: int = 30, audio_file=None):
48
49
  """
@@ -142,6 +143,11 @@ class VideoFile(MediaFile):
142
143
  if suffix == 'octet-stream':
143
144
  raise ValueError("The content type of the video file is not valid. Read a video file first.")
144
145
 
146
+ # If already using temp file storage, return path
147
+ if self._content_buffer._use_temp_file:
148
+ return self._content_buffer.name
149
+
150
+ # create new temp file
145
151
  with tempfile.NamedTemporaryFile(delete=False, suffix=f".{suffix}") as temp_video_file:
146
152
  temp_video_file.write(self.read())
147
153
  temp_video_file_path = temp_video_file.name
@@ -215,20 +221,20 @@ class VideoFile(MediaFile):
215
221
  # get video info
216
222
  info = mediainfo(path)
217
223
 
218
- def info_to_number(key: str, default_val=None):
224
+ def info_to_number(key: str, default_val=None, cast=float):
219
225
  if key in info:
220
226
  val = info[key]
221
227
  if val == 'N/A':
222
228
  return default_val
223
229
  # split if / in val and take first
224
230
  val = val.split("/")[0]
225
- return float(val)
231
+ return cast(val)
226
232
  return default_val
227
233
 
228
- self.frame_count = info_to_number('nb_frames')
234
+ self.frame_count = info_to_number('nb_frames', cast=int)
229
235
  self.duration = info_to_number('duration')
230
- self.width = info_to_number('width')
231
- self.height = info_to_number('height')
236
+ self.width = info_to_number('width', cast=int)
237
+ self.height = info_to_number('height', cast=int)
232
238
  self.shape = (self.width, self.height)
233
239
  self.audio_sample_rate = info_to_number('sample_rate', 44100)
234
240
 
@@ -366,4 +372,12 @@ class VideoFile(MediaFile):
366
372
  return self.to_video_stream()
367
373
 
368
374
  def __len__(self):
369
- return self.frame_count
375
+ return int(self.frame_count)
376
+
377
+ def __del__(self):
378
+ if self._temp_file_path is not None:
379
+ try:
380
+ os.remove(self._temp_file_path)
381
+ except Exception as e:
382
+ print("Could not delete temporary file. Error: ", e)
383
+
@@ -37,12 +37,15 @@ def media_from_file(file_path: str) -> Union[MediaFile, ImageFile, AudioFile, Vi
37
37
  return MediaFile().from_file(file_path)
38
38
 
39
39
 
40
- def media_from_any(file, media_file_type=None):
40
+ def media_from_any(file, media_file_type=None, use_temp_file: bool = False, temp_dir: str = None) -> MediaFile:
41
41
  """
42
42
  Converts a file to a send able format.
43
43
  :param file: The file to convert.
44
44
  :param media_file_type: The target type to convert to. If not specified will be converted to MediaFile.
45
45
  Use ImageFile, AudioFile, VideoFile to convert to those types.
46
+ :param use_temp_file: If True, a temporary file will be used to store the data within the media-file.
47
+ If False, the data is kept in RAM.
48
+ :param temp_dir: The directory to store the temporary file in. If not specified, the default temp directory will be used.
46
49
  :return: The send able file.
47
50
  """
48
51
  # it is already converted
@@ -53,7 +56,7 @@ def media_from_any(file, media_file_type=None):
53
56
  target_class = MediaFile
54
57
  if media_file_type is not None and issubclass(media_file_type, MediaFile):
55
58
  target_class = media_file_type
56
- media_file_instance = target_class()
59
+ media_file_instance = target_class(use_temp_file=use_temp_file, temp_dir=temp_dir)
57
60
 
58
61
  # load data
59
62
  media_file_instance = media_file_instance.from_any(file)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: media-toolkit
3
- Version: 0.1.1.dev1
3
+ Version: 0.1.2
4
4
  Summary: Web-ready standardized file processing and serialization. Read, load and convert to standard file types with a common interface.
5
5
  Author: SocAIty
6
6
  License: GPLv3
@@ -9,6 +9,7 @@ media_toolkit.egg-info/requires.txt
9
9
  media_toolkit.egg-info/top_level.txt
10
10
  media_toolkit/core/__init__.py
11
11
  media_toolkit/core/audio_file.py
12
+ media_toolkit/core/file_content_buffer.py
12
13
  media_toolkit/core/image_file.py
13
14
  media_toolkit/core/media_file.py
14
15
  media_toolkit/core/video/__init__.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "media-toolkit"
7
- version = "0.1.1.dev1"
7
+ version = "0.1.2"
8
8
  description = "Web-ready standardized file processing and serialization. Read, load and convert to standard file types with a common interface."
9
9
  requires-python = ">=3.8"
10
10
  authors = [