media-toolkit 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/PKG-INFO +59 -11
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/README.md +59 -11
- media_toolkit-0.2.3/media_toolkit/__init__.py +4 -0
- media_toolkit-0.2.3/media_toolkit/core/IMediaFile.py +86 -0
- media_toolkit-0.2.3/media_toolkit/core/MediaDict.py +335 -0
- media_toolkit-0.2.3/media_toolkit/core/MediaList.py +210 -0
- media_toolkit-0.2.3/media_toolkit/core/__init__.py +9 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/core/audio_file.py +1 -1
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/core/image_file.py +2 -4
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/core/media_file.py +19 -11
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/core/video/video_file.py +5 -2
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/utils/__init__.py +2 -0
- media_toolkit-0.2.3/media_toolkit/utils/auto_async.py +10 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/utils/file_conversion.py +19 -10
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit.egg-info/PKG-INFO +59 -11
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit.egg-info/SOURCES.txt +4 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/pyproject.toml +1 -1
- media_toolkit-0.2.2/media_toolkit/__init__.py +0 -2
- media_toolkit-0.2.2/media_toolkit/core/__init__.py +0 -4
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/LICENSE +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/core/file_content_buffer.py +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/core/video/__init__.py +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/core/video/video_utils.py +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/utils/dependency_requirements.py +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/utils/generator_wrapper.py +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit/utils/utils.py +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit.egg-info/dependency_links.txt +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit.egg-info/requires.txt +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/media_toolkit.egg-info/top_level.txt +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/setup.cfg +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/test/test_image_file.py +0 -0
- {media_toolkit-0.2.2 → media_toolkit-0.2.3}/test/test_video_file.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: media-toolkit
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Web-ready standardized file processing and serialization. Read, load and convert to standard file types with a common interface.
|
|
5
5
|
Author: SocAIty
|
|
6
6
|
License: MIT License
|
|
@@ -49,11 +49,11 @@ Requires-Dist: vidgear[core]; extra == "videofile"
|
|
|
49
49
|
Dynamic: license-file
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
|
|
52
|
+
<h1 align="center" style="margin-top:-25px">MediaToolkit</h1>
|
|
53
53
|
<p align="center">
|
|
54
54
|
<img align="center" src="docs/media-file-icon.png" height="200" />
|
|
55
55
|
</p>
|
|
56
|
-
|
|
56
|
+
<h3 align="center" style="margin-top:-10px">Web-ready standardized file processing and serialization</h3>
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
# Features
|
|
@@ -64,7 +64,7 @@ Especially useful for code that works with multiple file types like images, audi
|
|
|
64
64
|
Load and convert from and to common data types:
|
|
65
65
|
- numpy arrays
|
|
66
66
|
- file paths
|
|
67
|
-
- bytes
|
|
67
|
+
- bytes
|
|
68
68
|
- base64
|
|
69
69
|
- json
|
|
70
70
|
- urls
|
|
@@ -92,6 +92,7 @@ pip install media-toolkit[VideoFile] # or [AudioFile, VideoFile, ...]
|
|
|
92
92
|
# install from github for newest release
|
|
93
93
|
pip install git+https://github.com/SocAIty/media-toolkit
|
|
94
94
|
```
|
|
95
|
+
|
|
95
96
|
The package checks if you have missing dependencies for certain file types while using.
|
|
96
97
|
Use the ```--no-deps``` flag for a minimal tiny pure python installation.
|
|
97
98
|
The package itself (not counting its dependencies) is quite small: < 39 kB.
|
|
@@ -144,7 +145,58 @@ as_base64 = my_file.to_base64()
|
|
|
144
145
|
as_json = my_file.to_json()
|
|
145
146
|
```
|
|
146
147
|
|
|
147
|
-
|
|
148
|
+
## Working with Collections of Files
|
|
149
|
+
|
|
150
|
+
### MediaList
|
|
151
|
+
A flexible list that can handle multiple media files with type safety:
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from media_toolkit import MediaList, AudioFile
|
|
155
|
+
|
|
156
|
+
# Create a list that only accepts AudioFiles
|
|
157
|
+
audio_list = MediaList[AudioFile]()
|
|
158
|
+
|
|
159
|
+
# Add files to the list
|
|
160
|
+
audio_list.append("path/to/audio.mp3")
|
|
161
|
+
audio_list.extend(["url1", "url2"])
|
|
162
|
+
|
|
163
|
+
# Process all files
|
|
164
|
+
for audio in audio_list:
|
|
165
|
+
print(audio.file_size())
|
|
166
|
+
|
|
167
|
+
# Convert all files to base64
|
|
168
|
+
base64_files = audio_list.to_base64()
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### MediaDict
|
|
172
|
+
A dictionary for organizing media files with keys:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from media_toolkit import MediaDict, ImageFile
|
|
176
|
+
|
|
177
|
+
# Create a dictionary that only accepts ImageFiles
|
|
178
|
+
image_dict = MediaDict[ImageFile]()
|
|
179
|
+
|
|
180
|
+
# Add files with keys
|
|
181
|
+
image_dict["profile"] = "path/to/profile.jpg"
|
|
182
|
+
image_dict["banner"] = "https://example.com/banner.png"
|
|
183
|
+
|
|
184
|
+
# Process files
|
|
185
|
+
for key, image in image_dict.items():
|
|
186
|
+
print(f"{key}: {image.file_size()}")
|
|
187
|
+
|
|
188
|
+
# Convert to JSON
|
|
189
|
+
json_data = image_dict.to_json()
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Both `MediaList` and `MediaDict` support:
|
|
193
|
+
- Type safety with generic types (e.g., `MediaList[AudioFile]`)
|
|
194
|
+
- Lazy loading of files
|
|
195
|
+
- Batch processing
|
|
196
|
+
- Common operations (to_base64, to_bytes, etc.)
|
|
197
|
+
- Nested structures (MediaDict inside MediaList and vice versa)
|
|
198
|
+
|
|
199
|
+
### Working with VideoFiles
|
|
148
200
|
|
|
149
201
|
The VideoFiles wrap the famous [vidgear](https://abhitronix.github.io/vidgear/latest/) package as well as [pydub](https://github.com/jiaaro/pydub).
|
|
150
202
|
VideoFiles support extra methods like audio extraction, combining video and audio.
|
|
@@ -193,7 +245,7 @@ You can use the files in fastapi and transform the starlette upload file to a Me
|
|
|
193
245
|
```python
|
|
194
246
|
@app.post("/upload")
|
|
195
247
|
async def upload_file(file: UploadFile = File(...)):
|
|
196
|
-
mf = ImageFile().
|
|
248
|
+
mf = ImageFile().from_any(file)
|
|
197
249
|
return {"filename": file.filename}
|
|
198
250
|
```
|
|
199
251
|
|
|
@@ -211,16 +263,12 @@ my_files = {
|
|
|
211
263
|
response = httpx.Client().post(url, files=my_files)
|
|
212
264
|
```
|
|
213
265
|
|
|
214
|
-
|
|
215
266
|
# How it works
|
|
216
267
|
|
|
217
268
|
When a media file is loaded with one of the ```from_*``` methods, its content is converted to an intermediate representation.
|
|
218
269
|
The ```to_*``` methods then convert it to the desired format.
|
|
219
270
|
|
|
220
|
-
Currently the intermediate representation is supported in memory with (BytesIO).
|
|
221
|
-
|
|
271
|
+
Currently the intermediate representation is kept either in memory (BytesIO) or on disk in temporary files.
|
|
222
272
|
|
|
223
273
|
# ToDo:
|
|
224
|
-
|
|
225
|
-
- [x] additionally support tempfile backend instead of working bytesio memory mode only.
|
|
226
274
|
- [x] decreasing redundancies for _file_info() method
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
|
|
2
|
-
|
|
2
|
+
<h1 align="center" style="margin-top:-25px">MediaToolkit</h1>
|
|
3
3
|
<p align="center">
|
|
4
4
|
<img align="center" src="docs/media-file-icon.png" height="200" />
|
|
5
5
|
</p>
|
|
6
|
-
|
|
6
|
+
<h3 align="center" style="margin-top:-10px">Web-ready standardized file processing and serialization</h3>
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
# Features
|
|
@@ -14,7 +14,7 @@ Especially useful for code that works with multiple file types like images, audi
|
|
|
14
14
|
Load and convert from and to common data types:
|
|
15
15
|
- numpy arrays
|
|
16
16
|
- file paths
|
|
17
|
-
- bytes
|
|
17
|
+
- bytes
|
|
18
18
|
- base64
|
|
19
19
|
- json
|
|
20
20
|
- urls
|
|
@@ -42,6 +42,7 @@ pip install media-toolkit[VideoFile] # or [AudioFile, VideoFile, ...]
|
|
|
42
42
|
# install from github for newest release
|
|
43
43
|
pip install git+https://github.com/SocAIty/media-toolkit
|
|
44
44
|
```
|
|
45
|
+
|
|
45
46
|
The package checks if you have missing dependencies for certain file types while using.
|
|
46
47
|
Use the ```--no-deps``` flag for a minimal tiny pure python installation.
|
|
47
48
|
The package itself (not counting its dependencies) is quite small: < 39 kB.
|
|
@@ -94,7 +95,58 @@ as_base64 = my_file.to_base64()
|
|
|
94
95
|
as_json = my_file.to_json()
|
|
95
96
|
```
|
|
96
97
|
|
|
97
|
-
|
|
98
|
+
## Working with Collections of Files
|
|
99
|
+
|
|
100
|
+
### MediaList
|
|
101
|
+
A flexible list that can handle multiple media files with type safety:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from media_toolkit import MediaList, AudioFile
|
|
105
|
+
|
|
106
|
+
# Create a list that only accepts AudioFiles
|
|
107
|
+
audio_list = MediaList[AudioFile]()
|
|
108
|
+
|
|
109
|
+
# Add files to the list
|
|
110
|
+
audio_list.append("path/to/audio.mp3")
|
|
111
|
+
audio_list.extend(["url1", "url2"])
|
|
112
|
+
|
|
113
|
+
# Process all files
|
|
114
|
+
for audio in audio_list:
|
|
115
|
+
print(audio.file_size())
|
|
116
|
+
|
|
117
|
+
# Convert all files to base64
|
|
118
|
+
base64_files = audio_list.to_base64()
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### MediaDict
|
|
122
|
+
A dictionary for organizing media files with keys:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from media_toolkit import MediaDict, ImageFile
|
|
126
|
+
|
|
127
|
+
# Create a dictionary that only accepts ImageFiles
|
|
128
|
+
image_dict = MediaDict[ImageFile]()
|
|
129
|
+
|
|
130
|
+
# Add files with keys
|
|
131
|
+
image_dict["profile"] = "path/to/profile.jpg"
|
|
132
|
+
image_dict["banner"] = "https://example.com/banner.png"
|
|
133
|
+
|
|
134
|
+
# Process files
|
|
135
|
+
for key, image in image_dict.items():
|
|
136
|
+
print(f"{key}: {image.file_size()}")
|
|
137
|
+
|
|
138
|
+
# Convert to JSON
|
|
139
|
+
json_data = image_dict.to_json()
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Both `MediaList` and `MediaDict` support:
|
|
143
|
+
- Type safety with generic types (e.g., `MediaList[AudioFile]`)
|
|
144
|
+
- Lazy loading of files
|
|
145
|
+
- Batch processing
|
|
146
|
+
- Common operations (to_base64, to_bytes, etc.)
|
|
147
|
+
- Nested structures (MediaDict inside MediaList and vice versa)
|
|
148
|
+
|
|
149
|
+
### Working with VideoFiles
|
|
98
150
|
|
|
99
151
|
The VideoFiles wrap the famous [vidgear](https://abhitronix.github.io/vidgear/latest/) package as well as [pydub](https://github.com/jiaaro/pydub).
|
|
100
152
|
VideoFiles support extra methods like audio extraction, combining video and audio.
|
|
@@ -143,7 +195,7 @@ You can use the files in fastapi and transform the starlette upload file to a Me
|
|
|
143
195
|
```python
|
|
144
196
|
@app.post("/upload")
|
|
145
197
|
async def upload_file(file: UploadFile = File(...)):
|
|
146
|
-
mf = ImageFile().
|
|
198
|
+
mf = ImageFile().from_any(file)
|
|
147
199
|
return {"filename": file.filename}
|
|
148
200
|
```
|
|
149
201
|
|
|
@@ -161,16 +213,12 @@ my_files = {
|
|
|
161
213
|
response = httpx.Client().post(url, files=my_files)
|
|
162
214
|
```
|
|
163
215
|
|
|
164
|
-
|
|
165
216
|
# How it works
|
|
166
217
|
|
|
167
218
|
When a media file is loaded with one of the ```from_*``` methods, its content is converted to an intermediate representation.
|
|
168
219
|
The ```to_*``` methods then convert it to the desired format.
|
|
169
220
|
|
|
170
|
-
Currently the intermediate representation is supported in memory with (BytesIO).
|
|
171
|
-
|
|
221
|
+
Currently the intermediate representation is kept either in memory (BytesIO) or on disk in temporary files.
|
|
172
222
|
|
|
173
223
|
# ToDo:
|
|
174
|
-
|
|
175
|
-
- [x] additionally support tempfile backend instead of working bytesio memory mode only.
|
|
176
|
-
- [x] decreasing redundancies for _file_info() method
|
|
224
|
+
- [x] decreasing redundancies for _file_info() method
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
from media_toolkit.core import MediaFile, ImageFile, VideoFile, AudioFile, MediaList, MediaDict
|
|
2
|
+
from media_toolkit.utils.file_conversion import media_from_file, media_from_any
|
|
3
|
+
|
|
4
|
+
__all__ = ["MediaFile", "ImageFile", "VideoFile", "AudioFile", "MediaList", "MediaDict", "media_from_file", "media_from_any"]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
import io
|
|
3
|
+
from typing import Dict, Any, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class IMediaFile(ABC):
    """
    Abstract base interface defining the core contract for media file handling
    in the Media Toolkit ecosystem.

    Every method is abstract: a concrete subclass must override all of them
    before it can be instantiated.
    """
    @abstractmethod
    def from_any(self, data: Any, allow_reads_from_disk: bool = True) -> 'IMediaFile':
        """
        Load file content from various input sources.

        Args:
            data: Input source (bytes, file path, URL, base64, etc.)
            allow_reads_from_disk: Flag to control disk file reading

        Returns:
            Self, for method chaining
        """
        pass

    @abstractmethod
    def to_bytes(self) -> bytes:
        """Convert file content to raw bytes."""
        pass

    @abstractmethod
    def to_base64(self) -> str:
        """Encode file content to base64."""
        pass

    @abstractmethod
    def to_bytes_io(self) -> io.BytesIO:
        """Convert file content to BytesIO object."""
        pass

    @abstractmethod
    def to_httpx_send_able_tuple(self) -> tuple:
        """
        Prepare file for HTTP transmission.

        Returns:
            Tuple of (filename, content, content_type)
        """
        pass

    @abstractmethod
    def save(self, path: Optional[str] = None):
        """
        Save file to specified path.

        Args:
            path: Destination path. Uses current directory if None.
        """
        pass

    @abstractmethod
    def file_size(self, unit: str = "bytes") -> float:
        """
        Get file size in specified units.

        Args:
            unit: Size unit (bytes, kb, mb, gb)

        Returns:
            File size in specified unit
        """
        pass

    @abstractmethod
    def to_json(self) -> Dict[str, Any]:
        """
        Serialize file to JSON-compatible dictionary.

        Returns:
            Dictionary representation of the file
        """
        pass

    @abstractmethod
    def __sizeof__(self) -> int:
        """
        Get the size of the file in bytes.

        Subclasses must still override this method. The base implementation
        returns the plain object size instead of None so that a subclass can
        safely compute ``super().__sizeof__() + payload_size``.
        """
        # Returning None here (the previous `pass` body) made every cooperative
        # super() call fail with "unsupported operand type(s) for +".
        return object.__sizeof__(self)
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import List, Union, Optional, Any, Dict, TypeVar, Generic
|
|
4
|
+
from media_toolkit.core.IMediaFile import IMediaFile
|
|
5
|
+
from media_toolkit.core.media_file import MediaFile
|
|
6
|
+
from media_toolkit.core.MediaList import MediaList
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
T = TypeVar('T', bound=IMediaFile)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MediaDict(IMediaFile, Generic[T]):
    """
    A flexible media file dictionary that handles multiple file types
    and sources with configurable loading behaviors.

    Supports:
    - Multiple MediaFile types as dictionary values
    - Batch media processing
    - Generic type restrictions (e.g. MediaDict[AudioFile])

    Values are stored in ``self._media_files``. A value is either a processed
    object (IMediaFile, MediaList, nested MediaDict) or the original string
    when downloading / disk reads are disabled for that kind of source.
    """
    def __init__(
        self,
        files: Optional[Dict[str, Union[str, T, MediaList[T], 'MediaDict[T]']]] = None,
        download_files: bool = True,
        read_system_files: bool = True,
        file_name: str = "MediaDict",
        use_temp_file: bool = False,
        temp_dir: Optional[str] = None
    ):
        """
        Initialize MediaDict with optional files and configuration.

        Args:
            files: Dictionary of files with keys as identifiers
            download_files: Flag if files provided as URLs are downloaded and converted
            read_system_files: Flag if files provided as paths are read and converted
            file_name: Name of the media dictionary
            use_temp_file: Flag to use temp file for file processing
            temp_dir: Temp directory path for file processing
        """
        self.file_name = file_name
        self.use_temp_file = use_temp_file
        self.temp_dir = temp_dir
        self.download_files = download_files
        self.read_system_files = read_system_files
        self._media_files: Dict[str, Union[str, T, MediaList[T], 'MediaDict[T]']] = {}

        if files:
            self.update(files)

    def _process_file(
        self,
        file: Union[str, T, MediaList[T], 'MediaDict[T]']
    ) -> Union[str, T, MediaList[T], 'MediaDict[T]']:
        """
        Process a single file based on configuration.

        Args:
            file: File to process (URL, path, MediaFile, MediaList)
        Returns:
            Processed file (MediaFile, MediaList, MediaDict, or original str)
        """
        # Already processed objects are stored untouched.
        if isinstance(file, (IMediaFile, MediaList, MediaDict)):
            return file

        if isinstance(file, str):
            if MediaFile._is_url(file):
                # Keep the raw URL when downloads are disabled.
                if not self.download_files:
                    return file
                return MediaFile(use_temp_file=self.use_temp_file, temp_dir=self.temp_dir).from_url(file)

            if MediaFile._is_valid_file_path(file):
                # Keep the raw path when disk reads are disabled.
                if not self.read_system_files:
                    return file
                return MediaFile(use_temp_file=self.use_temp_file, temp_dir=self.temp_dir).from_file(file)

        # Plain lists become a MediaList that inherits this dict's settings.
        if isinstance(file, list):
            return MediaList[T](
                files=file,
                download_files=self.download_files,
                read_system_files=self.read_system_files,
                use_temp_file=self.use_temp_file,
                temp_dir=self.temp_dir
            )

        # Plain dicts that are not a serialized file model become a nested MediaDict.
        if isinstance(file, dict) and not MediaFile._is_file_model(file):
            return MediaDict[T](
                files=file,
                download_files=self.download_files,
                read_system_files=self.read_system_files,
                use_temp_file=self.use_temp_file,
                temp_dir=self.temp_dir
            )

        # Everything else (including strings that are neither URL nor path)
        # is handed to MediaFile.from_any.
        return MediaFile(use_temp_file=self.use_temp_file, temp_dir=self.temp_dir).from_any(file)

    def from_any(
        self,
        data: Union[Dict[str, Union[str, T, MediaList[T]]], Any]
    ) -> 'MediaDict[T]':
        """
        Load files from a dictionary of files.

        Args:
            data: Dictionary of files to load. Any other value is stored as a
                single entry under a generated key (see ``update``).
        Returns:
            Self, for method chaining
        """
        self.update(data)
        return self

    def get_processable_files(
        self,
        ignore_all_potential_errors: bool = False,
        raise_exception: bool = True,
        silent: bool = False
    ) -> 'MediaDict[T]':
        """
        Validate that all files can be processed for batch operations.

        Args:
            ignore_all_potential_errors: Ignore processing errors
            raise_exception: Raise exceptions for unprocessable files
            silent: Suppress error messages
        Returns:
            Self when every entry is processable (or errors are ignored),
            otherwise a new MediaDict holding only the processable entries.
        Raises:
            ValueError: If entries are unprocessable and raise_exception is True.
        """
        if ignore_all_potential_errors:
            return self

        processable_files = {
            key: file for key, file in self._media_files.items()
            if isinstance(file, (IMediaFile, MediaList))
        }

        if len(processable_files) != len(self._media_files):
            # Compare by key: value membership would rely on `==` between
            # media objects and costs O(n) per entry.
            not_processable_file_names = [
                str(key) for key in self._media_files
                if key not in processable_files
            ]
            message = (
                f"Files not processed: {not_processable_file_names}. "
                f"Check configuration (download_files={self.download_files}, "
                f"read_system_files={self.read_system_files})"
            )

            if raise_exception:
                raise ValueError(message)
            if not silent:
                print(message)

            return self._shallow_copy_with_settings(processable_files)

        return self

    def _shallow_copy_with_settings(self, data: dict | None = None) -> 'MediaDict[T]':
        """
        Creates a new MediaDict with the same settings that uses ``data`` as its
        media files dictionary without processing the entries again.
        This avoids re-reading all files when creating a copy.

        Args:
            data: Mapping to use as the new instance's storage. None yields an
                empty MediaDict.
        """
        md = MediaDict[T](
            file_name=self.file_name, download_files=self.download_files,
            read_system_files=self.read_system_files, use_temp_file=self.use_temp_file, temp_dir=self.temp_dir
        )
        # Never store None: every dict-style method relies on a real mapping.
        md._media_files = data if data is not None else {}
        return md

    def get_url_files(self) -> Union['MediaDict[T]', dict]:
        """
        Get all non-processed files that are URLs.

        Returns:
            Dictionary of URL files. An empty plain dict when download_files is
            enabled, because URLs are then converted on insertion.
        """
        if self.download_files:
            return {}

        return self._shallow_copy_with_settings({
            key: file for key, file in self._media_files.items()
            if isinstance(file, str) and MediaFile._is_url(file)
        })

    def get_file_path_files(self) -> Union['MediaDict[T]', dict]:
        """
        Get all non-processed files that are file paths.

        Returns:
            Dictionary of file path files. An empty plain dict when
            read_system_files is enabled, because paths are then read on insertion.
        """
        if self.read_system_files:
            return {}
        return self._shallow_copy_with_settings({
            key: file for key, file in self._media_files.items()
            if isinstance(file, str) and MediaFile._is_valid_file_path(file)
        })

    def to_base64(self) -> Dict[str, str]:
        """Convert all processable files to base64."""
        return {
            key: file.to_base64()
            for key, file in self.get_processable_files(raise_exception=False).items()
        }

    def to_bytes_io(self) -> Dict[str, io.BytesIO]:
        """Convert all processable files to BytesIO."""
        return {
            key: file.to_bytes_io()
            for key, file in self.get_processable_files(raise_exception=False).items()
        }

    def file_size(self, unit: str = "bytes") -> float:
        """Calculate total file size of all processable files in the given unit."""
        return sum(
            file.file_size(unit)
            for file in self.get_processable_files(raise_exception=False).values()
        )

    def to_json(self) -> Dict[str, Any]:
        """
        Convert files to JSON representation.

        Processed entries are serialized with their own ``to_json``;
        unprocessed entries (e.g. raw URL or path strings) are passed through.
        """
        files = self.get_processable_files(ignore_all_potential_errors=True)
        return {
            key: (file.to_json() if isinstance(file, (IMediaFile, MediaList)) else file)
            for key, file in files.items()
        }

    def to_bytes(self) -> Dict[str, bytes]:
        """Convert all processable files to bytes."""
        return {
            key: file.to_bytes()
            for key, file in self.get_processable_files(raise_exception=False).items()
        }

    def to_httpx_send_able_tuple(self) -> List[tuple] | dict:
        """
        Convert files to httpx-send-able format.

        Unprocessable entries are skipped silently.

        Returns:
            List of (key, value) tuples for httpx file transmission, or a
            single-entry dict when exactly one item was produced.
        """
        files = self.get_processable_files(raise_exception=False, silent=True)

        ret = []
        for k, file in files.items():
            if isinstance(file, MediaList):
                # NOTE(review): MediaList is defined outside this module. Keep the
                # spelling the original call used when it exists, otherwise use the
                # IMediaFile interface name - confirm which one MediaList provides.
                to_tuples = getattr(file, "to_httpx_sendable_tuple", None)
                if to_tuples is None:
                    to_tuples = file.to_httpx_send_able_tuple
                ret.extend(to_tuples(k))
            elif isinstance(file, MediaDict):
                # MediaDict only defines to_httpx_send_able_tuple; the former
                # "to_httpx_sendable_tuple" call raised AttributeError.
                fls = file.to_httpx_send_able_tuple()
                if isinstance(fls, dict):
                    ret.append((k, fls))
                else:
                    ret.extend(fls)
            else:
                ret.append((k, file.to_httpx_send_able_tuple()))

        if len(ret) == 1:
            return {ret[0][0]: ret[0][1]}
        return ret

    def save(self, directory: Optional[str] = None):
        """
        Save all processable files to a specified directory.

        Args:
            directory: Target directory (uses current directory if None).
                The directory is created when it does not exist.
        """
        import os
        directory = directory or os.path.curdir
        os.makedirs(directory, exist_ok=True)

        for file in self.get_processable_files(raise_exception=False).values():
            file.save(directory)

    def __getitem__(self, key: str):
        """Allow dictionary-style access."""
        return self._media_files[key]

    def __setitem__(self, key: str, value: Union[str, T, MediaList[T]]):
        """Allow dictionary-style assignment with processing."""
        self._media_files[key] = self._process_file(value)

    def __delitem__(self, key: str):
        """Allow dictionary-style deletion."""
        del self._media_files[key]

    def __iter__(self):
        """Make the class iterable (iterates over keys)."""
        return iter(self._media_files)

    def __len__(self):
        """Return the number of files in the dictionary."""
        return len(self._media_files)

    def __contains__(self, key: str):
        """Check if a key exists in the dictionary."""
        return key in self._media_files

    def keys(self):
        """Return dictionary keys."""
        return self._media_files.keys()

    def values(self):
        """Return dictionary values."""
        return self._media_files.values()

    def items(self):
        """Return dictionary items."""
        return self._media_files.items()

    def update(self, files: Union['MediaDict[T]', Dict[str, Union[str, T, MediaList[T]]]]):
        """
        Update the dictionary with new files.

        Args:
            files: Dictionary of files to add or update. A value that is neither
                a dict nor a MediaDict is stored as one entry under a random
                UUID key.
        """
        if not isinstance(files, (dict, MediaDict)):
            files = {str(uuid.uuid4()): files}

        for key, file in files.items():
            # __setitem__ already runs _process_file; no need to process twice.
            self[key] = file

    def __sizeof__(self):
        """Returns the memory size of the instance + actual file/buffer size."""
        # super().__sizeof__() would resolve to the abstract IMediaFile stub,
        # so ask object directly for the instance size.
        return object.__sizeof__(self) + int(self.file_size("bytes"))

    def to_dict(self) -> Dict[str, Union[str, T, MediaList[T]]]:
        """Convert MediaDict to a standard dictionary (nested MediaDicts are converted recursively)."""
        return {
            key: (file.to_dict() if isinstance(file, MediaDict) else file)
            for key, file in self._media_files.items()
        }
|