MeUtils 2025.3.3.18.41.24__py3-none-any.whl → 2025.3.5.19.55.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/METADATA +264 -264
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/RECORD +61 -33
- examples/_openaisdk/open_router.py +2 -1
- examples/_openaisdk/openai_files.py +16 -5
- examples/_openaisdk/openai_images.py +1 -0
- examples/_openaisdk/openai_moon.py +22 -19
- examples/sh/__init__.py +11 -0
- meutils/apis/baidu/bdaitpzs.py +9 -17
- meutils/apis/chatglm/glm_video_api.py +2 -2
- meutils/apis/images/edits.py +7 -2
- meutils/apis/jimeng/common.py +1 -1
- meutils/apis/oneapi/common.py +4 -4
- meutils/apis/proxy/ips.py +2 -0
- meutils/caches/common.py +4 -0
- meutils/data/VERSION +1 -1
- meutils/data/oneapi/NOTICE.html +12 -0
- meutils/data/oneapi/__init__.py +1 -1
- meutils/data/oneapi/index.html +275 -0
- meutils/io/_openai_files.py +31 -0
- meutils/io/openai_files.py +138 -0
- meutils/io/parsers/__init__.py +10 -0
- meutils/io/parsers/fileparser/PDF/346/212/275/345/217/226.py +58 -0
- meutils/io/parsers/fileparser/__init__.py +11 -0
- meutils/io/parsers/fileparser/common.py +91 -0
- meutils/io/parsers/fileparser/demo.py +41 -0
- meutils/io/parsers/fileparser/filetype/__init__.py +10 -0
- meutils/io/parsers/fileparser/filetype/__main__.py +37 -0
- meutils/io/parsers/fileparser/filetype/filetype.py +98 -0
- meutils/io/parsers/fileparser/filetype/helpers.py +140 -0
- meutils/io/parsers/fileparser/filetype/match.py +155 -0
- meutils/io/parsers/fileparser/filetype/types/__init__.py +118 -0
- meutils/io/parsers/fileparser/filetype/types/application.py +22 -0
- meutils/io/parsers/fileparser/filetype/types/archive.py +687 -0
- meutils/io/parsers/fileparser/filetype/types/audio.py +212 -0
- meutils/io/parsers/fileparser/filetype/types/base.py +29 -0
- meutils/io/parsers/fileparser/filetype/types/document.py +256 -0
- meutils/io/parsers/fileparser/filetype/types/font.py +115 -0
- meutils/io/parsers/fileparser/filetype/types/image.py +383 -0
- meutils/io/parsers/fileparser/filetype/types/isobmff.py +33 -0
- meutils/io/parsers/fileparser/filetype/types/video.py +223 -0
- meutils/io/parsers/fileparser/filetype/utils.py +84 -0
- meutils/io/parsers/fileparser/filetype.py +41 -0
- meutils/io/parsers/fileparser/mineru.py +48 -0
- meutils/io/parsers/fileparser/pdf.py +30 -0
- meutils/io/parsers/fileparser//350/241/250/346/240/274/346/212/275/345/217/226.py +118 -0
- meutils/llm/check_utils.py +33 -2
- meutils/llm/clients.py +1 -0
- meutils/llm/completions/chat_gemini.py +72 -0
- meutils/llm/completions/chat_plus.py +78 -0
- meutils/llm/completions/{agents/file.py → chat_spark.py} +46 -26
- meutils/llm/completions/qwenllm.py +57 -16
- meutils/llm/completions/yuanbao.py +29 -3
- meutils/llm/openai_utils/common.py +2 -2
- meutils/schemas/oneapi/common.py +22 -19
- meutils/schemas/openai_types.py +65 -29
- meutils/schemas/yuanbao_types.py +6 -7
- meutils/types.py +2 -0
- meutils/data/oneapi/NOTICE.md +0 -1
- meutils/data/oneapi/_NOTICE.md +0 -140
- meutils/llm/completions/gemini.py +0 -69
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/LICENSE +0 -0
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/WHEEL +0 -0
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/entry_points.txt +0 -0
- {MeUtils-2025.3.3.18.41.24.dist-info → MeUtils-2025.3.5.19.55.22.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,383 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
from __future__ import absolute_import
|
4
|
+
|
5
|
+
from .base import Type
|
6
|
+
from .isobmff import IsoBmff
|
7
|
+
|
8
|
+
|
9
|
+
class Jpeg(Type):
    """
    Matcher for the JPEG image type (``image/jpeg``, extension ``jpg``).
    """
    MIME = 'image/jpeg'
    EXTENSION = 'jpg'

    def __init__(self):
        super(Jpeg, self).__init__(mime=Jpeg.MIME, extension=Jpeg.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than two items and starts
        with the values 0xFF, 0xD8, 0xFF.
        """
        if len(buf) <= 2:
            return False
        expected = (0xFF, 0xD8, 0xFF)
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
27
|
+
|
28
|
+
|
29
|
+
class Jpx(Type):
    """
    Matcher for the JPEG2000 image type (``image/jpx``, extension ``jpx``).
    """

    MIME = "image/jpx"
    EXTENSION = "jpx"

    def __init__(self):
        super(Jpx, self).__init__(
            mime=Jpx.MIME,
            extension=Jpx.EXTENSION,
        )

    def match(self, buf):
        """
        Return True when ``buf`` has more than 50 items, starts with
        00 00 00 0C and carries ``ftypjp2 `` at offsets 16..23.
        """
        if len(buf) <= 50:
            return False
        leading = (0x00, 0x00, 0x00, 0x0C)
        if not all(buf[pos] == value for pos, value in enumerate(leading)):
            return False
        return buf[16:24] == b"ftypjp2 "
|
49
|
+
|
50
|
+
|
51
|
+
class Apng(Type):
    """
    Matcher for the APNG image type (``image/apng``, extension ``apng``).
    """
    MIME = 'image/apng'
    EXTENSION = 'apng'

    def __init__(self):
        super(Apng, self).__init__(mime=Apng.MIME, extension=Apng.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` starts with the 8-byte PNG signature and
        an ``acTL`` chunk is found before any ``IDAT`` or ``IEND`` chunk.
        """
        png_magic = bytearray([0x89, 0x50, 0x4e, 0x47,
                               0x0d, 0x0a, 0x1a, 0x0a])
        if not (len(buf) > 8 and buf[:8] == png_magic):
            return False

        # Walk the chunks that follow the 8 signature bytes.
        cursor = 8
        while cursor < len(buf):
            # Each chunk begins with a 4-byte big-endian data length
            # followed by a 4-byte chunk type.
            payload_size = int.from_bytes(buf[cursor:cursor + 4],
                                          byteorder="big")
            tag = buf[cursor + 4:cursor + 8].decode("ascii", errors='ignore')

            # Reaching IDAT or IEND first means no acTL chunk preceded them.
            if tag in ("IDAT", "IEND"):
                return False
            if tag == "acTL":
                return True

            # Skip length + type (8 bytes), the data and the 4-byte CRC.
            cursor += 8 + payload_size + 4

        return False
|
88
|
+
|
89
|
+
|
90
|
+
class Png(Type):
    """
    Matcher for the PNG image type (``image/png``, extension ``png``).
    """
    MIME = 'image/png'
    EXTENSION = 'png'

    def __init__(self):
        super(Png, self).__init__(mime=Png.MIME, extension=Png.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than three items and starts
        with 0x89, 0x50, 0x4E, 0x47 (0x89 followed by ASCII "PNG").
        """
        if len(buf) <= 3:
            return False
        expected = (0x89, 0x50, 0x4E, 0x47)
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
109
|
+
|
110
|
+
|
111
|
+
class Gif(Type):
    """
    Matcher for the GIF image type (``image/gif``, extension ``gif``).
    """
    MIME = 'image/gif'
    EXTENSION = 'gif'

    def __init__(self):
        super(Gif, self).__init__(mime=Gif.MIME, extension=Gif.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than two items and starts
        with 0x47, 0x49, 0x46 (ASCII "GIF").
        """
        if len(buf) <= 2:
            return False
        return buf[0] == 0x47 and buf[1] == 0x49 and buf[2] == 0x46
|
129
|
+
|
130
|
+
|
131
|
+
class Webp(Type):
    """
    Matcher for the WEBP image type (``image/webp``, extension ``webp``).
    """
    MIME = 'image/webp'
    EXTENSION = 'webp'

    def __init__(self):
        super(Webp, self).__init__(mime=Webp.MIME, extension=Webp.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than 13 items, starts with
        ASCII "RIFF" and holds ASCII "WEBPVP" at offsets 8..13.
        Offsets 4..7 are not inspected.
        """
        if len(buf) <= 13:
            return False
        expected = (
            (0, 0x52), (1, 0x49), (2, 0x46), (3, 0x46),
            (8, 0x57), (9, 0x45), (10, 0x42), (11, 0x50),
            (12, 0x56), (13, 0x50),
        )
        return all(buf[offset] == value for offset, value in expected)
|
156
|
+
|
157
|
+
|
158
|
+
class Cr2(Type):
    """
    Matcher for the CR2 image type (``image/x-canon-cr2``, extension ``cr2``).
    """
    MIME = 'image/x-canon-cr2'
    EXTENSION = 'cr2'

    def __init__(self):
        super(Cr2, self).__init__(mime=Cr2.MIME, extension=Cr2.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than nine items, starts with
        49 49 2A 00 or 4D 4D 00 2A, and holds 0x43, 0x52 (ASCII "CR")
        at offsets 8 and 9.
        """
        if len(buf) <= 9:
            return False
        accepted_headers = (
            (0x49, 0x49, 0x2A, 0x0),
            (0x4D, 0x4D, 0x0, 0x2A),
        )
        header = (buf[0], buf[1], buf[2], buf[3])
        if header not in accepted_headers:
            return False
        return buf[8] == 0x43 and buf[9] == 0x52
|
178
|
+
|
179
|
+
|
180
|
+
class Tiff(Type):
    """
    Matcher for the TIFF image type (``image/tiff``, extension ``tif``).
    """
    MIME = 'image/tiff'
    EXTENSION = 'tif'

    def __init__(self):
        super(Tiff, self).__init__(mime=Tiff.MIME, extension=Tiff.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than nine items, starts with
        49 49 2A 00 or 4D 4D 00 2A, and does NOT hold 0x43, 0x52
        (ASCII "CR") at offsets 8 and 9.
        """
        if len(buf) <= 9:
            return False
        accepted_headers = (
            (0x49, 0x49, 0x2A, 0x0),
            (0x4D, 0x4D, 0x0, 0x2A),
        )
        header = (buf[0], buf[1], buf[2], buf[3])
        if header not in accepted_headers:
            return False
        # Buffers carrying "CR" at offsets 8..9 are rejected here.
        return not (buf[8] == 0x43 and buf[9] == 0x52)
|
200
|
+
|
201
|
+
|
202
|
+
class Bmp(Type):
    """
    Matcher for the BMP image type (``image/bmp``, extension ``bmp``).
    """
    MIME = 'image/bmp'
    EXTENSION = 'bmp'

    def __init__(self):
        super(Bmp, self).__init__(mime=Bmp.MIME, extension=Bmp.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than one item and starts
        with 0x42, 0x4D (ASCII "BM").
        """
        if len(buf) <= 1:
            return False
        return buf[0] == 0x42 and buf[1] == 0x4D
|
219
|
+
|
220
|
+
|
221
|
+
class Jxr(Type):
    """
    Matcher for the JXR image type (``image/vnd.ms-photo``, extension ``jxr``).
    """
    MIME = 'image/vnd.ms-photo'
    EXTENSION = 'jxr'

    def __init__(self):
        super(Jxr, self).__init__(mime=Jxr.MIME, extension=Jxr.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than two items and starts
        with 0x49, 0x49, 0xBC.
        """
        if len(buf) <= 2:
            return False
        expected = (0x49, 0x49, 0xBC)
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
239
|
+
|
240
|
+
|
241
|
+
class Psd(Type):
    """
    Matcher for the PSD image type (``image/vnd.adobe.photoshop``,
    extension ``psd``).
    """
    MIME = 'image/vnd.adobe.photoshop'
    EXTENSION = 'psd'

    def __init__(self):
        super(Psd, self).__init__(mime=Psd.MIME, extension=Psd.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than three items and starts
        with 0x38, 0x42, 0x50, 0x53 (ASCII "8BPS").
        """
        if len(buf) <= 3:
            return False
        expected = (0x38, 0x42, 0x50, 0x53)
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
260
|
+
|
261
|
+
|
262
|
+
class Ico(Type):
    """
    Matcher for the ICO image type (``image/x-icon``, extension ``ico``).
    """
    MIME = 'image/x-icon'
    EXTENSION = 'ico'

    def __init__(self):
        super(Ico, self).__init__(mime=Ico.MIME, extension=Ico.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than three items and starts
        with 0x00, 0x00, 0x01, 0x00.
        """
        if len(buf) <= 3:
            return False
        expected = (0x00, 0x00, 0x01, 0x00)
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
281
|
+
|
282
|
+
|
283
|
+
class Heic(IsoBmff):
    """
    Matcher for the HEIC image type (``image/heic``, extension ``heic``).
    """
    MIME = 'image/heic'
    EXTENSION = 'heic'

    def __init__(self):
        super(Heic, self).__init__(mime=Heic.MIME, extension=Heic.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` passes ``_is_isobmff`` and either its
        major brand is ``heic``, or its major brand is ``mif1``/``msf1``
        and ``heic`` is listed among the compatible brands.
        """
        if not self._is_isobmff(buf):
            return False

        major_brand, _, compatible_brands = self._get_ftyp(buf)
        if major_brand == 'heic':
            return True
        generic_brand = major_brand in ('mif1', 'msf1')
        return generic_brand and 'heic' in compatible_brands
|
306
|
+
|
307
|
+
|
308
|
+
class Dcm(Type):
    """
    Matcher for the DICOM type (``application/dicom``, extension ``dcm``).
    """

    MIME = 'application/dicom'
    EXTENSION = 'dcm'
    # Index in the buffer at which the four marker bytes are expected.
    OFFSET = 128

    def __init__(self):
        super(Dcm, self).__init__(mime=Dcm.MIME, extension=Dcm.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than ``OFFSET + 4`` items and
        holds 0x44, 0x49, 0x43, 0x4D (ASCII "DICM") starting at ``OFFSET``.
        """
        start = Dcm.OFFSET
        if len(buf) <= start + 4:
            return False
        marker = (0x44, 0x49, 0x43, 0x4D)
        return all(buf[start + step] == value
                   for step, value in enumerate(marker))
|
326
|
+
|
327
|
+
|
328
|
+
class Dwg(Type):
    """Matcher for the DWG image type (``image/vnd.dwg``, extension ``dwg``)."""

    MIME = 'image/vnd.dwg'
    EXTENSION = 'dwg'

    def __init__(self):
        super(Dwg, self).__init__(mime=Dwg.MIME, extension=Dwg.EXTENSION)

    def match(self, buf):
        """
        Return True when the first four items of ``buf`` equal
        0x41, 0x43, 0x31, 0x30 (ASCII "AC10").
        """
        magic = bytearray(b'AC10')
        return buf[:4] == magic
|
342
|
+
|
343
|
+
|
344
|
+
class Xcf(Type):
    """Matcher for the XCF image type (``image/x-xcf``, extension ``xcf``)."""

    MIME = 'image/x-xcf'
    EXTENSION = 'xcf'

    def __init__(self):
        super(Xcf, self).__init__(mime=Xcf.MIME, extension=Xcf.EXTENSION)

    def match(self, buf):
        """
        Return True when the first ten items of ``buf`` spell the
        ASCII text "gimp xcf v".
        """
        magic = bytearray(b'gimp xcf v')
        return buf[:10] == magic
|
359
|
+
|
360
|
+
|
361
|
+
class Avif(IsoBmff):
    """
    Matcher for the AVIF image type (``image/avif``, extension ``avif``).
    """
    MIME = 'image/avif'
    EXTENSION = 'avif'

    def __init__(self):
        super(Avif, self).__init__(mime=Avif.MIME, extension=Avif.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` passes ``_is_isobmff`` and either its
        major brand is ``avif``, or its major brand is ``mif1``/``msf1``
        and ``avif`` is listed among the compatible brands.
        """
        if not self._is_isobmff(buf):
            return False

        major_brand, _, compatible_brands = self._get_ftyp(buf)
        if major_brand == 'avif':
            return True
        generic_brand = major_brand in ('mif1', 'msf1')
        return generic_brand and 'avif' in compatible_brands
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
from __future__ import absolute_import
|
3
|
+
import codecs
|
4
|
+
|
5
|
+
from .base import Type
|
6
|
+
|
7
|
+
|
8
|
+
class IsoBmff(Type):
    """
    Base type for matchers that inspect a leading ``ftyp`` box.
    """
    def __init__(self, mime, extension):
        super(IsoBmff, self).__init__(mime=mime, extension=extension)

    def _is_isobmff(self, buf):
        """
        Return True when ``buf`` has at least 16 bytes, carries ``ftyp``
        at offsets 4..7, and is at least as long as the size encoded
        in its first four bytes.
        """
        if len(buf) < 16:
            return False
        if buf[4:8] != b'ftyp':
            return False
        # The first four bytes are read as one big-endian unsigned integer.
        declared_size = int(codecs.encode(buf[0:4], 'hex'), 16)
        return len(buf) >= declared_size

    def _get_ftyp(self, buf):
        """
        Decode the leading ``ftyp`` box of ``buf``.

        Returns:
            Tuple ``(major_brand, minor_version, compatible_brands)``:
            bytes 8..11 decoded as text, bytes 12..15 as an integer, and
            the list of 4-byte brands found from offset 16 up to the
            size stored in the first four bytes.
        """
        box_size = int(codecs.encode(buf[0:4], 'hex'), 16)
        major_brand = buf[8:12].decode(errors='ignore')
        minor_version = int(codecs.encode(buf[12:16], 'hex'), 16)
        compatible_brands = [
            buf[start:start + 4].decode(errors='ignore')
            for start in range(16, box_size, 4)
        ]
        return major_brand, minor_version, compatible_brands
|
@@ -0,0 +1,223 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
from __future__ import absolute_import
|
4
|
+
|
5
|
+
from .base import Type
|
6
|
+
from .isobmff import IsoBmff
|
7
|
+
|
8
|
+
|
9
|
+
class Mp4(IsoBmff):
    """
    Matcher for the MP4 video type (``video/mp4``, extension ``mp4``).
    """
    MIME = 'video/mp4'
    EXTENSION = 'mp4'

    def __init__(self):
        super(Mp4, self).__init__(mime=Mp4.MIME, extension=Mp4.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` passes ``_is_isobmff`` and ``mp41``,
        ``mp42`` or ``isom`` appears as a compatible brand or as the
        major brand.
        """
        if not self._is_isobmff(buf):
            return False

        accepted = ('mp41', 'mp42', 'isom')
        major_brand, _, compatible_brands = self._get_ftyp(buf)
        if any(brand in accepted for brand in compatible_brands):
            return True
        return major_brand in accepted
|
31
|
+
|
32
|
+
|
33
|
+
class M4v(Type):
    """
    Matcher for the M4V video type (``video/x-m4v``, extension ``m4v``).
    """
    MIME = 'video/x-m4v'
    EXTENSION = 'm4v'

    def __init__(self):
        super(M4v, self).__init__(mime=M4v.MIME, extension=M4v.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than ten items and starts with
        00 00 00 1C followed by ASCII "ftypM4V".
        """
        if len(buf) <= 10:
            return False
        expected = (
            0x0, 0x0, 0x0, 0x1C,     # first four bytes
            0x66, 0x74, 0x79, 0x70,  # "ftyp"
            0x4D, 0x34, 0x56,        # "M4V"
        )
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
54
|
+
|
55
|
+
|
56
|
+
class Mkv(Type):
    """
    Matcher for the MKV video type (``video/x-matroska``, extension ``mkv``).
    """
    MIME = 'video/x-matroska'
    EXTENSION = 'mkv'

    def __init__(self):
        super(Mkv, self).__init__(mime=Mkv.MIME, extension=Mkv.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` starts with 1A 45 DF A3 and contains the
        byte sequence 42 82 88 followed by ASCII "matroska" anywhere.
        """
        header_magic = b'\x1A\x45\xDF\xA3'
        doctype_marker = b'\x42\x82\x88matroska'
        starts_with_header = buf.startswith(header_magic)
        has_doctype = buf.find(doctype_marker) != -1
        return starts_with_header and has_doctype
|
73
|
+
|
74
|
+
|
75
|
+
class Webm(Type):
    """
    Matcher for the WebM video type (``video/webm``, extension ``webm``).
    """
    MIME = 'video/webm'
    EXTENSION = 'webm'

    def __init__(self):
        super(Webm, self).__init__(mime=Webm.MIME, extension=Webm.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` starts with 1A 45 DF A3 and contains the
        byte sequence 42 82 84 followed by ASCII "webm" anywhere.
        """
        header_magic = b'\x1A\x45\xDF\xA3'
        doctype_marker = b'\x42\x82\x84webm'
        starts_with_header = buf.startswith(header_magic)
        has_doctype = buf.find(doctype_marker) != -1
        return starts_with_header and has_doctype
|
92
|
+
|
93
|
+
|
94
|
+
class Mov(IsoBmff):
    """
    Implements the MOV video type matcher.
    """
    MIME = 'video/quicktime'
    EXTENSION = 'mov'

    def __init__(self):
        super(Mov, self).__init__(
            mime=Mov.MIME,
            extension=Mov.EXTENSION
        )

    def match(self, buf):
        """
        Return True when ``buf`` passes ``_is_isobmff`` and its major
        brand is ``qt`` padded with two spaces.
        """
        if not self._is_isobmff(buf):
            return False

        major_brand, minor_version, compatible_brands = self._get_ftyp(buf)
        # The major brand is decoded from a 4-byte slice, so it must be
        # compared with the full 4-character brand "qt" + two spaces;
        # a 3-character literal could never be equal to it.
        return major_brand == 'qt\x20\x20'
|
113
|
+
|
114
|
+
|
115
|
+
class Avi(Type):
    """
    Matcher for the AVI video type (``video/x-msvideo``, extension ``avi``).
    """
    MIME = 'video/x-msvideo'
    EXTENSION = 'avi'

    def __init__(self):
        super(Avi, self).__init__(mime=Avi.MIME, extension=Avi.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than 11 items, starts with
        ASCII "RIFF" and holds ASCII "AVI " at offsets 8..11.
        Offsets 4..7 are not inspected.
        """
        if len(buf) <= 11:
            return False
        expected = (
            (0, 0x52), (1, 0x49), (2, 0x46), (3, 0x46),
            (8, 0x41), (9, 0x56), (10, 0x49), (11, 0x20),
        )
        return all(buf[offset] == value for offset, value in expected)
|
138
|
+
|
139
|
+
|
140
|
+
class Wmv(Type):
    """
    Matcher for the WMV video type (``video/x-ms-wmv``, extension ``wmv``).
    """
    MIME = 'video/x-ms-wmv'
    EXTENSION = 'wmv'

    def __init__(self):
        super(Wmv, self).__init__(mime=Wmv.MIME, extension=Wmv.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than nine items and starts with
        30 26 B2 75 8E 66 CF 11 A6 D9.
        """
        if len(buf) <= 9:
            return False
        expected = (0x30, 0x26, 0xB2, 0x75, 0x8E,
                    0x66, 0xCF, 0x11, 0xA6, 0xD9)
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
165
|
+
|
166
|
+
|
167
|
+
class Flv(Type):
    """
    Matcher for the FLV video type (``video/x-flv``, extension ``flv``).
    """
    MIME = 'video/x-flv'
    EXTENSION = 'flv'

    def __init__(self):
        super(Flv, self).__init__(mime=Flv.MIME, extension=Flv.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than three items and starts
        with 0x46, 0x4C, 0x56 (ASCII "FLV") followed by 0x01.
        """
        if len(buf) <= 3:
            return False
        expected = (0x46, 0x4C, 0x56, 0x01)
        return all(buf[pos] == value for pos, value in enumerate(expected))
|
186
|
+
|
187
|
+
|
188
|
+
class Mpeg(Type):
    """
    Matcher for the MPEG video type (``video/mpeg``, extension ``mpg``).
    """
    MIME = 'video/mpeg'
    EXTENSION = 'mpg'

    def __init__(self):
        super(Mpeg, self).__init__(mime=Mpeg.MIME, extension=Mpeg.EXTENSION)

    def match(self, buf):
        """
        Return True when ``buf`` has more than three items, starts with
        00 00 01, and its fourth item lies in the range 0xB0..0xBF
        (both ends included).
        """
        if len(buf) <= 3:
            return False
        if not (buf[0] == 0x0 and buf[1] == 0x0 and buf[2] == 0x1):
            return False
        return 0xb0 <= buf[3] <= 0xbf
|
208
|
+
|
209
|
+
|
210
|
+
class M3gp(Type):
    """Implements the 3gp video type matcher."""

    MIME = 'video/3gpp'
    EXTENSION = '3gp'

    def __init__(self):
        super(M3gp, self).__init__(
            mime=M3gp.MIME,
            extension=M3gp.EXTENSION
        )

    def match(self, buf):
        """
        Return True when ``buf`` carries ASCII "ftyp3gp" at offset 4
        or at offset 0.
        """
        brand = bytearray([0x66, 0x74, 0x79, 0x70, 0x33, 0x67, 0x70])
        # The "ftyp" tag follows a 4-byte box size, so the signature
        # normally sits at offsets 4..10. Offset 0 is still accepted so
        # that every buffer matched before this fix keeps matching.
        return buf[4:11] == brand or buf[:7] == brand
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# Python 2.7 workaround
|
4
|
+
try:
|
5
|
+
import pathlib
|
6
|
+
except ImportError:
|
7
|
+
pass
|
8
|
+
|
9
|
+
|
10
|
+
_NUM_SIGNATURE_BYTES = 8192
|
11
|
+
|
12
|
+
|
13
|
+
def get_signature_bytes(path):
    """
    Open the file at ``path`` in binary mode and return its leading
    bytes, at most ``_NUM_SIGNATURE_BYTES`` of them.

    Args:
        path: path to the file to read.

    Returns:
        A bytearray holding the bytes that were read.
    """
    with open(path, 'rb') as stream:
        head = stream.read(_NUM_SIGNATURE_BYTES)
    return bytearray(head)
|
26
|
+
|
27
|
+
|
28
|
+
def signature(array):
    """
    Return the leading part of ``array`` used as the header signature,
    at most ``_NUM_SIGNATURE_BYTES`` items long.

    Args:
        array: sliceable buffer to take the header signature from.

    Returns:
        The slice ``array[:k]`` where ``k`` is the smaller of
        ``len(array)`` and ``_NUM_SIGNATURE_BYTES``.
    """
    limit = min(len(array), _NUM_SIGNATURE_BYTES)
    return array[:limit]
|
43
|
+
|
44
|
+
|
45
|
+
def get_bytes(obj):
    """
    Infers the input type and returns its leading bytes, at most
    ``_NUM_SIGNATURE_BYTES`` of them.

    Args:
        obj: path to readable, file-like object (with read() method),
            bytes, bytearray or memoryview

    Returns:
        The leading bytes of the content. Paths and memoryviews yield a
        bytearray; bytes and bytearray inputs yield a slice of the same
        type as the input.

    Raises:
        TypeError: if obj is not a supported type, or if a file-like
            object returns text instead of bytes from read().
    """
    if isinstance(obj, bytearray):
        return signature(obj)

    if isinstance(obj, str):
        return get_signature_bytes(obj)

    if isinstance(obj, bytes):
        return signature(obj)

    if isinstance(obj, memoryview):
        return bytearray(signature(obj).tolist())

    if isinstance(obj, pathlib.PurePath):
        return get_signature_bytes(obj)

    if hasattr(obj, 'read'):
        if hasattr(obj, 'tell') and hasattr(obj, 'seek'):
            start_pos = obj.tell()
            obj.seek(0)
            try:
                magic_bytes = obj.read(_NUM_SIGNATURE_BYTES)
            finally:
                # Put the caller's stream position back even when
                # read() raises.
                obj.seek(start_pos)
        else:
            magic_bytes = obj.read(_NUM_SIGNATURE_BYTES)

        # A text-mode stream returns str; passing that on would make the
        # recursive call treat the file's *content* as a filesystem path.
        # The bytes check keeps Python 2 behaviour (str is bytes) as is.
        if isinstance(magic_bytes, str) and not isinstance(magic_bytes, bytes):
            raise TypeError(
                'File-like input must be opened in binary mode, '
                'read() returned: %s' % type(magic_bytes))
        return get_bytes(magic_bytes)

    raise TypeError('Unsupported type as file input: %s' % type(obj))
|