chatterer 0.1.25__py3-none-any.whl → 0.1.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/__init__.py +87 -97
- chatterer/common_types/__init__.py +21 -21
- chatterer/common_types/io.py +19 -19
- chatterer/constants.py +5 -0
- chatterer/examples/__main__.py +75 -75
- chatterer/examples/any2md.py +83 -85
- chatterer/examples/pdf2md.py +231 -338
- chatterer/examples/pdf2txt.py +52 -54
- chatterer/examples/ppt.py +487 -486
- chatterer/examples/pw.py +141 -143
- chatterer/examples/snippet.py +54 -56
- chatterer/examples/transcribe.py +192 -192
- chatterer/examples/upstage.py +87 -89
- chatterer/examples/web2md.py +80 -80
- chatterer/interactive.py +422 -354
- chatterer/language_model.py +530 -536
- chatterer/messages.py +21 -21
- chatterer/tools/__init__.py +46 -46
- chatterer/tools/caption_markdown_images.py +388 -384
- chatterer/tools/citation_chunking/__init__.py +3 -3
- chatterer/tools/citation_chunking/chunks.py +51 -53
- chatterer/tools/citation_chunking/citation_chunker.py +117 -118
- chatterer/tools/citation_chunking/citations.py +284 -285
- chatterer/tools/citation_chunking/prompt.py +157 -157
- chatterer/tools/citation_chunking/reference.py +26 -26
- chatterer/tools/citation_chunking/utils.py +138 -138
- chatterer/tools/convert_pdf_to_markdown.py +636 -645
- chatterer/tools/convert_to_text.py +446 -446
- chatterer/tools/upstage_document_parser.py +704 -705
- chatterer/tools/webpage_to_markdown.py +739 -739
- chatterer/tools/youtube.py +146 -147
- chatterer/utils/__init__.py +15 -15
- chatterer/utils/base64_image.py +349 -293
- chatterer/utils/bytesio.py +59 -59
- chatterer/utils/code_agent.py +237 -237
- chatterer/utils/imghdr.py +145 -148
- {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/METADATA +377 -390
- chatterer-0.1.27.dist-info/RECORD +43 -0
- chatterer/strategies/__init__.py +0 -13
- chatterer/strategies/atom_of_thoughts.py +0 -975
- chatterer/strategies/base.py +0 -14
- chatterer-0.1.25.dist-info/RECORD +0 -45
- {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/WHEEL +0 -0
- {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/entry_points.txt +0 -0
- {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/top_level.txt +0 -0
chatterer/utils/imghdr.py
CHANGED
@@ -1,148 +1,145 @@
|
|
1
|
-
"""
|
2
|
-
Recognize image file formats based on their first few bytes (base64-encoded).
|
3
|
-
Originally derived from Python's imghdr, modified for base64 inputs.
|
4
|
-
"""
|
5
|
-
|
6
|
-
import base64
|
7
|
-
import math
|
8
|
-
from typing import Callable, List, Literal, Optional
|
9
|
-
|
10
|
-
ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
|
11
|
-
|
12
|
-
tests: List[Callable[[bytes], Optional[ImageType]]] = []
|
13
|
-
|
14
|
-
|
15
|
-
def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
|
16
|
-
tests.append(func)
|
17
|
-
return func
|
18
|
-
|
19
|
-
|
20
|
-
def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
|
21
|
-
needed_chars = math.ceil(prefix_bytes * 4 / 3)
|
22
|
-
truncated_data = b64_data[:needed_chars]
|
23
|
-
|
24
|
-
try:
|
25
|
-
return base64.b64decode(truncated_data)
|
26
|
-
except Exception:
|
27
|
-
return base64.b64decode(b64_data)
|
28
|
-
|
29
|
-
|
30
|
-
def what(
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
)
|
146
|
-
|
147
|
-
fmt = what(example_png_base64)
|
148
|
-
print(f"Detected format: {fmt}") # Expected: png
|
1
|
+
"""
|
2
|
+
Recognize image file formats based on their first few bytes (base64-encoded).
|
3
|
+
Originally derived from Python's imghdr, modified for base64 inputs.
|
4
|
+
"""
|
5
|
+
|
6
|
+
import base64
|
7
|
+
import math
|
8
|
+
from typing import Callable, List, Literal, Optional
|
9
|
+
|
10
|
+
# Closed set of format names used as the return type of the detectors in this module.
ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
|
11
|
+
|
12
|
+
# Registry of detector callables; each takes the header bytes and returns a format name or None.
tests: List[Callable[[bytes], Optional[ImageType]]] = []
|
13
|
+
|
14
|
+
|
15
|
+
def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
    """Decorator that adds *func* to the module-level ``tests`` registry.

    The callable is appended to the end of ``tests`` and handed back
    unmodified, so the decorated name still refers to the original function.
    """
    tests.append(func)
    return func
|
18
|
+
|
19
|
+
|
20
|
+
def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
    """Decode only as much of a base64 string as is needed for a header.

    Args:
        b64_data: Base64-encoded payload.
        prefix_bytes: Minimum number of decoded bytes wanted. Negative
            values are treated as zero.

    Returns:
        The decoded leading bytes. Because base64 works in 4-character
        groups that each yield 3 bytes, the result may be up to two bytes
        longer than ``prefix_bytes`` (or shorter if the input itself is
        short). If the truncated slice cannot be decoded, the whole input
        is decoded instead.

    Raises:
        binascii.Error: (a ``ValueError`` subclass) if the full input is
            not valid base64 either.
    """
    # Cut on a 4-character boundary. A slice whose length is not a multiple
    # of four is rejected by b64decode with "Incorrect padding", which would
    # force the expensive full-payload fallback on every call.
    groups_needed = math.ceil(max(prefix_bytes, 0) / 3)
    truncated_data = b64_data[: groups_needed * 4]

    try:
        return base64.b64decode(truncated_data)
    except ValueError:
        # Ignored characters (e.g. line breaks) inside the slice can still
        # misalign it; decoding the complete payload is the safe fallback.
        return base64.b64decode(b64_data)
|
28
|
+
|
29
|
+
|
30
|
+
def what(b64_or_bytes: str | bytes, prefix_bytes: int = 32) -> Optional[ImageType]:
    """Identify the image format of a base64 string or a raw byte header.

    Args:
        b64_or_bytes: A ``str`` is treated as base64 and its leading part is
            decoded via ``decode_prefix``; any other value is used as the
            header bytes as-is.
        prefix_bytes: Forwarded to ``decode_prefix`` for ``str`` input;
            unused otherwise.

    Returns:
        The first truthy result produced by the callables in ``tests``,
        tried in list order, or ``None`` when none of them matches.
    """
    header: bytes = (
        decode_prefix(b64_or_bytes, prefix_bytes=prefix_bytes)
        if isinstance(b64_or_bytes, str)
        else b64_or_bytes
    )

    for detector in tests:
        if detected := detector(header):
            return detected
    return None
|
41
|
+
|
42
|
+
|
43
|
+
# --- Test functions --- #
|
44
|
+
|
45
|
+
|
46
|
+
@register_test
def test_jpeg(h: bytes) -> Optional[ImageType]:
    """Detect JPEG data from its leading bytes.

    Args:
        h: Header bytes of the candidate image.

    Returns:
        ``"jpeg"`` when *h* starts with ``FF D8 FF`` or carries a
        ``JFIF``/``Exif`` tag at offset 6; otherwise ``None``.
    """
    # A JPEG stream opens with the SOI marker (FF D8) followed by the FF that
    # introduces the next marker segment. Matching those three bytes covers
    # files whose first segment is neither APP0/APP1 nor DQT.
    if h.startswith(b"\xff\xd8\xff"):
        return "jpeg"
    # Legacy heuristic kept for backward compatibility: accept a JFIF/Exif
    # tag at offset 6 even when the leading bytes are not checked.
    if h[6:10] in (b"JFIF", b"Exif"):
        return "jpeg"
    return None
|
53
|
+
|
54
|
+
|
55
|
+
@register_test
def test_png(h: bytes) -> Optional[ImageType]:
    """Return ``"png"`` when *h* begins with the 8-byte PNG signature, else ``None``."""
    png_signature = b"\x89PNG\r\n\x1a\n"
    return "png" if h[: len(png_signature)] == png_signature else None
|
60
|
+
|
61
|
+
|
62
|
+
@register_test
def test_gif(h: bytes) -> Optional[ImageType]:
    """Return ``"gif"`` when *h* begins with ``GIF87a`` or ``GIF89a``, else ``None``."""
    return "gif" if h.startswith((b"GIF87a", b"GIF89a")) else None
|
67
|
+
|
68
|
+
|
69
|
+
@register_test
def test_tiff(h: bytes) -> Optional[ImageType]:
    """Return ``"tiff"`` when the first two bytes of *h* are ``MM`` or ``II``.

    Only those two bytes are inspected; nothing after them is validated.
    """
    leading_pair = h[:2]
    if leading_pair == b"MM" or leading_pair == b"II":
        return "tiff"
    return None
|
74
|
+
|
75
|
+
|
76
|
+
@register_test
def test_rgb(h: bytes) -> Optional[ImageType]:
    """Return ``"rgb"`` when *h* begins with the bytes ``01 DA``, else ``None``."""
    return "rgb" if h[:2] == b"\x01\xda" else None
|
81
|
+
|
82
|
+
|
83
|
+
@register_test
def test_pbm(h: bytes) -> Optional[ImageType]:
    """Return ``"pbm"`` for headers of the form ``P1``/``P4`` plus whitespace.

    The third byte must be a space, tab, newline or carriage return.
    Headers shorter than three bytes never match.
    """
    if len(h) < 3:
        return None
    letter, digit, separator = h[0], h[1], h[2]
    if letter == ord(b"P") and digit in b"14" and separator in b" \t\n\r":
        return "pbm"
    return None
|
88
|
+
|
89
|
+
|
90
|
+
@register_test
def test_pgm(h: bytes) -> Optional[ImageType]:
    """Return ``"pgm"`` for headers of the form ``P2``/``P5`` plus whitespace.

    The third byte must be a space, tab, newline or carriage return.
    Headers shorter than three bytes never match.
    """
    if len(h) < 3:
        return None
    letter, digit, separator = h[0], h[1], h[2]
    if letter == ord(b"P") and digit in b"25" and separator in b" \t\n\r":
        return "pgm"
    return None
|
95
|
+
|
96
|
+
|
97
|
+
@register_test
def test_ppm(h: bytes) -> Optional[ImageType]:
    """Return ``"ppm"`` for headers of the form ``P3``/``P6`` plus whitespace.

    The third byte must be a space, tab, newline or carriage return.
    Headers shorter than three bytes never match.
    """
    if len(h) < 3:
        return None
    letter, digit, separator = h[0], h[1], h[2]
    if letter == ord(b"P") and digit in b"36" and separator in b" \t\n\r":
        return "ppm"
    return None
|
102
|
+
|
103
|
+
|
104
|
+
@register_test
def test_rast(h: bytes) -> Optional[ImageType]:
    """Return ``"rast"`` when *h* begins with the bytes ``59 A6 6A 95``, else ``None``."""
    return "rast" if h[:4] == b"\x59\xa6\x6a\x95" else None
|
109
|
+
|
110
|
+
|
111
|
+
@register_test
def test_xbm(h: bytes) -> Optional[ImageType]:
    """Return ``"xbm"`` when *h* begins with the text ``#define `` (trailing space included)."""
    xbm_prefix = b"#define "
    return "xbm" if h[: len(xbm_prefix)] == xbm_prefix else None
|
116
|
+
|
117
|
+
|
118
|
+
@register_test
def test_bmp(h: bytes) -> Optional[ImageType]:
    """Return ``"bmp"`` when the first two bytes of *h* are ``BM``, else ``None``."""
    return "bmp" if h[:2] == b"BM" else None
|
123
|
+
|
124
|
+
|
125
|
+
@register_test
def test_webp(h: bytes) -> Optional[ImageType]:
    """Return ``"webp"`` when *h* starts with ``RIFF`` and bytes 8-11 are ``WEBP``.

    Bytes 4-7 are not examined. Headers shorter than twelve bytes never match.
    """
    if len(h) < 12:
        return None
    container_tag, format_tag = h[:4], h[8:12]
    if container_tag == b"RIFF" and format_tag == b"WEBP":
        return "webp"
    return None
|
130
|
+
|
131
|
+
|
132
|
+
@register_test
def test_exr(h: bytes) -> Optional[ImageType]:
    """Return ``"exr"`` when *h* begins with the bytes ``76 2F 31 01``, else ``None``."""
    return "exr" if h[:4] == b"\x76\x2f\x31\x01" else None
|
137
|
+
|
138
|
+
|
139
|
+
if __name__ == "__main__":
    # Manual smoke check: run detection on an embedded base64 PNG sample
    # and print the result.
    sample_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
    fmt = what(sample_b64)
    print(f"Detected format: {fmt}")  # Expected: png
|