chatterer 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. chatterer/__init__.py +87 -93
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/examples/__main__.py +75 -75
  5. chatterer/examples/any2md.py +85 -85
  6. chatterer/examples/pdf2md.py +338 -338
  7. chatterer/examples/pdf2txt.py +54 -54
  8. chatterer/examples/ppt.py +486 -486
  9. chatterer/examples/pw.py +143 -137
  10. chatterer/examples/snippet.py +56 -55
  11. chatterer/examples/transcribe.py +192 -112
  12. chatterer/examples/upstage.py +89 -89
  13. chatterer/examples/web2md.py +80 -66
  14. chatterer/interactive.py +354 -354
  15. chatterer/language_model.py +536 -536
  16. chatterer/messages.py +21 -21
  17. chatterer/tools/__init__.py +46 -46
  18. chatterer/tools/caption_markdown_images.py +384 -384
  19. chatterer/tools/citation_chunking/__init__.py +3 -3
  20. chatterer/tools/citation_chunking/chunks.py +53 -53
  21. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  22. chatterer/tools/citation_chunking/citations.py +285 -285
  23. chatterer/tools/citation_chunking/prompt.py +157 -157
  24. chatterer/tools/citation_chunking/reference.py +26 -26
  25. chatterer/tools/citation_chunking/utils.py +138 -138
  26. chatterer/tools/convert_pdf_to_markdown.py +645 -625
  27. chatterer/tools/convert_to_text.py +446 -446
  28. chatterer/tools/upstage_document_parser.py +705 -705
  29. chatterer/tools/webpage_to_markdown.py +739 -739
  30. chatterer/tools/youtube.py +146 -146
  31. chatterer/utils/__init__.py +15 -15
  32. chatterer/utils/base64_image.py +350 -285
  33. chatterer/utils/bytesio.py +59 -59
  34. chatterer/utils/code_agent.py +237 -237
  35. chatterer/utils/imghdr.py +145 -148
  36. {chatterer-0.1.24.dist-info → chatterer-0.1.26.dist-info}/METADATA +390 -389
  37. chatterer-0.1.26.dist-info/RECORD +42 -0
  38. chatterer/strategies/__init__.py +0 -13
  39. chatterer/strategies/atom_of_thoughts.py +0 -975
  40. chatterer/strategies/base.py +0 -14
  41. chatterer-0.1.24.dist-info/RECORD +0 -45
  42. {chatterer-0.1.24.dist-info → chatterer-0.1.26.dist-info}/WHEEL +0 -0
  43. {chatterer-0.1.24.dist-info → chatterer-0.1.26.dist-info}/entry_points.txt +0 -0
  44. {chatterer-0.1.24.dist-info → chatterer-0.1.26.dist-info}/top_level.txt +0 -0
chatterer/utils/imghdr.py CHANGED
@@ -1,148 +1,145 @@
1
- """
2
- Recognize image file formats based on their first few bytes (base64-encoded).
3
- Originally derived from Python's imghdr, modified for base64 inputs.
4
- """
5
-
6
- import base64
7
- import math
8
- from typing import Callable, List, Literal, Optional
9
-
10
# Closed set of format names that a detector function may report.
ImageType = Literal[
    "jpeg",
    "png",
    "gif",
    "tiff",
    "rgb",
    "pbm",
    "pgm",
    "ppm",
    "rast",
    "xbm",
    "bmp",
    "webp",
    "exr",
]

# Registry of detector callables; each takes header bytes and returns a format name or None.
tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
-
14
-
15
def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
    """
    Decorator that records *func* in the module-level ``tests`` list.

    :param func: detector taking header bytes and returning a format name or None.
    :return: the same callable, unmodified, so the decorated name stays usable.
    """
    detector = func
    tests.append(detector)
    return detector
18
-
19
-
20
def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
    """
    Decode only as much of a base64 string as is needed to cover its first bytes.

    Base64 maps every 3 bytes to a 4-character group and ``base64.b64decode``
    rejects input that is not a whole number of groups, so the cut is made on a
    group boundary. (Cutting at ``ceil(prefix_bytes * 4 / 3)`` characters, e.g.
    43 for 32 bytes, fails on every longer input and forces a full decode.)

    :param b64_data: base64-encoded text.
    :param prefix_bytes: minimum number of leading decoded bytes wanted.
    :return: the leading decoded bytes (at least ``prefix_bytes`` when the input
        is long enough, possibly up to two more), or the whole decoded payload
        when the leading slice cannot be decoded by itself.
    :raises binascii.Error: if ``b64_data`` as a whole is not valid base64.
    """
    group_count = math.ceil(prefix_bytes / 3)
    head = b64_data[: group_count * 4]

    try:
        return base64.b64decode(head)
    except ValueError:
        # binascii.Error is a ValueError subclass. The slice can still be
        # undecodable when discarded characters (e.g. newlines) fall inside it,
        # so fall back to decoding everything.
        return base64.b64decode(b64_data)
28
-
29
-
30
def what(b64_data: str) -> Optional[ImageType]:
    """
    Return the type of the image contained in a base64-encoded string.

    :param b64_data: base64 string holding the image data.
    :return: image format string (e.g. "jpeg", "png", "gif", ...) or None if not recognized.
    """
    header: bytes = decode_prefix(b64_data, prefix_bytes=32)

    # Lazily run the registered detectors in order and stop at the first truthy answer.
    verdicts = (detect(header) for detect in tests)
    return next((fmt for fmt in verdicts if fmt), None)
44
-
45
-
46
- # --- Test functions --- #
47
-
48
-
49
@register_test
def test_jpeg(h: bytes) -> Optional[ImageType]:
    """
    Detect JPEG data from its leading bytes.

    A header is accepted when it starts with ``FF D8 FF`` (start-of-image marker
    followed by the first byte of the next marker). This covers files whose
    first segment is not JFIF/Exif/quantization-table, which the narrower
    ``FF D8 FF DB`` check missed. The older rule, a ``JFIF`` or ``Exif`` tag at
    offset 6, is kept so every input recognized before is still recognized.

    :param h: leading bytes of the image.
    :return: "jpeg" on a match, otherwise None.
    """
    if h.startswith(b"\xff\xd8\xff"):
        return "jpeg"
    # A 4-byte slice can only equal a tag when at least 10 bytes are present.
    if h[6:10] in (b"JFIF", b"Exif"):
        return "jpeg"
    return None
56
-
57
-
58
@register_test
def test_png(h: bytes) -> Optional[ImageType]:
    """
    Report "png" when the header opens with the 8-byte PNG signature.

    :param h: leading bytes of the image.
    :return: "png" on a match, otherwise None.
    """
    signature = b"\x89PNG\r\n\x1a\n"
    return "png" if h[: len(signature)] == signature else None
63
-
64
-
65
@register_test
def test_gif(h: bytes) -> Optional[ImageType]:
    """
    Report "gif" when the header opens with either GIF version tag.

    :param h: leading bytes of the image.
    :return: "gif" on a match, otherwise None.
    """
    version_tags = (b"GIF87a", b"GIF89a")
    return "gif" if h.startswith(version_tags) else None
70
-
71
-
72
@register_test
def test_tiff(h: bytes) -> Optional[ImageType]:
    """
    Report "tiff" when the first two bytes are ``MM`` or ``II``.

    :param h: leading bytes of the image.
    :return: "tiff" on a match, otherwise None.
    """
    byte_order_marks = (b"MM", b"II")
    return "tiff" if h.startswith(byte_order_marks) else None
77
-
78
-
79
@register_test
def test_rgb(h: bytes) -> Optional[ImageType]:
    """
    Report "rgb" when the header opens with the two bytes 01 DA.

    :param h: leading bytes of the image.
    :return: "rgb" on a match, otherwise None.
    """
    return "rgb" if h[:2] == b"\x01\xda" else None
84
-
85
-
86
@register_test
def test_pbm(h: bytes) -> Optional[ImageType]:
    """
    Report "pbm" for a header of the form ``P``, then ``1`` or ``4``, then whitespace.

    :param h: leading bytes of the image.
    :return: "pbm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    magic, variant, separator = h[0:1], h[1:2], h[2:3]
    if magic == b"P" and variant in (b"1", b"4") and separator in (b" ", b"\t", b"\n", b"\r"):
        return "pbm"
    return None
91
-
92
-
93
@register_test
def test_pgm(h: bytes) -> Optional[ImageType]:
    """
    Report "pgm" for a header of the form ``P``, then ``2`` or ``5``, then whitespace.

    :param h: leading bytes of the image.
    :return: "pgm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    magic, variant, separator = h[0:1], h[1:2], h[2:3]
    if magic == b"P" and variant in (b"2", b"5") and separator in (b" ", b"\t", b"\n", b"\r"):
        return "pgm"
    return None
98
-
99
-
100
@register_test
def test_ppm(h: bytes) -> Optional[ImageType]:
    """
    Report "ppm" for a header of the form ``P``, then ``3`` or ``6``, then whitespace.

    :param h: leading bytes of the image.
    :return: "ppm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    magic, variant, separator = h[0:1], h[1:2], h[2:3]
    if magic == b"P" and variant in (b"3", b"6") and separator in (b" ", b"\t", b"\n", b"\r"):
        return "ppm"
    return None
105
-
106
-
107
@register_test
def test_rast(h: bytes) -> Optional[ImageType]:
    """
    Report "rast" when the header opens with the four bytes 59 A6 6A 95.

    :param h: leading bytes of the image.
    :return: "rast" on a match, otherwise None.
    """
    return "rast" if h[:4] == b"\x59\xa6\x6a\x95" else None
112
-
113
-
114
@register_test
def test_xbm(h: bytes) -> Optional[ImageType]:
    """
    Report "xbm" when the header opens with the text ``#define `` (trailing space included).

    :param h: leading bytes of the image.
    :return: "xbm" on a match, otherwise None.
    """
    opener = b"#define "
    return "xbm" if h[: len(opener)] == opener else None
119
-
120
-
121
@register_test
def test_bmp(h: bytes) -> Optional[ImageType]:
    """
    Report "bmp" when the header opens with the two bytes ``BM``.

    :param h: leading bytes of the image.
    :return: "bmp" on a match, otherwise None.
    """
    return "bmp" if h[:2] == b"BM" else None
126
-
127
-
128
@register_test
def test_webp(h: bytes) -> Optional[ImageType]:
    """
    Report "webp" when bytes 0-3 are ``RIFF`` and bytes 8-11 are ``WEBP``.

    :param h: leading bytes of the image.
    :return: "webp" on a match, otherwise None.
    """
    # A 4-byte slice at offset 8 can only equal the tag when at least 12 bytes exist.
    container_tag, format_tag = h[:4], h[8:12]
    if container_tag == b"RIFF" and format_tag == b"WEBP":
        return "webp"
    return None
133
-
134
-
135
@register_test
def test_exr(h: bytes) -> Optional[ImageType]:
    """
    Report "exr" when the header opens with the four bytes 76 2F 31 01.

    :param h: leading bytes of the image.
    :return: "exr" on a match, otherwise None.
    """
    return "exr" if h[:4] == b"\x76\x2f\x31\x01" else None
140
-
141
-
142
if __name__ == "__main__":
    # Manual smoke check: feed a small base64-encoded PNG through the detector.
    sample_png_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
    detected = what(sample_png_b64)
    print(f"Detected format: {detected}")  # Expected: png
1
+ """
2
+ Recognize image file formats based on their first few bytes (base64-encoded).
3
+ Originally derived from Python's imghdr, modified for base64 inputs.
4
+ """
5
+
6
+ import base64
7
+ import math
8
+ from typing import Callable, List, Literal, Optional
9
+
10
# Closed set of format names that a detector function may report.
ImageType = Literal[
    "jpeg",
    "png",
    "gif",
    "tiff",
    "rgb",
    "pbm",
    "pgm",
    "ppm",
    "rast",
    "xbm",
    "bmp",
    "webp",
    "exr",
]

# Registry of detector callables; each takes header bytes and returns a format name or None.
tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
+
14
+
15
def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
    """
    Decorator that records *func* in the module-level ``tests`` list.

    :param func: detector taking header bytes and returning a format name or None.
    :return: the same callable, unmodified, so the decorated name stays usable.
    """
    detector = func
    tests.append(detector)
    return detector
18
+
19
+
20
def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
    """
    Decode only as much of a base64 string as is needed to cover its first bytes.

    Base64 maps every 3 bytes to a 4-character group and ``base64.b64decode``
    rejects input that is not a whole number of groups, so the cut is made on a
    group boundary. (Cutting at ``ceil(prefix_bytes * 4 / 3)`` characters, e.g.
    43 for 32 bytes, fails on every longer input and forces a full decode.)

    :param b64_data: base64-encoded text.
    :param prefix_bytes: minimum number of leading decoded bytes wanted.
    :return: the leading decoded bytes (at least ``prefix_bytes`` when the input
        is long enough, possibly up to two more), or the whole decoded payload
        when the leading slice cannot be decoded by itself.
    :raises binascii.Error: if ``b64_data`` as a whole is not valid base64.
    """
    group_count = math.ceil(prefix_bytes / 3)
    head = b64_data[: group_count * 4]

    try:
        return base64.b64decode(head)
    except ValueError:
        # binascii.Error is a ValueError subclass. The slice can still be
        # undecodable when discarded characters (e.g. newlines) fall inside it,
        # so fall back to decoding everything.
        return base64.b64decode(b64_data)
28
+
29
+
30
def what(b64_or_bytes: str | bytes, prefix_bytes: int = 32) -> Optional[ImageType]:
    """
    Return the image type detected from base64 text or from raw bytes.

    :param b64_or_bytes: a ``str`` is treated as base64 and only its leading part
        is decoded; any other value is handed to the detectors as-is.
    :param prefix_bytes: number of leading bytes to decode for ``str`` input.
    :return: the first truthy result from the registered detectors, or None.
    """
    header: bytes = (
        decode_prefix(b64_or_bytes, prefix_bytes=prefix_bytes) if isinstance(b64_or_bytes, str) else b64_or_bytes
    )

    # Lazily run the registered detectors in order and stop at the first truthy answer.
    verdicts = (detect(header) for detect in tests)
    return next((fmt for fmt in verdicts if fmt), None)
41
+
42
+
43
+ # --- Test functions --- #
44
+
45
+
46
@register_test
def test_jpeg(h: bytes) -> Optional[ImageType]:
    """
    Detect JPEG data from its leading bytes.

    A header is accepted when it starts with ``FF D8 FF`` (start-of-image marker
    followed by the first byte of the next marker). This covers files whose
    first segment is not JFIF/Exif/quantization-table, which the narrower
    ``FF D8 FF DB`` check missed. The older rule, a ``JFIF`` or ``Exif`` tag at
    offset 6, is kept so every input recognized before is still recognized.

    :param h: leading bytes of the image.
    :return: "jpeg" on a match, otherwise None.
    """
    if h.startswith(b"\xff\xd8\xff"):
        return "jpeg"
    # A 4-byte slice can only equal a tag when at least 10 bytes are present.
    if h[6:10] in (b"JFIF", b"Exif"):
        return "jpeg"
    return None
53
+
54
+
55
@register_test
def test_png(h: bytes) -> Optional[ImageType]:
    """
    Report "png" when the header opens with the 8-byte PNG signature.

    :param h: leading bytes of the image.
    :return: "png" on a match, otherwise None.
    """
    signature = b"\x89PNG\r\n\x1a\n"
    return "png" if h[: len(signature)] == signature else None
60
+
61
+
62
@register_test
def test_gif(h: bytes) -> Optional[ImageType]:
    """
    Report "gif" when the header opens with either GIF version tag.

    :param h: leading bytes of the image.
    :return: "gif" on a match, otherwise None.
    """
    version_tags = (b"GIF87a", b"GIF89a")
    return "gif" if h.startswith(version_tags) else None
67
+
68
+
69
@register_test
def test_tiff(h: bytes) -> Optional[ImageType]:
    """
    Report "tiff" when the first two bytes are ``MM`` or ``II``.

    :param h: leading bytes of the image.
    :return: "tiff" on a match, otherwise None.
    """
    byte_order_marks = (b"MM", b"II")
    return "tiff" if h.startswith(byte_order_marks) else None
74
+
75
+
76
@register_test
def test_rgb(h: bytes) -> Optional[ImageType]:
    """
    Report "rgb" when the header opens with the two bytes 01 DA.

    :param h: leading bytes of the image.
    :return: "rgb" on a match, otherwise None.
    """
    return "rgb" if h[:2] == b"\x01\xda" else None
81
+
82
+
83
@register_test
def test_pbm(h: bytes) -> Optional[ImageType]:
    """
    Report "pbm" for a header of the form ``P``, then ``1`` or ``4``, then whitespace.

    :param h: leading bytes of the image.
    :return: "pbm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    magic, variant, separator = h[0:1], h[1:2], h[2:3]
    if magic == b"P" and variant in (b"1", b"4") and separator in (b" ", b"\t", b"\n", b"\r"):
        return "pbm"
    return None
88
+
89
+
90
@register_test
def test_pgm(h: bytes) -> Optional[ImageType]:
    """
    Report "pgm" for a header of the form ``P``, then ``2`` or ``5``, then whitespace.

    :param h: leading bytes of the image.
    :return: "pgm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    magic, variant, separator = h[0:1], h[1:2], h[2:3]
    if magic == b"P" and variant in (b"2", b"5") and separator in (b" ", b"\t", b"\n", b"\r"):
        return "pgm"
    return None
95
+
96
+
97
@register_test
def test_ppm(h: bytes) -> Optional[ImageType]:
    """
    Report "ppm" for a header of the form ``P``, then ``3`` or ``6``, then whitespace.

    :param h: leading bytes of the image.
    :return: "ppm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    magic, variant, separator = h[0:1], h[1:2], h[2:3]
    if magic == b"P" and variant in (b"3", b"6") and separator in (b" ", b"\t", b"\n", b"\r"):
        return "ppm"
    return None
102
+
103
+
104
@register_test
def test_rast(h: bytes) -> Optional[ImageType]:
    """
    Report "rast" when the header opens with the four bytes 59 A6 6A 95.

    :param h: leading bytes of the image.
    :return: "rast" on a match, otherwise None.
    """
    return "rast" if h[:4] == b"\x59\xa6\x6a\x95" else None
109
+
110
+
111
@register_test
def test_xbm(h: bytes) -> Optional[ImageType]:
    """
    Report "xbm" when the header opens with the text ``#define `` (trailing space included).

    :param h: leading bytes of the image.
    :return: "xbm" on a match, otherwise None.
    """
    opener = b"#define "
    return "xbm" if h[: len(opener)] == opener else None
116
+
117
+
118
@register_test
def test_bmp(h: bytes) -> Optional[ImageType]:
    """
    Report "bmp" when the header opens with the two bytes ``BM``.

    :param h: leading bytes of the image.
    :return: "bmp" on a match, otherwise None.
    """
    return "bmp" if h[:2] == b"BM" else None
123
+
124
+
125
@register_test
def test_webp(h: bytes) -> Optional[ImageType]:
    """
    Report "webp" when bytes 0-3 are ``RIFF`` and bytes 8-11 are ``WEBP``.

    :param h: leading bytes of the image.
    :return: "webp" on a match, otherwise None.
    """
    # A 4-byte slice at offset 8 can only equal the tag when at least 12 bytes exist.
    container_tag, format_tag = h[:4], h[8:12]
    if container_tag == b"RIFF" and format_tag == b"WEBP":
        return "webp"
    return None
130
+
131
+
132
@register_test
def test_exr(h: bytes) -> Optional[ImageType]:
    """
    Report "exr" when the header opens with the four bytes 76 2F 31 01.

    :param h: leading bytes of the image.
    :return: "exr" on a match, otherwise None.
    """
    return "exr" if h[:4] == b"\x76\x2f\x31\x01" else None
137
+
138
+
139
if __name__ == "__main__":
    # Manual smoke check: feed a small base64-encoded PNG through the detector.
    sample_png_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
    detected = what(sample_png_b64)
    print(f"Detected format: {detected}")  # Expected: png