chatterer 0.1.25__py3-none-any.whl → 0.1.27__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. chatterer/__init__.py +87 -97
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/constants.py +5 -0
  5. chatterer/examples/__main__.py +75 -75
  6. chatterer/examples/any2md.py +83 -85
  7. chatterer/examples/pdf2md.py +231 -338
  8. chatterer/examples/pdf2txt.py +52 -54
  9. chatterer/examples/ppt.py +487 -486
  10. chatterer/examples/pw.py +141 -143
  11. chatterer/examples/snippet.py +54 -56
  12. chatterer/examples/transcribe.py +192 -192
  13. chatterer/examples/upstage.py +87 -89
  14. chatterer/examples/web2md.py +80 -80
  15. chatterer/interactive.py +422 -354
  16. chatterer/language_model.py +530 -536
  17. chatterer/messages.py +21 -21
  18. chatterer/tools/__init__.py +46 -46
  19. chatterer/tools/caption_markdown_images.py +388 -384
  20. chatterer/tools/citation_chunking/__init__.py +3 -3
  21. chatterer/tools/citation_chunking/chunks.py +51 -53
  22. chatterer/tools/citation_chunking/citation_chunker.py +117 -118
  23. chatterer/tools/citation_chunking/citations.py +284 -285
  24. chatterer/tools/citation_chunking/prompt.py +157 -157
  25. chatterer/tools/citation_chunking/reference.py +26 -26
  26. chatterer/tools/citation_chunking/utils.py +138 -138
  27. chatterer/tools/convert_pdf_to_markdown.py +636 -645
  28. chatterer/tools/convert_to_text.py +446 -446
  29. chatterer/tools/upstage_document_parser.py +704 -705
  30. chatterer/tools/webpage_to_markdown.py +739 -739
  31. chatterer/tools/youtube.py +146 -147
  32. chatterer/utils/__init__.py +15 -15
  33. chatterer/utils/base64_image.py +349 -293
  34. chatterer/utils/bytesio.py +59 -59
  35. chatterer/utils/code_agent.py +237 -237
  36. chatterer/utils/imghdr.py +145 -148
  37. {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/METADATA +377 -390
  38. chatterer-0.1.27.dist-info/RECORD +43 -0
  39. chatterer/strategies/__init__.py +0 -13
  40. chatterer/strategies/atom_of_thoughts.py +0 -975
  41. chatterer/strategies/base.py +0 -14
  42. chatterer-0.1.25.dist-info/RECORD +0 -45
  43. {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/WHEEL +0 -0
  44. {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/entry_points.txt +0 -0
  45. {chatterer-0.1.25.dist-info → chatterer-0.1.27.dist-info}/top_level.txt +0 -0
chatterer/utils/imghdr.py CHANGED
@@ -1,148 +1,145 @@
1
- """
2
- Recognize image file formats based on their first few bytes (base64-encoded).
3
- Originally derived from Python's imghdr, modified for base64 inputs.
4
- """
5
-
6
- import base64
7
- import math
8
- from typing import Callable, List, Literal, Optional
9
-
10
- ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
11
-
12
- tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
-
14
-
15
- def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
16
- tests.append(func)
17
- return func
18
-
19
-
20
- def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
21
- needed_chars = math.ceil(prefix_bytes * 4 / 3)
22
- truncated_data = b64_data[:needed_chars]
23
-
24
- try:
25
- return base64.b64decode(truncated_data)
26
- except Exception:
27
- return base64.b64decode(b64_data)
28
-
29
-
30
- def what(b64_data: str) -> Optional[ImageType]:
31
- """
32
- base64 인코딩된 문자열에 포함된 이미지의 타입을 반환한다.
33
-
34
- :param b64_data: 이미지 데이터를 담은 base64 문자열.
35
- :return: 이미지 포맷 문자열 (예: "jpeg", "png", "gif", 등) 또는 인식되지 않으면 None.
36
- """
37
- h: bytes = decode_prefix(b64_data, prefix_bytes=32)
38
-
39
- for tf in tests:
40
- res = tf(h)
41
- if res:
42
- return res
43
- return None
44
-
45
-
46
- # --- 테스트 함수들 --- #
47
-
48
-
49
- @register_test
50
- def test_jpeg(h: bytes) -> Optional[ImageType]:
51
- if len(h) >= 10 and h[6:10] in (b"JFIF", b"Exif"):
52
- return "jpeg"
53
- elif h.startswith(b"\xff\xd8\xff\xdb"):
54
- return "jpeg"
55
- return None
56
-
57
-
58
- @register_test
59
- def test_png(h: bytes) -> Optional[ImageType]:
60
- if h.startswith(b"\x89PNG\r\n\x1a\n"):
61
- return "png"
62
- return None
63
-
64
-
65
- @register_test
66
- def test_gif(h: bytes) -> Optional[ImageType]:
67
- if h.startswith(b"GIF87a") or h.startswith(b"GIF89a"):
68
- return "gif"
69
- return None
70
-
71
-
72
- @register_test
73
- def test_tiff(h: bytes) -> Optional[ImageType]:
74
- if h[:2] in (b"MM", b"II"):
75
- return "tiff"
76
- return None
77
-
78
-
79
- @register_test
80
- def test_rgb(h: bytes) -> Optional[ImageType]:
81
- if h.startswith(b"\x01\xda"):
82
- return "rgb"
83
- return None
84
-
85
-
86
- @register_test
87
- def test_pbm(h: bytes) -> Optional[ImageType]:
88
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"14" and h[2] in b" \t\n\r":
89
- return "pbm"
90
- return None
91
-
92
-
93
- @register_test
94
- def test_pgm(h: bytes) -> Optional[ImageType]:
95
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"25" and h[2] in b" \t\n\r":
96
- return "pgm"
97
- return None
98
-
99
-
100
- @register_test
101
- def test_ppm(h: bytes) -> Optional[ImageType]:
102
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"36" and h[2] in b" \t\n\r":
103
- return "ppm"
104
- return None
105
-
106
-
107
- @register_test
108
- def test_rast(h: bytes) -> Optional[ImageType]:
109
- if h.startswith(b"\x59\xa6\x6a\x95"):
110
- return "rast"
111
- return None
112
-
113
-
114
- @register_test
115
- def test_xbm(h: bytes) -> Optional[ImageType]:
116
- if h.startswith(b"#define "):
117
- return "xbm"
118
- return None
119
-
120
-
121
- @register_test
122
- def test_bmp(h: bytes) -> Optional[ImageType]:
123
- if h.startswith(b"BM"):
124
- return "bmp"
125
- return None
126
-
127
-
128
- @register_test
129
- def test_webp(h: bytes) -> Optional[ImageType]:
130
- if len(h) >= 12 and h.startswith(b"RIFF") and h[8:12] == b"WEBP":
131
- return "webp"
132
- return None
133
-
134
-
135
- @register_test
136
- def test_exr(h: bytes) -> Optional[ImageType]:
137
- if h.startswith(b"\x76\x2f\x31\x01"):
138
- return "exr"
139
- return None
140
-
141
-
142
- if __name__ == "__main__":
143
- example_png_base64 = (
144
- "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
145
- )
146
-
147
- fmt = what(example_png_base64)
148
- print(f"Detected format: {fmt}") # Expected: png
1
+ """
2
+ Recognize image file formats based on their first few bytes (base64-encoded).
3
+ Originally derived from Python's imghdr, modified for base64 inputs.
4
+ """
5
+
6
+ import base64
7
+ import math
8
+ from typing import Callable, List, Literal, Optional
9
+
10
+ ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
11
+
12
+ tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
+
14
+
15
+ def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
16
+ tests.append(func)
17
+ return func
18
+
19
+
20
+ def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
21
+ needed_chars = math.ceil(prefix_bytes * 4 / 3)
22
+ truncated_data = b64_data[:needed_chars]
23
+
24
+ try:
25
+ return base64.b64decode(truncated_data)
26
+ except Exception:
27
+ return base64.b64decode(b64_data)
28
+
29
+
30
+ def what(b64_or_bytes: str | bytes, prefix_bytes: int = 32) -> Optional[ImageType]:
31
+ if isinstance(b64_or_bytes, str):
32
+ h: bytes = decode_prefix(b64_or_bytes, prefix_bytes=prefix_bytes)
33
+ else:
34
+ h = b64_or_bytes
35
+
36
+ for tf in tests:
37
+ res = tf(h)
38
+ if res:
39
+ return res
40
+ return None
41
+
42
+
43
+ # --- 테스트 함수들 --- #
44
+
45
+
46
+ @register_test
47
+ def test_jpeg(h: bytes) -> Optional[ImageType]:
48
+ if len(h) >= 10 and h[6:10] in (b"JFIF", b"Exif"):
49
+ return "jpeg"
50
+ elif h.startswith(b"\xff\xd8\xff\xdb"):
51
+ return "jpeg"
52
+ return None
53
+
54
+
55
+ @register_test
56
+ def test_png(h: bytes) -> Optional[ImageType]:
57
+ if h.startswith(b"\x89PNG\r\n\x1a\n"):
58
+ return "png"
59
+ return None
60
+
61
+
62
+ @register_test
63
+ def test_gif(h: bytes) -> Optional[ImageType]:
64
+ if h.startswith(b"GIF87a") or h.startswith(b"GIF89a"):
65
+ return "gif"
66
+ return None
67
+
68
+
69
+ @register_test
70
+ def test_tiff(h: bytes) -> Optional[ImageType]:
71
+ if h[:2] in (b"MM", b"II"):
72
+ return "tiff"
73
+ return None
74
+
75
+
76
+ @register_test
77
+ def test_rgb(h: bytes) -> Optional[ImageType]:
78
+ if h.startswith(b"\x01\xda"):
79
+ return "rgb"
80
+ return None
81
+
82
+
83
+ @register_test
84
+ def test_pbm(h: bytes) -> Optional[ImageType]:
85
+ if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"14" and h[2] in b" \t\n\r":
86
+ return "pbm"
87
+ return None
88
+
89
+
90
+ @register_test
91
+ def test_pgm(h: bytes) -> Optional[ImageType]:
92
+ if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"25" and h[2] in b" \t\n\r":
93
+ return "pgm"
94
+ return None
95
+
96
+
97
+ @register_test
98
+ def test_ppm(h: bytes) -> Optional[ImageType]:
99
+ if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"36" and h[2] in b" \t\n\r":
100
+ return "ppm"
101
+ return None
102
+
103
+
104
+ @register_test
105
+ def test_rast(h: bytes) -> Optional[ImageType]:
106
+ if h.startswith(b"\x59\xa6\x6a\x95"):
107
+ return "rast"
108
+ return None
109
+
110
+
111
+ @register_test
112
+ def test_xbm(h: bytes) -> Optional[ImageType]:
113
+ if h.startswith(b"#define "):
114
+ return "xbm"
115
+ return None
116
+
117
+
118
+ @register_test
119
+ def test_bmp(h: bytes) -> Optional[ImageType]:
120
+ if h.startswith(b"BM"):
121
+ return "bmp"
122
+ return None
123
+
124
+
125
+ @register_test
126
+ def test_webp(h: bytes) -> Optional[ImageType]:
127
+ if len(h) >= 12 and h.startswith(b"RIFF") and h[8:12] == b"WEBP":
128
+ return "webp"
129
+ return None
130
+
131
+
132
+ @register_test
133
+ def test_exr(h: bytes) -> Optional[ImageType]:
134
+ if h.startswith(b"\x76\x2f\x31\x01"):
135
+ return "exr"
136
+ return None
137
+
138
+
139
+ if __name__ == "__main__":
140
+ example_png_base64 = (
141
+ "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
142
+ )
143
+
144
+ fmt = what(example_png_base64)
145
+ print(f"Detected format: {fmt}") # Expected: png