chatterer 0.1.24__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. chatterer/__init__.py +97 -93
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/examples/__main__.py +75 -75
  5. chatterer/examples/any2md.py +85 -85
  6. chatterer/examples/pdf2md.py +338 -338
  7. chatterer/examples/pdf2txt.py +54 -54
  8. chatterer/examples/ppt.py +486 -486
  9. chatterer/examples/pw.py +143 -137
  10. chatterer/examples/snippet.py +56 -55
  11. chatterer/examples/transcribe.py +192 -112
  12. chatterer/examples/upstage.py +89 -89
  13. chatterer/examples/web2md.py +80 -66
  14. chatterer/interactive.py +354 -354
  15. chatterer/language_model.py +536 -536
  16. chatterer/messages.py +21 -21
  17. chatterer/strategies/__init__.py +13 -13
  18. chatterer/strategies/atom_of_thoughts.py +975 -975
  19. chatterer/strategies/base.py +14 -14
  20. chatterer/tools/__init__.py +46 -46
  21. chatterer/tools/caption_markdown_images.py +384 -384
  22. chatterer/tools/citation_chunking/__init__.py +3 -3
  23. chatterer/tools/citation_chunking/chunks.py +53 -53
  24. chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  25. chatterer/tools/citation_chunking/citations.py +285 -285
  26. chatterer/tools/citation_chunking/prompt.py +157 -157
  27. chatterer/tools/citation_chunking/reference.py +26 -26
  28. chatterer/tools/citation_chunking/utils.py +138 -138
  29. chatterer/tools/convert_pdf_to_markdown.py +645 -625
  30. chatterer/tools/convert_to_text.py +446 -446
  31. chatterer/tools/upstage_document_parser.py +705 -705
  32. chatterer/tools/webpage_to_markdown.py +739 -739
  33. chatterer/tools/youtube.py +146 -146
  34. chatterer/utils/__init__.py +15 -15
  35. chatterer/utils/base64_image.py +293 -285
  36. chatterer/utils/bytesio.py +59 -59
  37. chatterer/utils/code_agent.py +237 -237
  38. chatterer/utils/imghdr.py +148 -148
  39. {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/METADATA +390 -389
  40. chatterer-0.1.25.dist-info/RECORD +45 -0
  41. chatterer-0.1.24.dist-info/RECORD +0 -45
  42. {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/WHEEL +0 -0
  43. {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/entry_points.txt +0 -0
  44. {chatterer-0.1.24.dist-info → chatterer-0.1.25.dist-info}/top_level.txt +0 -0
chatterer/utils/imghdr.py CHANGED
@@ -1,148 +1,148 @@
1
- """
2
- Recognize image file formats based on their first few bytes (base64-encoded).
3
- Originally derived from Python's imghdr, modified for base64 inputs.
4
- """
5
-
6
- import base64
7
- import math
8
- from typing import Callable, List, Literal, Optional
9
-
10
- ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
11
-
12
- tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
-
14
-
15
- def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
16
- tests.append(func)
17
- return func
18
-
19
-
20
- def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
21
- needed_chars = math.ceil(prefix_bytes * 4 / 3)
22
- truncated_data = b64_data[:needed_chars]
23
-
24
- try:
25
- return base64.b64decode(truncated_data)
26
- except Exception:
27
- return base64.b64decode(b64_data)
28
-
29
-
30
- def what(b64_data: str) -> Optional[ImageType]:
31
- """
32
- base64 인코딩된 문자열에 포함된 이미지의 타입을 반환한다.
33
-
34
- :param b64_data: 이미지 데이터를 담은 base64 문자열.
35
- :return: 이미지 포맷 문자열 (예: "jpeg", "png", "gif", 등) 또는 인식되지 않으면 None.
36
- """
37
- h: bytes = decode_prefix(b64_data, prefix_bytes=32)
38
-
39
- for tf in tests:
40
- res = tf(h)
41
- if res:
42
- return res
43
- return None
44
-
45
-
46
- # --- 테스트 함수들 --- #
47
-
48
-
49
- @register_test
50
- def test_jpeg(h: bytes) -> Optional[ImageType]:
51
- if len(h) >= 10 and h[6:10] in (b"JFIF", b"Exif"):
52
- return "jpeg"
53
- elif h.startswith(b"\xff\xd8\xff\xdb"):
54
- return "jpeg"
55
- return None
56
-
57
-
58
- @register_test
59
- def test_png(h: bytes) -> Optional[ImageType]:
60
- if h.startswith(b"\x89PNG\r\n\x1a\n"):
61
- return "png"
62
- return None
63
-
64
-
65
- @register_test
66
- def test_gif(h: bytes) -> Optional[ImageType]:
67
- if h.startswith(b"GIF87a") or h.startswith(b"GIF89a"):
68
- return "gif"
69
- return None
70
-
71
-
72
- @register_test
73
- def test_tiff(h: bytes) -> Optional[ImageType]:
74
- if h[:2] in (b"MM", b"II"):
75
- return "tiff"
76
- return None
77
-
78
-
79
- @register_test
80
- def test_rgb(h: bytes) -> Optional[ImageType]:
81
- if h.startswith(b"\x01\xda"):
82
- return "rgb"
83
- return None
84
-
85
-
86
- @register_test
87
- def test_pbm(h: bytes) -> Optional[ImageType]:
88
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"14" and h[2] in b" \t\n\r":
89
- return "pbm"
90
- return None
91
-
92
-
93
- @register_test
94
- def test_pgm(h: bytes) -> Optional[ImageType]:
95
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"25" and h[2] in b" \t\n\r":
96
- return "pgm"
97
- return None
98
-
99
-
100
- @register_test
101
- def test_ppm(h: bytes) -> Optional[ImageType]:
102
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"36" and h[2] in b" \t\n\r":
103
- return "ppm"
104
- return None
105
-
106
-
107
- @register_test
108
- def test_rast(h: bytes) -> Optional[ImageType]:
109
- if h.startswith(b"\x59\xa6\x6a\x95"):
110
- return "rast"
111
- return None
112
-
113
-
114
- @register_test
115
- def test_xbm(h: bytes) -> Optional[ImageType]:
116
- if h.startswith(b"#define "):
117
- return "xbm"
118
- return None
119
-
120
-
121
- @register_test
122
- def test_bmp(h: bytes) -> Optional[ImageType]:
123
- if h.startswith(b"BM"):
124
- return "bmp"
125
- return None
126
-
127
-
128
- @register_test
129
- def test_webp(h: bytes) -> Optional[ImageType]:
130
- if len(h) >= 12 and h.startswith(b"RIFF") and h[8:12] == b"WEBP":
131
- return "webp"
132
- return None
133
-
134
-
135
- @register_test
136
- def test_exr(h: bytes) -> Optional[ImageType]:
137
- if h.startswith(b"\x76\x2f\x31\x01"):
138
- return "exr"
139
- return None
140
-
141
-
142
- if __name__ == "__main__":
143
- example_png_base64 = (
144
- "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
145
- )
146
-
147
- fmt = what(example_png_base64)
148
- print(f"Detected format: {fmt}") # Expected: png
1
+ """
2
+ Recognize image file formats based on their first few bytes (base64-encoded).
3
+ Originally derived from Python's imghdr, modified for base64 inputs.
4
+ """
5
+
6
+ import base64
7
+ import math
8
+ from typing import Callable, List, Literal, Optional
9
+
10
# Closed set of format names that a detector in this module may report.
ImageType = Literal[
    "jpeg",
    "png",
    "gif",
    "tiff",
    "rgb",
    "pbm",
    "pgm",
    "ppm",
    "rast",
    "xbm",
    "bmp",
    "webp",
    "exr",
]

# Registry of detector callables; each takes the decoded header bytes and
# returns a format name or None.
tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
+
14
+
15
def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
    """Append *func* to the module-level ``tests`` registry.

    The callable is returned unchanged, which allows this function to be
    applied as a decorator without replacing the decorated name.

    :param func: Detector taking header bytes and returning a format name or None.
    :return: The same callable that was passed in.
    """
    tests.append(func)
    return func
18
+
19
+
20
def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
    """Decode only the leading part of a base64 string.

    Base64 encodes every 3 bytes as a group of 4 characters, and the decoder
    rejects unpadded input that stops in the middle of a group. The slice is
    therefore rounded up to a whole number of groups, so the result may hold
    up to 2 bytes more than ``prefix_bytes``.

    :param b64_data: Base64-encoded payload.
    :param prefix_bytes: Minimum number of decoded bytes wanted from the start
        of the payload.
    :return: The decoded leading bytes (fewer than ``prefix_bytes`` when the
        payload itself is shorter). If the slice cannot be decoded, the whole
        payload is decoded and returned instead.
    :raises binascii.Error: If the full payload is not valid base64 either.
    """
    # Whole 4-character groups only: a 43-character slice (ceil(32 * 4 / 3))
    # always fails with "Incorrect padding" and forces a full decode.
    group_count = math.ceil(prefix_bytes / 3)
    truncated_data = b64_data[: group_count * 4]

    try:
        return base64.b64decode(truncated_data)
    except ValueError:
        # binascii.Error is a ValueError. The slice can still be misaligned,
        # e.g. when the payload contains characters the decoder discards.
        return base64.b64decode(b64_data)
28
+
29
+
30
def what(b64_data: str) -> Optional[ImageType]:
    """Return the type of the image contained in a base64-encoded string.

    The first 32 bytes are requested from ``decode_prefix`` and handed to each
    registered detector in turn; the first truthy answer wins.

    :param b64_data: Base64 string holding the image data.
    :return: Image format string (e.g. "jpeg", "png", "gif") or None when no
        detector recognizes the header.
    """
    header: bytes = decode_prefix(b64_data, prefix_bytes=32)

    for detector in tests:
        detected = detector(header)
        if not detected:
            continue
        return detected
    return None
44
+
45
+
46
+ # --- Test functions --- #
47
+
48
+
49
@register_test
def test_jpeg(h: bytes) -> Optional[ImageType]:
    """Detect JPEG data from its header bytes.

    A header is accepted when it starts with ``FF D8 FF`` (the start-of-image
    marker followed by the lead byte of the next marker), which covers streams
    whose first segment is not JFIF/Exif. The JFIF/Exif tag check at offset 6
    is kept so that every header accepted before is still accepted.

    :param h: Leading bytes of the decoded image data.
    :return: "jpeg" when the header matches, otherwise None.
    """
    if h.startswith(b"\xff\xd8\xff"):
        return "jpeg"
    # Slicing a short header yields fewer than 4 bytes, which matches neither
    # tag, so no explicit length check is needed.
    if h[6:10] in (b"JFIF", b"Exif"):
        return "jpeg"
    return None
56
+
57
+
58
@register_test
def test_png(h: bytes) -> Optional[ImageType]:
    """Report "png" when the header begins with the 8-byte PNG signature.

    :param h: Leading bytes of the decoded image data.
    :return: "png" on a match, otherwise None.
    """
    return "png" if h[:8] == b"\x89PNG\r\n\x1a\n" else None
63
+
64
+
65
@register_test
def test_gif(h: bytes) -> Optional[ImageType]:
    """Report "gif" when the header begins with ``GIF87a`` or ``GIF89a``.

    :param h: Leading bytes of the decoded image data.
    :return: "gif" on a match, otherwise None.
    """
    return "gif" if h.startswith((b"GIF87a", b"GIF89a")) else None
70
+
71
+
72
@register_test
def test_tiff(h: bytes) -> Optional[ImageType]:
    """Report "tiff" when the first two bytes are ``MM`` or ``II``.

    :param h: Leading bytes of the decoded image data.
    :return: "tiff" on a match, otherwise None.
    """
    return "tiff" if h.startswith((b"MM", b"II")) else None
77
+
78
+
79
@register_test
def test_rgb(h: bytes) -> Optional[ImageType]:
    """Report "rgb" when the header begins with the bytes ``01 DA``.

    :param h: Leading bytes of the decoded image data.
    :return: "rgb" on a match, otherwise None.
    """
    return "rgb" if h[:2] == b"\x01\xda" else None
84
+
85
+
86
@register_test
def test_pbm(h: bytes) -> Optional[ImageType]:
    """Report "pbm" for a header of ``P1`` or ``P4`` followed by whitespace.

    :param h: Leading bytes of the decoded image data.
    :return: "pbm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    # Indexing/unpacking bytes yields ints, hence the ord() comparison.
    letter, digit, separator = h[:3]
    if letter == ord(b"P") and digit in b"14" and separator in b" \t\n\r":
        return "pbm"
    return None
91
+
92
+
93
@register_test
def test_pgm(h: bytes) -> Optional[ImageType]:
    """Report "pgm" for a header of ``P2`` or ``P5`` followed by whitespace.

    :param h: Leading bytes of the decoded image data.
    :return: "pgm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    # Indexing/unpacking bytes yields ints, hence the ord() comparison.
    letter, digit, separator = h[:3]
    if letter == ord(b"P") and digit in b"25" and separator in b" \t\n\r":
        return "pgm"
    return None
98
+
99
+
100
@register_test
def test_ppm(h: bytes) -> Optional[ImageType]:
    """Report "ppm" for a header of ``P3`` or ``P6`` followed by whitespace.

    :param h: Leading bytes of the decoded image data.
    :return: "ppm" on a match, otherwise None.
    """
    if len(h) < 3:
        return None
    # Indexing/unpacking bytes yields ints, hence the ord() comparison.
    letter, digit, separator = h[:3]
    if letter == ord(b"P") and digit in b"36" and separator in b" \t\n\r":
        return "ppm"
    return None
105
+
106
+
107
@register_test
def test_rast(h: bytes) -> Optional[ImageType]:
    """Report "rast" when the header begins with the bytes ``59 A6 6A 95``.

    :param h: Leading bytes of the decoded image data.
    :return: "rast" on a match, otherwise None.
    """
    return "rast" if h[:4] == b"\x59\xa6\x6a\x95" else None
112
+
113
+
114
@register_test
def test_xbm(h: bytes) -> Optional[ImageType]:
    """Report "xbm" when the header begins with the text ``#define `` (trailing space included).

    :param h: Leading bytes of the decoded image data.
    :return: "xbm" on a match, otherwise None.
    """
    return "xbm" if h[:8] == b"#define " else None
119
+
120
+
121
@register_test
def test_bmp(h: bytes) -> Optional[ImageType]:
    """Report "bmp" when the first two bytes are ``BM``.

    :param h: Leading bytes of the decoded image data.
    :return: "bmp" on a match, otherwise None.
    """
    return "bmp" if h[:2] == b"BM" else None
126
+
127
+
128
@register_test
def test_webp(h: bytes) -> Optional[ImageType]:
    """Report "webp" for a header with ``RIFF`` at offset 0 and ``WEBP`` at offset 8.

    :param h: Leading bytes of the decoded image data.
    :return: "webp" on a match, otherwise None.
    """
    if len(h) < 12:
        return None
    container_tag = h[:4]
    format_tag = h[8:12]
    if container_tag == b"RIFF" and format_tag == b"WEBP":
        return "webp"
    return None
133
+
134
+
135
@register_test
def test_exr(h: bytes) -> Optional[ImageType]:
    """Report "exr" when the header begins with the bytes ``76 2F 31 01``.

    :param h: Leading bytes of the decoded image data.
    :return: "exr" on a match, otherwise None.
    """
    return "exr" if h[:4] == b"\x76\x2f\x31\x01" else None
140
+
141
+
142
if __name__ == "__main__":
    # Manual smoke check: run detection on a small base64-encoded PNG sample.
    example_png_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="

    print(f"Detected format: {what(example_png_base64)}")  # Expected: png