chatterer 0.1.26__py3-none-any.whl → 0.1.27__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. chatterer/__init__.py +87 -87
  2. chatterer/common_types/__init__.py +21 -21
  3. chatterer/common_types/io.py +19 -19
  4. chatterer/constants.py +5 -0
  5. chatterer/examples/__main__.py +75 -75
  6. chatterer/examples/any2md.py +83 -85
  7. chatterer/examples/pdf2md.py +231 -338
  8. chatterer/examples/pdf2txt.py +52 -54
  9. chatterer/examples/ppt.py +487 -486
  10. chatterer/examples/pw.py +141 -143
  11. chatterer/examples/snippet.py +54 -56
  12. chatterer/examples/transcribe.py +192 -192
  13. chatterer/examples/upstage.py +87 -89
  14. chatterer/examples/web2md.py +80 -80
  15. chatterer/interactive.py +422 -354
  16. chatterer/language_model.py +530 -536
  17. chatterer/messages.py +21 -21
  18. chatterer/tools/__init__.py +46 -46
  19. chatterer/tools/caption_markdown_images.py +388 -384
  20. chatterer/tools/citation_chunking/__init__.py +3 -3
  21. chatterer/tools/citation_chunking/chunks.py +51 -53
  22. chatterer/tools/citation_chunking/citation_chunker.py +117 -118
  23. chatterer/tools/citation_chunking/citations.py +284 -285
  24. chatterer/tools/citation_chunking/prompt.py +157 -157
  25. chatterer/tools/citation_chunking/reference.py +26 -26
  26. chatterer/tools/citation_chunking/utils.py +138 -138
  27. chatterer/tools/convert_pdf_to_markdown.py +636 -645
  28. chatterer/tools/convert_to_text.py +446 -446
  29. chatterer/tools/upstage_document_parser.py +704 -705
  30. chatterer/tools/webpage_to_markdown.py +739 -739
  31. chatterer/tools/youtube.py +146 -147
  32. chatterer/utils/__init__.py +15 -15
  33. chatterer/utils/base64_image.py +349 -350
  34. chatterer/utils/bytesio.py +59 -59
  35. chatterer/utils/code_agent.py +237 -237
  36. chatterer/utils/imghdr.py +145 -145
  37. {chatterer-0.1.26.dist-info → chatterer-0.1.27.dist-info}/METADATA +377 -390
  38. chatterer-0.1.27.dist-info/RECORD +43 -0
  39. chatterer-0.1.26.dist-info/RECORD +0 -42
  40. {chatterer-0.1.26.dist-info → chatterer-0.1.27.dist-info}/WHEEL +0 -0
  41. {chatterer-0.1.26.dist-info → chatterer-0.1.27.dist-info}/entry_points.txt +0 -0
  42. {chatterer-0.1.26.dist-info → chatterer-0.1.27.dist-info}/top_level.txt +0 -0
chatterer/utils/imghdr.py CHANGED
@@ -1,145 +1,145 @@
1
- """
2
- Recognize image file formats based on their first few bytes (base64-encoded).
3
- Originally derived from Python's imghdr, modified for base64 inputs.
4
- """
5
-
6
- import base64
7
- import math
8
- from typing import Callable, List, Literal, Optional
9
-
10
- ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
11
-
12
- tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
-
14
-
15
- def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
16
- tests.append(func)
17
- return func
18
-
19
-
20
- def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
21
- needed_chars = math.ceil(prefix_bytes * 4 / 3)
22
- truncated_data = b64_data[:needed_chars]
23
-
24
- try:
25
- return base64.b64decode(truncated_data)
26
- except Exception:
27
- return base64.b64decode(b64_data)
28
-
29
-
30
- def what(b64_or_bytes: str | bytes, prefix_bytes: int = 32) -> Optional[ImageType]:
31
- if isinstance(b64_or_bytes, str):
32
- h: bytes = decode_prefix(b64_or_bytes, prefix_bytes=prefix_bytes)
33
- else:
34
- h = b64_or_bytes
35
-
36
- for tf in tests:
37
- res = tf(h)
38
- if res:
39
- return res
40
- return None
41
-
42
-
43
- # --- 테스트 함수들 --- #
44
-
45
-
46
- @register_test
47
- def test_jpeg(h: bytes) -> Optional[ImageType]:
48
- if len(h) >= 10 and h[6:10] in (b"JFIF", b"Exif"):
49
- return "jpeg"
50
- elif h.startswith(b"\xff\xd8\xff\xdb"):
51
- return "jpeg"
52
- return None
53
-
54
-
55
- @register_test
56
- def test_png(h: bytes) -> Optional[ImageType]:
57
- if h.startswith(b"\x89PNG\r\n\x1a\n"):
58
- return "png"
59
- return None
60
-
61
-
62
- @register_test
63
- def test_gif(h: bytes) -> Optional[ImageType]:
64
- if h.startswith(b"GIF87a") or h.startswith(b"GIF89a"):
65
- return "gif"
66
- return None
67
-
68
-
69
- @register_test
70
- def test_tiff(h: bytes) -> Optional[ImageType]:
71
- if h[:2] in (b"MM", b"II"):
72
- return "tiff"
73
- return None
74
-
75
-
76
- @register_test
77
- def test_rgb(h: bytes) -> Optional[ImageType]:
78
- if h.startswith(b"\x01\xda"):
79
- return "rgb"
80
- return None
81
-
82
-
83
- @register_test
84
- def test_pbm(h: bytes) -> Optional[ImageType]:
85
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"14" and h[2] in b" \t\n\r":
86
- return "pbm"
87
- return None
88
-
89
-
90
- @register_test
91
- def test_pgm(h: bytes) -> Optional[ImageType]:
92
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"25" and h[2] in b" \t\n\r":
93
- return "pgm"
94
- return None
95
-
96
-
97
- @register_test
98
- def test_ppm(h: bytes) -> Optional[ImageType]:
99
- if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"36" and h[2] in b" \t\n\r":
100
- return "ppm"
101
- return None
102
-
103
-
104
- @register_test
105
- def test_rast(h: bytes) -> Optional[ImageType]:
106
- if h.startswith(b"\x59\xa6\x6a\x95"):
107
- return "rast"
108
- return None
109
-
110
-
111
- @register_test
112
- def test_xbm(h: bytes) -> Optional[ImageType]:
113
- if h.startswith(b"#define "):
114
- return "xbm"
115
- return None
116
-
117
-
118
- @register_test
119
- def test_bmp(h: bytes) -> Optional[ImageType]:
120
- if h.startswith(b"BM"):
121
- return "bmp"
122
- return None
123
-
124
-
125
- @register_test
126
- def test_webp(h: bytes) -> Optional[ImageType]:
127
- if len(h) >= 12 and h.startswith(b"RIFF") and h[8:12] == b"WEBP":
128
- return "webp"
129
- return None
130
-
131
-
132
- @register_test
133
- def test_exr(h: bytes) -> Optional[ImageType]:
134
- if h.startswith(b"\x76\x2f\x31\x01"):
135
- return "exr"
136
- return None
137
-
138
-
139
- if __name__ == "__main__":
140
- example_png_base64 = (
141
- "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
142
- )
143
-
144
- fmt = what(example_png_base64)
145
- print(f"Detected format: {fmt}") # Expected: png
1
+ """
2
+ Recognize image file formats based on their first few bytes (base64-encoded).
3
+ Originally derived from Python's imghdr, modified for base64 inputs.
4
+ """
5
+
6
+ import base64
7
+ import math
8
+ from typing import Callable, List, Literal, Optional
9
+
10
+ ImageType = Literal["jpeg", "png", "gif", "tiff", "rgb", "pbm", "pgm", "ppm", "rast", "xbm", "bmp", "webp", "exr"]
11
+
12
+ tests: List[Callable[[bytes], Optional[ImageType]]] = []
13
+
14
+
15
+ def register_test(func: Callable[[bytes], Optional[ImageType]]) -> Callable[[bytes], Optional[ImageType]]:
16
+ tests.append(func)
17
+ return func
18
+
19
+
20
+ def decode_prefix(b64_data: str, prefix_bytes: int = 32) -> bytes:
21
+ needed_chars = math.ceil(prefix_bytes * 4 / 3)
22
+ truncated_data = b64_data[:needed_chars]
23
+
24
+ try:
25
+ return base64.b64decode(truncated_data)
26
+ except Exception:
27
+ return base64.b64decode(b64_data)
28
+
29
+
30
+ def what(b64_or_bytes: str | bytes, prefix_bytes: int = 32) -> Optional[ImageType]:
31
+ if isinstance(b64_or_bytes, str):
32
+ h: bytes = decode_prefix(b64_or_bytes, prefix_bytes=prefix_bytes)
33
+ else:
34
+ h = b64_or_bytes
35
+
36
+ for tf in tests:
37
+ res = tf(h)
38
+ if res:
39
+ return res
40
+ return None
41
+
42
+
43
+ # --- 테스트 함수들 --- #
44
+
45
+
46
+ @register_test
47
+ def test_jpeg(h: bytes) -> Optional[ImageType]:
48
+ if len(h) >= 10 and h[6:10] in (b"JFIF", b"Exif"):
49
+ return "jpeg"
50
+ elif h.startswith(b"\xff\xd8\xff\xdb"):
51
+ return "jpeg"
52
+ return None
53
+
54
+
55
+ @register_test
56
+ def test_png(h: bytes) -> Optional[ImageType]:
57
+ if h.startswith(b"\x89PNG\r\n\x1a\n"):
58
+ return "png"
59
+ return None
60
+
61
+
62
+ @register_test
63
+ def test_gif(h: bytes) -> Optional[ImageType]:
64
+ if h.startswith(b"GIF87a") or h.startswith(b"GIF89a"):
65
+ return "gif"
66
+ return None
67
+
68
+
69
+ @register_test
70
+ def test_tiff(h: bytes) -> Optional[ImageType]:
71
+ if h[:2] in (b"MM", b"II"):
72
+ return "tiff"
73
+ return None
74
+
75
+
76
+ @register_test
77
+ def test_rgb(h: bytes) -> Optional[ImageType]:
78
+ if h.startswith(b"\x01\xda"):
79
+ return "rgb"
80
+ return None
81
+
82
+
83
+ @register_test
84
+ def test_pbm(h: bytes) -> Optional[ImageType]:
85
+ if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"14" and h[2] in b" \t\n\r":
86
+ return "pbm"
87
+ return None
88
+
89
+
90
+ @register_test
91
+ def test_pgm(h: bytes) -> Optional[ImageType]:
92
+ if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"25" and h[2] in b" \t\n\r":
93
+ return "pgm"
94
+ return None
95
+
96
+
97
+ @register_test
98
+ def test_ppm(h: bytes) -> Optional[ImageType]:
99
+ if len(h) >= 3 and h[0] == ord(b"P") and h[1] in b"36" and h[2] in b" \t\n\r":
100
+ return "ppm"
101
+ return None
102
+
103
+
104
+ @register_test
105
+ def test_rast(h: bytes) -> Optional[ImageType]:
106
+ if h.startswith(b"\x59\xa6\x6a\x95"):
107
+ return "rast"
108
+ return None
109
+
110
+
111
+ @register_test
112
+ def test_xbm(h: bytes) -> Optional[ImageType]:
113
+ if h.startswith(b"#define "):
114
+ return "xbm"
115
+ return None
116
+
117
+
118
+ @register_test
119
+ def test_bmp(h: bytes) -> Optional[ImageType]:
120
+ if h.startswith(b"BM"):
121
+ return "bmp"
122
+ return None
123
+
124
+
125
+ @register_test
126
+ def test_webp(h: bytes) -> Optional[ImageType]:
127
+ if len(h) >= 12 and h.startswith(b"RIFF") and h[8:12] == b"WEBP":
128
+ return "webp"
129
+ return None
130
+
131
+
132
+ @register_test
133
+ def test_exr(h: bytes) -> Optional[ImageType]:
134
+ if h.startswith(b"\x76\x2f\x31\x01"):
135
+ return "exr"
136
+ return None
137
+
138
+
139
+ if __name__ == "__main__":
140
+ example_png_base64 = (
141
+ "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/5+BAQAE/wH+U6az4wAAAABJRU5ErkJggg=="
142
+ )
143
+
144
+ fmt = what(example_png_base64)
145
+ print(f"Detected format: {fmt}") # Expected: png