chatterer 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/__init__.py +39 -27
- chatterer/language_model.py +371 -327
- chatterer/messages.py +8 -0
- chatterer/strategies/__init__.py +13 -13
- chatterer/strategies/atom_of_thoughts.py +975 -976
- chatterer/strategies/base.py +14 -14
- chatterer/tools/__init__.py +17 -15
- chatterer/tools/citation_chunking/__init__.py +3 -0
- chatterer/tools/citation_chunking/chunks.py +53 -0
- chatterer/tools/citation_chunking/citation_chunker.py +118 -0
- chatterer/tools/citation_chunking/citations.py +285 -0
- chatterer/tools/citation_chunking/prompt.py +157 -0
- chatterer/tools/citation_chunking/reference.py +26 -0
- chatterer/tools/citation_chunking/utils.py +138 -0
- chatterer/tools/convert_to_text.py +466 -464
- chatterer/tools/webpage_to_markdown/__init__.py +4 -4
- chatterer/tools/webpage_to_markdown/playwright_bot.py +649 -631
- chatterer/tools/webpage_to_markdown/utils.py +329 -556
- chatterer/utils/image.py +284 -0
- {chatterer-0.1.6.dist-info → chatterer-0.1.7.dist-info}/METADATA +166 -166
- chatterer-0.1.7.dist-info/RECORD +24 -0
- {chatterer-0.1.6.dist-info → chatterer-0.1.7.dist-info}/WHEEL +1 -1
- chatterer-0.1.6.dist-info/RECORD +0 -15
- {chatterer-0.1.6.dist-info → chatterer-0.1.7.dist-info}/top_level.txt +0 -0
chatterer/utils/image.py
ADDED
@@ -0,0 +1,284 @@
+from __future__ import annotations
+
+import re
+from base64 import b64encode
+from io import BytesIO
+from pathlib import Path
+from traceback import print_exc
+from typing import (
+    Awaitable,
+    ClassVar,
+    Literal,
+    NotRequired,
+    Optional,
+    Self,
+    Sequence,
+    TypeAlias,
+    TypedDict,
+    TypeGuard,
+    cast,
+    get_args,
+    overload,
+)
+from urllib.parse import urlparse
+
+import requests
+from aiohttp import ClientSession
+from PIL.Image import Resampling
+from PIL.Image import open as image_open
+from pydantic import BaseModel
+
+ImageType: TypeAlias = Literal["jpeg", "jpg", "png", "gif", "webp", "bmp"]
+
+
+class ImageProcessingConfig(TypedDict):
+    """
+    Settings used when filtering/converting images.
+    - formats: (Sequence[str]) Allowed image formats (lowercase, e.g. ["jpeg", "png", "webp"]).
+    - max_size_mb: (float) Upper bound on image size in MB; larger images are excluded.
+    - min_largest_side: (int) Minimum size of the larger of width/height; smaller images are excluded.
+    - resize_if_min_side_exceeds: (int) If the smaller of width/height is at least this value, rescale.
+    - resize_target_for_min_side: (int) When rescaling, shrink the smallest side to this value (aspect ratio preserved via Lanczos).
+    """
+
+    formats: Sequence[ImageType]
+    max_size_mb: NotRequired[float]
+    min_largest_side: NotRequired[int]
+    resize_if_min_side_exceeds: NotRequired[int]
+    resize_target_for_min_side: NotRequired[int]
+
+
+def get_default_image_processing_config() -> ImageProcessingConfig:
+    return {
+        "max_size_mb": 5,
+        "min_largest_side": 200,
+        "resize_if_min_side_exceeds": 2000,
+        "resize_target_for_min_side": 1000,
+        "formats": ["png", "jpeg", "gif", "bmp", "webp"],
+    }
+
+
+class Base64Image(BaseModel):
+    ext: ImageType
+    data: str
+
+    IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
+    IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
+        rf"data:image/({'|'.join(IMAGE_TYPES)});base64,([A-Za-z0-9+/]+={{0,2}})$"
+    )
+
+    def __hash__(self) -> int:
+        return hash((self.ext, self.data))
+
+    def model_post_init(self, __context: object) -> None:
+        if self.ext == "jpg":
+            self.ext = "jpeg"
+
+    @classmethod
+    def from_string(cls, data: str) -> Optional[Self]:
+        match = cls.IMAGE_PATTERN.fullmatch(data)
+        if not match:
+            return None
+        return cls(ext=cast(ImageType, match.group(1)), data=match.group(2))
+
+    @overload
+    @classmethod
+    def from_url_or_path(
+        cls,
+        url_or_path: str,
+        *,
+        headers: dict[str, str] = ...,
+        config: ImageProcessingConfig = ...,
+        return_coro: Literal[True],
+    ) -> Awaitable[Optional[Self]]: ...
+
+    @overload
+    @classmethod
+    def from_url_or_path(
+        cls,
+        url_or_path: str,
+        *,
+        headers: dict[str, str] = ...,
+        config: ImageProcessingConfig = ...,
+        return_coro: Literal[False] = False,
+    ) -> Optional[Self]: ...
+
+    @classmethod
+    def from_url_or_path(
+        cls,
+        url_or_path: str,
+        *,
+        headers: dict[str, str] = {},
+        config: ImageProcessingConfig = get_default_image_processing_config(),
+        return_coro: bool = False,
+    ) -> Optional[Self] | Awaitable[Optional[Self]]:
+        """Return a Base64Image instance from a URL or local file path."""
+        if maybe_base64 := cls.from_string(url_or_path):
+            return maybe_base64
+        elif _is_remote_url(url_or_path):
+            if return_coro:
+                return cls._afetch_remote_image(url_or_path, headers, config)
+            return cls._fetch_remote_image(url_or_path, headers, config)
+        return cls._process_local_image(Path(url_or_path), config)
+
+    @property
+    def data_uri(self) -> str:
+        return f"data:image/{self.ext.replace('jpg', 'jpeg')};base64,{self.data}"
+
+    @property
+    def data_uri_content(self) -> dict[Literal["type", "image_url"], Literal["image_url"] | dict[Literal["url"], str]]:
+        return {"type": "image_url", "image_url": {"url": self.data_uri}}
+
+    @staticmethod
+    def _verify_ext(ext: str, allowed_types: Sequence[ImageType]) -> TypeGuard[ImageType]:
+        return ext in allowed_types
+
+    @classmethod
+    def _fetch_remote_image(cls, url: str, headers: dict[str, str], config: ImageProcessingConfig) -> Optional[Self]:
+        image_bytes = _get_image_bytes(image_url=url.strip(), headers=headers)
+        if not image_bytes:
+            return None
+        return cls._convert_image_into_base64(image_bytes, config)
+
+    @classmethod
+    async def _afetch_remote_image(
+        cls, url: str, headers: dict[str, str], config: ImageProcessingConfig
+    ) -> Optional[Self]:
+        image_bytes = await _aget_image_bytes(image_url=url.strip(), headers=headers)
+        if not image_bytes:
+            return None
+        return cls._convert_image_into_base64(image_bytes, config)
+
+    @classmethod
+    def _convert_image_into_base64(cls, image_data: bytes, config: Optional[ImageProcessingConfig]) -> Optional[Self]:
+        """
+        Retrieve an image in bytes and return a base64-encoded data URL,
+        applying dynamic rules from 'config'.
+        """
+        if not config:
+            # No config: fall back to a simple encode that only inspects the binary header
+            return cls._simple_base64_encode(image_data)
+
+        # 1) Size check
+        max_size_mb = config.get("max_size_mb", float("inf"))
+        image_size_mb = len(image_data) / (1024 * 1024)
+        if image_size_mb > max_size_mb:
+            print(f"Image too large: {image_size_mb:.2f} MB > {max_size_mb} MB")
+            return None
+
+        # 2) Open the image with Pillow
+        try:
+            with image_open(BytesIO(image_data)) as im:
+                w, h = im.size
+                # largest side
+                largest_side = max(w, h)
+                # smallest side
+                smallest_side = min(w, h)
+
+                # min_largest_side threshold
+                min_largest_side = config.get("min_largest_side", 1)
+                if largest_side < min_largest_side:
+                    print(f"Image too small: {largest_side} < {min_largest_side}")
+                    return None
+
+                # resize logic
+                resize_if_min_side_exceeds = config.get("resize_if_min_side_exceeds", float("inf"))
+                if smallest_side >= resize_if_min_side_exceeds:
+                    # shrink so that the smallest side becomes resize_target_for_min_side
+                    resize_target = config.get("resize_target_for_min_side", 1000)
+                    ratio = resize_target / float(smallest_side)
+                    new_w = int(w * ratio)
+                    new_h = int(h * ratio)
+                    im = im.resize((new_w, new_h), Resampling.LANCZOS)
+
+                # format restriction
+                # PIL may report the format in uppercase (e.g. JPEG), so lowercase it
+                pil_format: str = (im.format or "").lower()
+                allowed_formats: Sequence[ImageType] = config.get("formats", [])
+                if not cls._verify_ext(pil_format, allowed_formats):
+                    print(f"Invalid format: {pil_format} not in {allowed_formats}")
+                    return None
+
+                # save back to bytes
+                output_buffer = BytesIO()
+                im.save(output_buffer, format=pil_format.upper())  # uppercase to match PIL's format names
+                output_buffer.seek(0)
+                final_bytes = output_buffer.read()
+
+        except Exception:
+            print_exc()
+            return None
+
+        # final base64 encoding
+        encoded_data = b64encode(final_bytes).decode("utf-8")
+        return cls(ext=pil_format, data=encoded_data)
+
+    @classmethod
+    def _simple_base64_encode(cls, image_data: bytes) -> Optional[Self]:
+        """
+        Retrieve an image URL and return a base64-encoded data URL.
+        """
+        ext = _detect_image_type(image_data)
+        if not ext:
+            return
+        return cls(ext=ext, data=b64encode(image_data).decode("utf-8"))
+
+    @classmethod
+    def _process_local_image(cls, path: Path, config: ImageProcessingConfig) -> Optional[Self]:
+        """Return a Base64 data URL if the local file exists and is in a valid image format, otherwise None."""
+        if not path.is_file():
+            return None
+        ext = path.suffix.lower().removeprefix(".")
+        if not cls._verify_ext(ext, config["formats"]):
+            return None
+        return cls(ext=ext, data=b64encode(path.read_bytes()).decode("ascii"))
+
+
+def _is_remote_url(path: str) -> bool:
+    parsed = urlparse(path)
+    return bool(parsed.scheme and parsed.netloc)
+
+
+def _detect_image_type(image_data: bytes) -> Optional[ImageType]:
+    """
+    Detect the image format based on the image binary signature (header).
+    Only JPEG, PNG, GIF, WEBP, and BMP are handled as examples.
+    If the format is not recognized, return None.
+    """
+    # JPEG: starts with FF D8 FF
+    if image_data.startswith(b"\xff\xd8\xff"):
+        return "jpeg"
+    # PNG: starts with 89 50 4E 47 0D 0A 1A 0A
+    elif image_data.startswith(b"\x89PNG\r\n\x1a\n"):
+        return "png"
+    # GIF: starts with GIF87a or GIF89a
+    elif image_data.startswith(b"GIF87a") or image_data.startswith(b"GIF89a"):
+        return "gif"
+    # WEBP: starts with RIFF....WEBP
+    elif image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
+        return "webp"
+    # BMP: starts with BM
+    elif image_data.startswith(b"BM"):
+        return "bmp"
+
+
+def _get_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
+    try:
+        with requests.Session() as session:
+            response = session.get(image_url, headers={k: str(v) for k, v in headers.items()})
+            if not response.ok:
+                return
+            return bytes(response.content or b"")
+    except Exception:
+        return
+
+
+async def _aget_image_bytes(image_url: str, headers: dict[str, str]) -> Optional[bytes]:
+    try:
+        async with ClientSession() as session:
+            async with session.get(image_url, headers={k: str(v) for k, v in headers.items()}) as response:
+                if not response.ok:
+                    return
+                return await response.read()
+    except Exception:
+        return
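A minimal usage sketch for the new helper, for orientation only: the URL below is a placeholder, and the import path `chatterer.utils.image` is inferred from the file location above.

```python
# Illustrative sketch, not part of the package diff.
import asyncio

from chatterer.utils.image import Base64Image, get_default_image_processing_config

config = get_default_image_processing_config()  # 5 MB cap, largest side >= 200 px, etc.

# Synchronous path: accepts data URIs, local file paths, and remote URLs.
img = Base64Image.from_url_or_path("https://example.com/cat.png", config=config)
if img is not None:
    print(img.ext)                # e.g. "png"
    print(img.data_uri[:30])      # "data:image/png;base64,..."
    block = img.data_uri_content  # {"type": "image_url", "image_url": {"url": ...}}

# Asynchronous path: return_coro=True makes the remote fetch awaitable.
async def main() -> None:
    img = await Base64Image.from_url_or_path(
        "https://example.com/cat.png", config=config, return_coro=True
    )
    print(img is not None)

asyncio.run(main())
```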
{chatterer-0.1.6.dist-info → chatterer-0.1.7.dist-info}/METADATA
@@ -1,166 +1,166 @@
-Metadata-Version: 2.
-Name: chatterer
-Version: 0.1.6
-Summary: The highest-level interface for various LLM APIs.
-Requires-Python: >=3.12
-Description-Content-Type: text/markdown
-Requires-Dist: instructor>=1.7.2
-Requires-Dist: langchain>=0.3.19
-Provides-Extra: dev
-Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
-Requires-Dist: colorama>=0.4.6; extra == "dev"
-Requires-Dist: ipykernel>=6.29.5; extra == "dev"
-Provides-Extra: conversion
-Requires-Dist: markdownify>=1.1.0; extra == "conversion"
-Requires-Dist: commonmark>=0.9.1; extra == "conversion"
-Requires-Dist: playwright>=1.50.0; extra == "conversion"
-Requires-Dist: pillow>=11.1.0; extra == "conversion"
-Requires-Dist: mistune>=3.1.2; extra == "conversion"
-Requires-Dist: markitdown>=0.0.2; extra == "conversion"
-Requires-Dist: pymupdf>=1.25.4; extra == "conversion"
-Provides-Extra: langchain-providers
-Requires-Dist: langchain-openai>=0.3.7; extra == "langchain-providers"
-Requires-Dist: langchain-anthropic>=0.3.8; extra == "langchain-providers"
-Requires-Dist: langchain-google-genai>=2.0.10; extra == "langchain-providers"
-Requires-Dist: langchain-ollama>=0.2.3; extra == "langchain-providers"
-Provides-Extra: all
-Requires-Dist: chatterer[langchain-providers]; extra == "all"
-Requires-Dist: chatterer[conversion]; extra == "all"
-Requires-Dist: chatterer[dev]; extra == "all"
-
-# Chatterer
-
-**Simplified, Structured AI Assistant Framework**
-
-`chatterer` is a Python library designed as a type-safe LangChain wrapper for interacting with various language models (OpenAI, Anthropic, Gemini, Ollama, etc.). It supports structured outputs via Pydantic models, plain text responses, and asynchronous calls.
-
-The structured reasoning in `chatterer` is inspired by the [Atom-of-Thought](https://github.com/qixucen/atom) pipeline.
-
----
-
-## Quick Install
-
-```bash
-pip install chatterer
-```
-
----
-
-## Quickstart Example
-
-Generate text quickly using OpenAI:
-
-```python
-from chatterer import Chatterer
-
-chat = Chatterer.openai("gpt-4o-mini")
-response = chat.generate("What is the meaning of life?")
-print(response)
-```
-
-Messages can be input as plain strings or structured lists:
-
-```python
-response = chat.generate([{ "role": "user", "content": "What's 2+2?" }])
-print(response)
-```
-
-### Structured Output with Pydantic
-
-```python
-from pydantic import BaseModel
-
-class AnswerModel(BaseModel):
-    question: str
-    answer: str
-
-response = chat.generate_pydantic(AnswerModel, "What's the capital of France?")
-print(response.question, response.answer)
-```
-
-### Async Example
-
-```python
-import asyncio
-
-async def main():
-    response = await chat.agenerate("Explain async in Python briefly.")
-    print(response)
-
-asyncio.run(main())
-```
-
----
-
-## Atom-of-Thought Pipeline (AoT)
-
-`AoTPipeline` provides structured reasoning by:
-
-- Detecting question domains (general, math, coding, philosophy, multihop).
-- Decomposing questions recursively.
-- Generating direct, decomposition-based, and simplified answers.
-- Combining answers via ensemble.
-
-### AoT Usage Example
-
-```python
-from chatterer import Chatterer
-from chatterer.strategies import AoTStrategy, AoTPipeline
-
-pipeline = AoTPipeline(chatterer=Chatterer.openai(), max_depth=2)
-strategy = AoTStrategy(pipeline=pipeline)
-
-question = "What would Newton discover if hit by an apple falling from 100 meters?"
-answer = strategy.invoke(question)
-print(answer)
-```
-
----
-
-## Supported Models
-
-- **OpenAI**
-- **Anthropic**
-- **Google Gemini**
-- **Ollama** (local models)
-
-Initialize models easily:
-
-```python
-openai_chat = Chatterer.openai("gpt-4o-mini")
-anthropic_chat = Chatterer.anthropic("claude-3-7-sonnet-20250219")
-gemini_chat = Chatterer.google("gemini-2.0-flash")
-ollama_chat = Chatterer.ollama("deepseek-r1:1.5b")
-```
-
----
-
-## Advanced Features
-
-- **Streaming responses**
-- **Async/Await support**
-- **Structured outputs with Pydantic models**
-
----
-
-## Logging
-
-Built-in logging for easy debugging:
-
-```python
-import logging
-logging.basicConfig(level=logging.DEBUG)
-```
-
----
-
-## Contributing
-
-Feel free to open an issue or pull request.
-
----
-
-## License
-
-MIT License
-
+Metadata-Version: 2.4
+Name: chatterer
+Version: 0.1.7
+Summary: The highest-level interface for various LLM APIs.
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+Requires-Dist: instructor>=1.7.2
+Requires-Dist: langchain>=0.3.19
+Provides-Extra: dev
+Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
+Requires-Dist: colorama>=0.4.6; extra == "dev"
+Requires-Dist: ipykernel>=6.29.5; extra == "dev"
+Provides-Extra: conversion
+Requires-Dist: markdownify>=1.1.0; extra == "conversion"
+Requires-Dist: commonmark>=0.9.1; extra == "conversion"
+Requires-Dist: playwright>=1.50.0; extra == "conversion"
+Requires-Dist: pillow>=11.1.0; extra == "conversion"
+Requires-Dist: mistune>=3.1.2; extra == "conversion"
+Requires-Dist: markitdown>=0.0.2; extra == "conversion"
+Requires-Dist: pymupdf>=1.25.4; extra == "conversion"
+Provides-Extra: langchain-providers
+Requires-Dist: langchain-openai>=0.3.7; extra == "langchain-providers"
+Requires-Dist: langchain-anthropic>=0.3.8; extra == "langchain-providers"
+Requires-Dist: langchain-google-genai>=2.0.10; extra == "langchain-providers"
+Requires-Dist: langchain-ollama>=0.2.3; extra == "langchain-providers"
+Provides-Extra: all
+Requires-Dist: chatterer[langchain-providers]; extra == "all"
+Requires-Dist: chatterer[conversion]; extra == "all"
+Requires-Dist: chatterer[dev]; extra == "all"
+
+# Chatterer
+
+**Simplified, Structured AI Assistant Framework**
+
+`chatterer` is a Python library designed as a type-safe LangChain wrapper for interacting with various language models (OpenAI, Anthropic, Gemini, Ollama, etc.). It supports structured outputs via Pydantic models, plain text responses, and asynchronous calls.
+
+The structured reasoning in `chatterer` is inspired by the [Atom-of-Thought](https://github.com/qixucen/atom) pipeline.
+
+---
+
+## Quick Install
+
+```bash
+pip install chatterer
+```
+
+---
+
+## Quickstart Example
+
+Generate text quickly using OpenAI:
+
+```python
+from chatterer import Chatterer
+
+chat = Chatterer.openai("gpt-4o-mini")
+response = chat.generate("What is the meaning of life?")
+print(response)
+```
+
+Messages can be input as plain strings or structured lists:
+
+```python
+response = chat.generate([{ "role": "user", "content": "What's 2+2?" }])
+print(response)
+```
+
+### Structured Output with Pydantic
+
+```python
+from pydantic import BaseModel
+
+class AnswerModel(BaseModel):
+    question: str
+    answer: str
+
+response = chat.generate_pydantic(AnswerModel, "What's the capital of France?")
+print(response.question, response.answer)
+```
+
+### Async Example
+
+```python
+import asyncio
+
+async def main():
+    response = await chat.agenerate("Explain async in Python briefly.")
+    print(response)
+
+asyncio.run(main())
+```
+
+---
+
+## Atom-of-Thought Pipeline (AoT)
+
+`AoTPipeline` provides structured reasoning by:
+
+- Detecting question domains (general, math, coding, philosophy, multihop).
+- Decomposing questions recursively.
+- Generating direct, decomposition-based, and simplified answers.
+- Combining answers via ensemble.
+
+### AoT Usage Example
+
+```python
+from chatterer import Chatterer
+from chatterer.strategies import AoTStrategy, AoTPipeline
+
+pipeline = AoTPipeline(chatterer=Chatterer.openai(), max_depth=2)
+strategy = AoTStrategy(pipeline=pipeline)
+
+question = "What would Newton discover if hit by an apple falling from 100 meters?"
+answer = strategy.invoke(question)
+print(answer)
+```
+
+---
+
+## Supported Models
+
+- **OpenAI**
+- **Anthropic**
+- **Google Gemini**
+- **Ollama** (local models)
+
+Initialize models easily:
+
+```python
+openai_chat = Chatterer.openai("gpt-4o-mini")
+anthropic_chat = Chatterer.anthropic("claude-3-7-sonnet-20250219")
+gemini_chat = Chatterer.google("gemini-2.0-flash")
+ollama_chat = Chatterer.ollama("deepseek-r1:1.5b")
+```
+
+---
+
+## Advanced Features
+
+- **Streaming responses**
+- **Async/Await support**
+- **Structured outputs with Pydantic models**
+
+---
+
+## Logging
+
+Built-in logging for easy debugging:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+```
+
+---
+
+## Contributing
+
+Feel free to open an issue or pull request.
+
+---
+
+## License
+
+MIT License
+
chatterer-0.1.7.dist-info/RECORD
ADDED
@@ -0,0 +1,24 @@
+chatterer/__init__.py,sha256=YQN_nZclqzZ_dbzxG10NJHRh1VzyjL7vwXETW5un19U,763
+chatterer/language_model.py,sha256=ksOg-ZuIaP7qy982pGT8uuYGMgzUHqotXQovNVktoeE,13455
+chatterer/messages.py,sha256=miHn3QOcZH8yjR9W5WRFxLvjO8kIyPhcsgoeojT7ByU,180
+chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+chatterer/strategies/__init__.py,sha256=oroDpp5ppWralCER5wnf8fgX77dqLAPF0ogmRtRzQfU,208
+chatterer/strategies/atom_of_thoughts.py,sha256=g801rY7k5UeNOq2-XRqB4h8sXjikVNzN57G6OtiUH00,39897
+chatterer/strategies/base.py,sha256=rqOzTo6S2eQ3A_F9aBXCmVoLM1eT6F2VzZ2Dof330Tk,413
+chatterer/tools/__init__.py,sha256=X6E_ESq-QFp12GF9CYdktRPxARKvwOj38uYQlc9xT8c,398
+chatterer/tools/convert_to_text.py,sha256=-UuKfFKbmmbkYGodcxsFRgsPszTiclcqupJ-mZEQUEI,15521
+chatterer/tools/citation_chunking/__init__.py,sha256=DyLMGG4dVgSnGIdaSHcBNDz09iXflcKuJCtg4W0JTVo,79
+chatterer/tools/citation_chunking/chunks.py,sha256=_Sxzfbud8XTOHdHdQmKwm4-byES1cD1V6C28DgtS1BA,2120
+chatterer/tools/citation_chunking/citation_chunker.py,sha256=Aye1BqUCa4u_CsTZoqCe72pJA8C_y2U5UR7cNoNpeo4,4776
+chatterer/tools/citation_chunking/citations.py,sha256=BWhSwzZccvu0Db-OxEbsuEGEz-Dh_tXo8HRx1y2XUHg,12308
+chatterer/tools/citation_chunking/prompt.py,sha256=so-8uFQ5b2Zq2V5Brfxd76bEnKYkHovYsohAnbxWEnY,7557
+chatterer/tools/citation_chunking/reference.py,sha256=m47XYaB5uFff_x_k7US9hNr-SpZjKnl-GuzsGaQzcZo,893
+chatterer/tools/citation_chunking/utils.py,sha256=Xytm9lMrS783Po1qWAdEJ8q7Q3l2UMzwHd9EkYTRiwk,6210
+chatterer/tools/webpage_to_markdown/__init__.py,sha256=aL4O78CX6AVBXVfUoM8gLxfYb-kpmhwzwDxKk1Gj_Co,119
+chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=U5xvGFzn4IWCAI98hpbCD5imKRrhYBa9_RzuFNiSwYA,26847
+chatterer/tools/webpage_to_markdown/utils.py,sha256=fmO7MMx1yvnnzLElLI31A3y2VaiLnD_XesLEVCNts3U,12264
+chatterer/utils/image.py,sha256=3v7DiVfRPDZGNfdEAQAiDw7DggTkSt1I_RlqoKXlyLY,10418
+chatterer-0.1.7.dist-info/METADATA,sha256=vZMtTS7Xxy3aQkI4EawVbBNLCaqIxS32UCKiuRNzlX8,4068
+chatterer-0.1.7.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
+chatterer-0.1.7.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
+chatterer-0.1.7.dist-info/RECORD,,
chatterer-0.1.6.dist-info/RECORD
DELETED
@@ -1,15 +0,0 @@
-chatterer/__init__.py,sha256=9mpj_kaqaGPvaAng2Ol1fzWftYvIs4y97v9umXWPWJg,572
-chatterer/language_model.py,sha256=dHHjXPje9FjRpEA0cV1G9LJwkiOSfMyVMOTXvrTo69A,12275
-chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
-chatterer/strategies/atom_of_thoughts.py,sha256=S_j4R26Drr8qJjAA9HYtWRF1_F4_ldo11_A2Z1ObYOI,40936
-chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
-chatterer/tools/__init__.py,sha256=YJc0xaDGwcdHvVlz-xFRjzrek2Q8icxc4Xsq2nOQXQA,341
-chatterer/tools/convert_to_text.py,sha256=5bOlo9hkUJtJhrB5hmEl4VM-_3Qoh3c2CxUrGmVOoQ4,16188
-chatterer/tools/webpage_to_markdown/__init__.py,sha256=bHH4qfnXyw8Zz-yBPLaTezF1sh9njvNBJmhBVtcpjsA,123
-chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=9k5e4jy6QrekEg8J7ZuJ_E_akDHyZ6yQI-AhTaclEfc,26687
-chatterer/tools/webpage_to_markdown/utils.py,sha256=0_LnrU7WLM0TbEXcNSND9xEDW4geS0OTCpQszHq68zE,21282
-chatterer-0.1.6.dist-info/METADATA,sha256=O-5dA1okHEZJanGLLKwDoZR04aQDSj7_1MNjykVbcws,4234
-chatterer-0.1.6.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-chatterer-0.1.6.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
-chatterer-0.1.6.dist-info/RECORD,,
{chatterer-0.1.6.dist-info → chatterer-0.1.7.dist-info}/top_level.txt
File without changes