chatterer 0.1.13__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. chatterer-0.1.16/PKG-INFO +392 -0
  2. chatterer-0.1.16/README.md +345 -0
  3. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/__init__.py +36 -5
  4. chatterer-0.1.16/chatterer/interactive.py +692 -0
  5. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/language_model.py +217 -261
  6. chatterer-0.1.16/chatterer/messages.py +21 -0
  7. chatterer-0.1.16/chatterer/tools/__init__.py +46 -0
  8. chatterer-0.1.13/chatterer/tools/webpage_to_markdown/utils.py → chatterer-0.1.16/chatterer/tools/caption_markdown_images.py +158 -108
  9. chatterer-0.1.16/chatterer/tools/convert_pdf_to_markdown.py +302 -0
  10. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/convert_to_text.py +45 -16
  11. chatterer-0.1.16/chatterer/tools/upstage_document_parser.py +705 -0
  12. chatterer-0.1.13/chatterer/tools/webpage_to_markdown/playwright_bot.py → chatterer-0.1.16/chatterer/tools/webpage_to_markdown.py +197 -107
  13. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/youtube.py +2 -1
  14. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/utils/__init__.py +1 -1
  15. chatterer-0.1.13/chatterer/utils/image.py → chatterer-0.1.16/chatterer/utils/base64_image.py +56 -62
  16. chatterer-0.1.16/chatterer/utils/code_agent.py +237 -0
  17. chatterer-0.1.16/chatterer/utils/imghdr.py +148 -0
  18. chatterer-0.1.16/chatterer.egg-info/PKG-INFO +392 -0
  19. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer.egg-info/SOURCES.txt +6 -4
  20. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer.egg-info/requires.txt +26 -10
  21. chatterer-0.1.16/pyproject.toml +55 -0
  22. chatterer-0.1.13/PKG-INFO +0 -171
  23. chatterer-0.1.13/README.md +0 -136
  24. chatterer-0.1.13/chatterer/messages.py +0 -9
  25. chatterer-0.1.13/chatterer/tools/__init__.py +0 -35
  26. chatterer-0.1.13/chatterer/tools/upstage_document_parser.py +0 -438
  27. chatterer-0.1.13/chatterer/tools/webpage_to_markdown/__init__.py +0 -4
  28. chatterer-0.1.13/chatterer/utils/code_agent.py +0 -138
  29. chatterer-0.1.13/chatterer.egg-info/PKG-INFO +0 -171
  30. chatterer-0.1.13/pyproject.toml +0 -29
  31. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/common_types/__init__.py +0 -0
  32. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/common_types/io.py +0 -0
  33. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/py.typed +0 -0
  34. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/strategies/__init__.py +0 -0
  35. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/strategies/atom_of_thoughts.py +0 -0
  36. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/strategies/base.py +0 -0
  37. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/citation_chunking/__init__.py +0 -0
  38. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/citation_chunking/chunks.py +0 -0
  39. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/citation_chunking/citation_chunker.py +0 -0
  40. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/citation_chunking/citations.py +0 -0
  41. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/citation_chunking/prompt.py +0 -0
  42. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/citation_chunking/reference.py +0 -0
  43. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/tools/citation_chunking/utils.py +0 -0
  44. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer/utils/bytesio.py +0 -0
  45. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer.egg-info/dependency_links.txt +0 -0
  46. {chatterer-0.1.13 → chatterer-0.1.16}/chatterer.egg-info/top_level.txt +0 -0
  47. {chatterer-0.1.13 → chatterer-0.1.16}/setup.cfg +0 -0
@@ -0,0 +1,392 @@
1
+ Metadata-Version: 2.4
2
+ Name: chatterer
3
+ Version: 0.1.16
4
+ Summary: The highest-level interface for various LLM APIs.
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: instructor>=1.7.2
8
+ Requires-Dist: langchain>=0.3.19
9
+ Requires-Dist: langchain-openai>=0.3.11
10
+ Requires-Dist: pillow>=11.1.0
11
+ Requires-Dist: regex>=2024.11.6
12
+ Requires-Dist: rich>=13.9.4
13
+ Provides-Extra: dev
14
+ Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
15
+ Requires-Dist: colorama>=0.4.6; extra == "dev"
16
+ Requires-Dist: ipykernel>=6.29.5; extra == "dev"
17
+ Requires-Dist: spargear>=0.1.1; extra == "dev"
18
+ Provides-Extra: conversion
19
+ Requires-Dist: youtube-transcript-api>=1.0.3; extra == "conversion"
20
+ Requires-Dist: chatterer[browser]; extra == "conversion"
21
+ Requires-Dist: chatterer[pdf]; extra == "conversion"
22
+ Requires-Dist: chatterer[markdown]; extra == "conversion"
23
+ Requires-Dist: chatterer[video]; extra == "conversion"
24
+ Provides-Extra: browser
25
+ Requires-Dist: playwright>=1.50.0; extra == "browser"
26
+ Provides-Extra: pdf
27
+ Requires-Dist: pymupdf>=1.25.4; extra == "pdf"
28
+ Requires-Dist: pypdf>=5.4.0; extra == "pdf"
29
+ Provides-Extra: markdown
30
+ Requires-Dist: markitdown[all]>=0.1.1; extra == "markdown"
31
+ Requires-Dist: markdownify>=1.1.0; extra == "markdown"
32
+ Requires-Dist: commonmark>=0.9.1; extra == "markdown"
33
+ Requires-Dist: mistune>=3.1.3; extra == "markdown"
34
+ Provides-Extra: video
35
+ Requires-Dist: pydub>=0.25.1; extra == "video"
36
+ Provides-Extra: langchain
37
+ Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
38
+ Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
39
+ Provides-Extra: langchain-providers
40
+ Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain-providers"
41
+ Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain-providers"
42
+ Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain-providers"
43
+ Provides-Extra: all
44
+ Requires-Dist: chatterer[dev]; extra == "all"
45
+ Requires-Dist: chatterer[langchain]; extra == "all"
46
+ Requires-Dist: chatterer[conversion]; extra == "all"
47
+
48
+ # Chatterer
49
+
50
+ **Simplified, Structured AI Assistant Framework**
51
+
52
+ `chatterer` is a Python library designed as a type-safe LangChain wrapper for interacting with various language models (OpenAI, Anthropic, Google Gemini, Ollama, etc.). It supports structured outputs via Pydantic models, plain text responses, asynchronous calls, image description, code execution, and an interactive shell.
53
+
54
+ The structured reasoning in `chatterer` is inspired by the [Atom-of-Thought](https://github.com/qixucen/atom) pipeline.
55
+
56
+ ---
57
+
58
+ ## Quick Install
59
+
60
+ ```bash
61
+ pip install chatterer
62
+ ```
63
+
64
+ ---
65
+
66
+ ## Quickstart Example
67
+
68
+ Generate text quickly using OpenAI.
69
+ Messages can be input as plain strings or structured lists:
70
+
71
+ ```python
72
+ from chatterer import Chatterer, HumanMessage, AIMessage, SystemMessage
73
+
74
+ # Initialize the Chatterer with `openai`, `anthropic`, `google`, or `ollama` models
75
+ chatterer: Chatterer = Chatterer.openai("gpt-4.1")
76
+
77
+ # Get direct response as str
78
+ response: str = chatterer("What is the meaning of life?")
79
+ # response = chatterer([{ "role": "user", "content": "What is the meaning of life?" }])
80
+ # response = chatterer([("user", "What is the meaning of life?")])
81
+ # response = chatterer([HumanMessage("What is the meaning of life?")])
82
+ print(response)
83
+ ```
84
+
85
+ Image & text content can be sent together:
86
+
87
+ ```python
88
+ from chatterer import Base64Image, HumanMessage
89
+
90
+ # Load an image from a file or URL, resulting in a None or Base64Image object
91
+ image = Base64Image.from_url_or_path("example.jpg")
92
+ # image = Base64Image.from_url_or_path("https://example.com/image.jpg")
93
+ assert image is not None, "Failed to load image"
94
+
95
+ # Alternatively, load an image from bytes
96
+ # with open("example.jpg", "rb") as f:
97
+ # image = Base64Image.from_bytes(f.read(), ext="jpeg")
98
+
99
+ message = HumanMessage(["Describe the image", image.data_uri_content])
100
+ response: str = chatterer([message])
101
+ print(response)
102
+ ```
103
+
104
+ ---
105
+
106
+ ## Structured Output with Pydantic
107
+
108
+ Define a Pydantic model and get typed responses:
109
+
110
+ ```python
111
+ from pydantic import BaseModel
112
+
113
+ class AnswerModel(BaseModel):
114
+ question: str
115
+ answer: str
116
+
117
+ # Call with response_model
118
+ response: AnswerModel = chatterer("What's the capital of France?", response_model=AnswerModel)
119
+ print(response.question, response.answer)
120
+ ```
121
+
122
+ ---
123
+
124
+ ## Async Example
125
+
126
+ Use asynchronous generation for non-blocking operations:
127
+
128
+ ```python
129
+ import asyncio
130
+
131
+ async def main():
132
+ response = await chatterer.agenerate("Explain async in Python briefly.")
133
+ print(response)
134
+
135
+ asyncio.run(main())
136
+ ```
137
+
138
+ ---
139
+
140
+ ## Streaming Structured Outputs
141
+
142
+ Stream structured responses in real-time:
143
+
144
+ ```python
145
+ from pydantic import BaseModel
146
+
147
+ class AnswerModel(BaseModel):
148
+ text: str
149
+
150
+ chatterer = Chatterer.openai()
151
+ for chunk in chatterer.generate_pydantic_stream(AnswerModel, "Tell me a story"):
152
+ print(chunk.text)
153
+ ```
154
+
155
+ Asynchronous version:
156
+ ```python
157
+ import asyncio
158
+
159
+ async def main():
160
+ async for chunk in chatterer.agenerate_pydantic_stream(AnswerModel, "Tell me a story"):
161
+ print(chunk.text)
162
+
163
+ asyncio.run(main())
164
+ ```
165
+
166
+ ---
167
+
168
+ ## Image Description
169
+
170
+ Generate descriptions for images using the language model:
171
+
172
+ ```python
173
+ description = chatterer.describe_image("https://example.com/image.jpg")
174
+ print(description)
175
+
176
+ # Customize the instruction
177
+ description = chatterer.describe_image("https://example.com/image.jpg", instruction="Describe the main objects in the image.")
178
+ ```
179
+
180
+ An asynchronous version is also available:
181
+
182
+ ```python
183
+ async def main():
184
+ description = await chatterer.adescribe_image("https://example.com/image.jpg")
185
+ print(description)
186
+
187
+ asyncio.run(main())
188
+ ```
189
+
190
+ ---
191
+
192
+ ## Code Execution
193
+
194
+ Generate and execute Python code dynamically:
195
+
196
+ ```python
197
+ result = chatterer.invoke_code_execution("Write a function to calculate factorial.")
198
+ print(result.code)
199
+ print(result.output)
200
+ ```
201
+
202
+ An asynchronous version exists as well:
203
+
204
+ ```python
205
+ async def main():
206
+ result = await chatterer.ainvoke_code_execution("Write a function to calculate factorial.")
207
+ print(result.output)
208
+
209
+ asyncio.run(main())
210
+ ```
211
+
212
+ ---
213
+
214
+ ## Webpage to Markdown
215
+
216
+ Convert webpages to Markdown, optionally filtering content with the language model:
217
+
218
+ ```python
219
+ from chatterer.tools.webpage_to_markdown import PlayWrightBot
220
+
221
+ with PlayWrightBot() as bot:
222
+ # Basic conversion
223
+ markdown = bot.url_to_md("https://example.com")
224
+ print(markdown)
225
+
226
+ # With LLM filtering and image descriptions
227
+ filtered_md = bot.url_to_md_with_llm("https://example.com", describe_images=True)
228
+ print(filtered_md)
229
+ ```
230
+
231
+ Asynchronous version:
232
+ ```python
233
+ import asyncio
234
+
235
+ async def main():
236
+ async with PlayWrightBot() as bot:
237
+ markdown = await bot.aurl_to_md_with_llm("https://example.com")
238
+ print(markdown)
239
+
240
+ asyncio.run(main())
241
+ ```
242
+
243
+ Extract specific elements:
244
+ ```python
245
+ with PlayWrightBot() as bot:
246
+ headings = bot.select_and_extract("https://example.com", "h2")
247
+ print(headings)
248
+ ```
249
+
250
+ ---
251
+
252
+ ## Citation Chunking
253
+
254
+ Chunk documents into semantic sections with citations:
255
+
256
+ ```python
257
+ from chatterer import Chatterer
258
+ from chatterer.tools import citation_chunker
259
+
260
+ chatterer = Chatterer.openai()
261
+ document = "Long text about quantum computing..."
262
+ chunks = citation_chunker(document, chatterer, global_coverage_threshold=0.9)
263
+ for chunk in chunks:
264
+ print(f"Subject: {chunk.name}")
265
+ for source, matches in chunk.references.items():
266
+ print(f" Source: {source}, Matches: {matches}")
267
+ ```
268
+
269
+ ---
270
+
271
+ ## Interactive Shell
272
+
273
+ Engage in a conversational AI session with code execution support:
274
+
275
+ ```python
276
+ from chatterer import interactive_shell
277
+
278
+ interactive_shell()
279
+ ```
280
+
281
+ This launches an interactive session where you can chat with the AI and execute code snippets. Type `quit` or `exit` to end the session.
282
+
283
+ ---
284
+
285
+ ## Atom-of-Thought Pipeline (AoT)
286
+
287
+ `AoTPipeline` provides structured reasoning inspired by the [Atom-of-Thought](https://github.com/qixucen/atom) approach. It decomposes complex questions recursively, generates answers, and combines them via an ensemble process.
288
+
289
+ ### AoT Usage Example
290
+
291
+ ```python
292
+ from chatterer import Chatterer
293
+ from chatterer.strategies import AoTStrategy, AoTPipeline
294
+
295
+ pipeline = AoTPipeline(chatterer=Chatterer.openai(), max_depth=2)
296
+ strategy = AoTStrategy(pipeline=pipeline)
297
+
298
+ question = "What would Newton discover if hit by an apple falling from 100 meters?"
299
+ answer = strategy.invoke(question)
300
+ print(answer)
301
+
302
+ # Generate and inspect reasoning graph
303
+ graph = strategy.get_reasoning_graph()
304
+ print(f"Graph: {len(graph.nodes)} nodes, {len(graph.relationships)} relationships")
305
+ ```
306
+
307
+ **Note**: The AoT pipeline includes an optional feature to generate a reasoning graph, which can be stored in Neo4j for visualization and analysis. Install `neo4j_extension` and set up a Neo4j instance to use this feature:
308
+
309
+ ```python
310
+ from neo4j_extension import Neo4jConnection
311
+ with Neo4jConnection() as conn:
312
+ conn.upsert_graph(graph)
313
+ ```
314
+
315
+ ---
316
+
317
+ ## Supported Models
318
+
319
+ Chatterer supports multiple language models, easily initialized as follows:
320
+
321
+ - **OpenAI**
322
+ - **Anthropic**
323
+ - **Google Gemini**
324
+ - **Ollama** (local models)
325
+
326
+ ```python
327
+ openai_chatterer = Chatterer.openai("gpt-4o-mini")
328
+ anthropic_chatterer = Chatterer.anthropic("claude-3-7-sonnet-20250219")
329
+ gemini_chatterer = Chatterer.google("gemini-2.0-flash")
330
+ ollama_chatterer = Chatterer.ollama("deepseek-r1:1.5b")
331
+ ```
332
+
333
+ ---
334
+
335
+ ## Advanced Features
336
+
337
+ - **Streaming Responses**: Use `generate_stream` or `agenerate_stream` for real-time output.
338
+ - **Streaming Structured Outputs**: Stream Pydantic-typed responses with `generate_pydantic_stream` or `agenerate_pydantic_stream`.
339
+ - **Async/Await Support**: All methods have asynchronous counterparts (e.g., `agenerate`, `adescribe_image`).
340
+ - **Structured Outputs**: Leverage Pydantic models for typed responses.
341
+ - **Image Description**: Generate descriptions for images with `describe_image`.
342
+ - **Code Execution**: Dynamically generate and execute Python code with `invoke_code_execution`.
343
+ - **Webpage to Markdown**: Convert webpages to Markdown with `PlayWrightBot`, including JavaScript rendering, element extraction, and LLM-based content filtering.
344
+ - **Citation Chunking**: Semantically chunk documents and extract citations with `citation_chunker`, including coverage analysis.
345
+ - **Interactive Shell**: Use `interactive_shell` for conversational AI with code execution.
346
+ - **Token Counting**: Retrieve input/output token counts with `get_num_tokens_from_message`.
347
+ - **Utilities**: Tools for content processing (e.g., `html_to_markdown`, `pdf_to_text`, `get_youtube_video_subtitle`, `citation_chunker`) are available in the `tools` module.
348
+
349
+ ```python
350
+ # Example: Convert PDF to text
351
+ from chatterer.tools import pdf_to_text
352
+ text = pdf_to_text("example.pdf")
353
+ print(text)
354
+
355
+ # Example: Get YouTube subtitles
356
+ from chatterer.tools import get_youtube_video_subtitle
357
+ subtitles = get_youtube_video_subtitle("https://www.youtube.com/watch?v=example")
358
+ print(subtitles)
359
+
360
+ # Example: Get token counts
361
+ from chatterer.messages import HumanMessage
362
+ msg = HumanMessage(content="Hello, world!")
363
+ tokens = chatterer.get_num_tokens_from_message(msg)
364
+ if tokens:
365
+ input_tokens, output_tokens = tokens
366
+ print(f"Input: {input_tokens}, Output: {output_tokens}")
367
+ ```
368
+
369
+ ---
370
+
371
+ ## Logging
372
+
373
+ Enable debugging with basic logging:
374
+
375
+ ```python
376
+ import logging
377
+ logging.basicConfig(level=logging.DEBUG)
378
+ ```
379
+
380
+ The AoT pipeline uses a custom color-coded logger for detailed step-by-step output.
381
+
382
+ ---
383
+
384
+ ## Contributing
385
+
386
+ We welcome contributions! Feel free to open an issue or submit a pull request on the repository.
387
+
388
+ ---
389
+
390
+ ## License
391
+
392
+ MIT License