chatterer 0.1.13__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. chatterer-0.1.14/PKG-INFO +387 -0
  2. chatterer-0.1.14/README.md +345 -0
  3. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/__init__.py +97 -62
  4. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/common_types/__init__.py +21 -21
  5. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/common_types/io.py +19 -19
  6. chatterer-0.1.14/chatterer/interactive.py +353 -0
  7. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/language_model.py +454 -577
  8. chatterer-0.1.14/chatterer/messages.py +21 -0
  9. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/strategies/__init__.py +13 -13
  10. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/strategies/atom_of_thoughts.py +975 -975
  11. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/strategies/base.py +14 -14
  12. chatterer-0.1.14/chatterer/tools/__init__.py +46 -0
  13. chatterer-0.1.13/chatterer/tools/webpage_to_markdown/utils.py → chatterer-0.1.14/chatterer/tools/caption_markdown_images.py +384 -334
  14. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/citation_chunking/__init__.py +3 -3
  15. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/citation_chunking/chunks.py +53 -53
  16. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/citation_chunking/citation_chunker.py +118 -118
  17. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/citation_chunking/citations.py +285 -285
  18. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/citation_chunking/prompt.py +157 -157
  19. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/citation_chunking/reference.py +26 -26
  20. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/citation_chunking/utils.py +138 -138
  21. chatterer-0.1.14/chatterer/tools/convert_pdf_to_markdown.py +302 -0
  22. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/convert_to_text.py +447 -418
  23. chatterer-0.1.14/chatterer/tools/upstage_document_parser.py +705 -0
  24. chatterer-0.1.13/chatterer/tools/webpage_to_markdown/playwright_bot.py → chatterer-0.1.14/chatterer/tools/webpage_to_markdown.py +739 -649
  25. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/tools/youtube.py +147 -146
  26. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/utils/__init__.py +18 -15
  27. chatterer-0.1.13/chatterer/utils/image.py → chatterer-0.1.14/chatterer/utils/base64_image.py +285 -291
  28. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/utils/bytesio.py +59 -59
  29. chatterer-0.1.14/chatterer/utils/cli.py +476 -0
  30. chatterer-0.1.14/chatterer/utils/code_agent.py +237 -0
  31. chatterer-0.1.14/chatterer/utils/imghdr.py +148 -0
  32. chatterer-0.1.14/chatterer.egg-info/PKG-INFO +387 -0
  33. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer.egg-info/SOURCES.txt +7 -4
  34. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer.egg-info/requires.txt +20 -10
  35. {chatterer-0.1.13 → chatterer-0.1.14}/pyproject.toml +24 -10
  36. {chatterer-0.1.13 → chatterer-0.1.14}/setup.cfg +4 -4
  37. chatterer-0.1.13/PKG-INFO +0 -171
  38. chatterer-0.1.13/README.md +0 -136
  39. chatterer-0.1.13/chatterer/messages.py +0 -9
  40. chatterer-0.1.13/chatterer/tools/__init__.py +0 -35
  41. chatterer-0.1.13/chatterer/tools/upstage_document_parser.py +0 -438
  42. chatterer-0.1.13/chatterer/tools/webpage_to_markdown/__init__.py +0 -4
  43. chatterer-0.1.13/chatterer/utils/code_agent.py +0 -138
  44. chatterer-0.1.13/chatterer.egg-info/PKG-INFO +0 -171
  45. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer/py.typed +0 -0
  46. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer.egg-info/dependency_links.txt +0 -0
  47. {chatterer-0.1.13 → chatterer-0.1.14}/chatterer.egg-info/top_level.txt +0 -0
@@ -0,0 +1,387 @@
Metadata-Version: 2.4
Name: chatterer
Version: 0.1.14
Summary: The highest-level interface for various LLM APIs.
Requires-Python: >=3.12
Description-Content-Type: text/markdown
Requires-Dist: instructor>=1.7.2
Requires-Dist: langchain>=0.3.19
Requires-Dist: langchain-openai>=0.3.11
Requires-Dist: pillow>=11.1.0
Requires-Dist: regex>=2024.11.6
Provides-Extra: dev
Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
Requires-Dist: colorama>=0.4.6; extra == "dev"
Requires-Dist: ipykernel>=6.29.5; extra == "dev"
Provides-Extra: conversion
Requires-Dist: youtube-transcript-api>=1.0.3; extra == "conversion"
Requires-Dist: chatterer[browser]; extra == "conversion"
Requires-Dist: chatterer[pdf]; extra == "conversion"
Requires-Dist: chatterer[markdown]; extra == "conversion"
Provides-Extra: browser
Requires-Dist: playwright>=1.50.0; extra == "browser"
Provides-Extra: pdf
Requires-Dist: pymupdf>=1.25.4; extra == "pdf"
Requires-Dist: pypdf>=5.4.0; extra == "pdf"
Provides-Extra: markdown
Requires-Dist: markitdown[all]>=0.1.1; extra == "markdown"
Requires-Dist: markdownify>=1.1.0; extra == "markdown"
Requires-Dist: commonmark>=0.9.1; extra == "markdown"
Requires-Dist: mistune>=3.1.3; extra == "markdown"
Provides-Extra: langchain
Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
Provides-Extra: langchain-providers
Requires-Dist: langchain-anthropic>=0.3.10; extra == "langchain-providers"
Requires-Dist: langchain-google-genai>=2.1.1; extra == "langchain-providers"
Requires-Dist: langchain-ollama>=0.3.0; extra == "langchain-providers"
Provides-Extra: all
Requires-Dist: chatterer[dev]; extra == "all"
Requires-Dist: chatterer[langchain]; extra == "all"
Requires-Dist: chatterer[conversion]; extra == "all"

# Chatterer

**Simplified, Structured AI Assistant Framework**

`chatterer` is a Python library designed as a type-safe LangChain wrapper for interacting with various language models (OpenAI, Anthropic, Google Gemini, Ollama, etc.). It supports structured outputs via Pydantic models, plain text responses, asynchronous calls, image description, code execution, and an interactive shell.

The structured reasoning in `chatterer` is inspired by the [Atom-of-Thought](https://github.com/qixucen/atom) pipeline.

---

## Quick Install

```bash
pip install chatterer
```
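
The optional features below rely on the extras declared in the metadata above (`browser`, `pdf`, `markdown`, `conversion`, `langchain`, `dev`, `all`). A usage sketch using standard pip extras syntax; the group names are taken from the `Provides-Extra` fields above:

```bash
# Webpage/PDF/Markdown conversion helpers (pulls in the browser, pdf, and markdown extras)
pip install "chatterer[conversion]"

# Extra LangChain providers (Anthropic, Google Gemini, Ollama)
pip install "chatterer[langchain]"

# Everything, including dev extras
pip install "chatterer[all]"
```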

---

## Quickstart Example

Generate text quickly using OpenAI.
Messages can be passed as plain strings or as structured message lists:

```python
from chatterer import Chatterer, HumanMessage, AIMessage, SystemMessage

# Initialize the Chatterer with `openai`, `anthropic`, `google`, or `ollama` models
chatterer: Chatterer = Chatterer.openai("gpt-4.1")

# Get direct response as str
response: str = chatterer("What is the meaning of life?")
# response = chatterer([{ "role": "user", "content": "What is the meaning of life?" }])
# response = chatterer([("user", "What is the meaning of life?")])
# response = chatterer([HumanMessage("What is the meaning of life?")])
print(response)
```
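
A conversation history can be passed the same way; a minimal sketch reusing the `SystemMessage`, `HumanMessage`, and `AIMessage` classes imported above (message contents are illustrative):

```python
messages = [
    SystemMessage("You are a concise assistant."),
    HumanMessage("Give me a haiku about the sea."),
    AIMessage("Waves fold into foam,\nsalt wind combs the gray swell,\ngulls stitch the silence."),
    HumanMessage("Now one about mountains."),
]
response: str = chatterer(messages)
print(response)
```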

Image and text content can be sent together:

```python
from chatterer import Base64Image, HumanMessage

# Load an image from a file or URL; returns a Base64Image, or None on failure
image = Base64Image.from_url_or_path("example.jpg")
# image = Base64Image.from_url_or_path("https://example.com/image.jpg")
assert image is not None, "Failed to load image"

# Alternatively, load an image from bytes
# with open("example.jpg", "rb") as f:
#     image = Base64Image.from_bytes(f.read(), ext="jpeg")

message = HumanMessage(["Describe the image", image.data_uri_content])
response: str = chatterer([message])
print(response)
```

---

## Structured Output with Pydantic

Define a Pydantic model and get typed responses:

```python
from pydantic import BaseModel

class AnswerModel(BaseModel):
    question: str
    answer: str

# Call with response_model
response: AnswerModel = chatterer("What's the capital of France?", response_model=AnswerModel)
print(response.question, response.answer)
```
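
The same `response_model` call form should also accommodate nested schemas; a hedged sketch (the `Plan`/`Step` models are illustrative, the call signature mirrors the example above):

```python
from pydantic import BaseModel

class Step(BaseModel):
    description: str

class Plan(BaseModel):
    goal: str
    steps: list[Step]

plan: Plan = chatterer("Plan a two-day trip to Paris.", response_model=Plan)
print(plan.goal)
for step in plan.steps:
    print("-", step.description)
```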

---

## Async Example

Use asynchronous generation for non-blocking operations:

```python
import asyncio

async def main():
    response = await chatterer.agenerate("Explain async in Python briefly.")
    print(response)

asyncio.run(main())
```
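
Since `agenerate` is a coroutine, several prompts can be awaited concurrently with standard `asyncio` tooling; a minimal sketch (prompts are illustrative):

```python
import asyncio

async def main():
    prompts = ["Define a coroutine.", "Define an event loop."]
    # Run both requests concurrently instead of one after the other
    responses = await asyncio.gather(*(chatterer.agenerate(p) for p in prompts))
    for prompt, response in zip(prompts, responses):
        print(f"{prompt} -> {response}")

asyncio.run(main())
```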

---

## Streaming Structured Outputs

Stream structured responses in real time:

```python
from pydantic import BaseModel

class AnswerModel(BaseModel):
    text: str

chatterer = Chatterer.openai()
for chunk in chatterer.generate_pydantic_stream(AnswerModel, "Tell me a story"):
    print(chunk.text)
```

Asynchronous version:
```python
import asyncio

async def main():
    async for chunk in chatterer.agenerate_pydantic_stream(AnswerModel, "Tell me a story"):
        print(chunk.text)

asyncio.run(main())
```

---

## Image Description

Generate descriptions for images using the language model:

```python
description = chatterer.describe_image("https://example.com/image.jpg")
print(description)

# Customize the instruction
description = chatterer.describe_image("https://example.com/image.jpg", instruction="Describe the main objects in the image.")
```

An asynchronous version is also available:

```python
async def main():
    description = await chatterer.adescribe_image("https://example.com/image.jpg")
    print(description)

asyncio.run(main())
```

---

## Code Execution

Generate and execute Python code dynamically:

```python
result = chatterer.invoke_code_execution("Write a function to calculate factorial.")
print(result.code)
print(result.output)
```

An asynchronous version exists as well:

```python
async def main():
    result = await chatterer.ainvoke_code_execution("Write a function to calculate factorial.")
    print(result.output)

asyncio.run(main())
```

---

## Webpage to Markdown

Convert webpages to Markdown, optionally filtering content with the language model:

```python
from chatterer.tools.webpage_to_markdown import PlayWrightBot

with PlayWrightBot() as bot:
    # Basic conversion
    markdown = bot.url_to_md("https://example.com")
    print(markdown)

    # With LLM filtering and image descriptions
    filtered_md = bot.url_to_md_with_llm("https://example.com", describe_images=True)
    print(filtered_md)
```

Asynchronous version:
```python
import asyncio

async def main():
    async with PlayWrightBot() as bot:
        markdown = await bot.aurl_to_md_with_llm("https://example.com")
        print(markdown)

asyncio.run(main())
```

Extract specific elements:
```python
with PlayWrightBot() as bot:
    headings = bot.select_and_extract("https://example.com", "h2")
    print(headings)
```

---

## Citation Chunking

Chunk documents into semantic sections with citations:

```python
from chatterer import Chatterer
from chatterer.tools import citation_chunker

chatterer = Chatterer.openai()
document = "Long text about quantum computing..."
chunks = citation_chunker(document, chatterer, global_coverage_threshold=0.9)
for chunk in chunks:
    print(f"Subject: {chunk.name}")
    for source, matches in chunk.references.items():
        print(f" Source: {source}, Matches: {matches}")
```

---

## Interactive Shell

Engage in a conversational AI session with code execution support:

```python
from chatterer import interactive_shell

interactive_shell()
```

This launches an interactive session where you can chat with the AI and execute code snippets. Type `quit` or `exit` to end the session.

---

## Atom-of-Thought Pipeline (AoT)

`AoTPipeline` provides structured reasoning inspired by the [Atom-of-Thought](https://github.com/qixucen/atom) approach. It decomposes complex questions recursively, generates answers, and combines them via an ensemble process.

### AoT Usage Example

```python
from chatterer import Chatterer
from chatterer.strategies import AoTStrategy, AoTPipeline

pipeline = AoTPipeline(chatterer=Chatterer.openai(), max_depth=2)
strategy = AoTStrategy(pipeline=pipeline)

question = "What would Newton discover if hit by an apple falling from 100 meters?"
answer = strategy.invoke(question)
print(answer)

# Generate and inspect reasoning graph
graph = strategy.get_reasoning_graph()
print(f"Graph: {len(graph.nodes)} nodes, {len(graph.relationships)} relationships")
```

**Note**: The AoT pipeline includes an optional feature to generate a reasoning graph, which can be stored in Neo4j for visualization and analysis. Install `neo4j_extension` and set up a Neo4j instance to use this feature:

```python
from neo4j_extension import Neo4jConnection
with Neo4jConnection() as conn:
    conn.upsert_graph(graph)
```

---

## Supported Models

Chatterer supports multiple language models, easily initialized as follows:

- **OpenAI**
- **Anthropic**
- **Google Gemini**
- **Ollama** (local models)

```python
openai_chatterer = Chatterer.openai("gpt-4o-mini")
anthropic_chatterer = Chatterer.anthropic("claude-3-7-sonnet-20250219")
gemini_chatterer = Chatterer.google("gemini-2.0-flash")
ollama_chatterer = Chatterer.ollama("deepseek-r1:1.5b")
```
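
These classmethods wrap the corresponding LangChain chat models, so provider credentials are typically picked up from the usual environment variables; a hedged note (the variable names below are the LangChain/provider defaults, not something chatterer documents itself):

```bash
export OPENAI_API_KEY="sk-..."
export ANTHROPIC_API_KEY="sk-ant-..."
export GOOGLE_API_KEY="..."
# Ollama needs no API key, only a running local server (e.g. `ollama serve`)
```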

---

## Advanced Features

- **Streaming Responses**: Use `generate_stream` or `agenerate_stream` for real-time output (see the sketch after the examples below).
- **Streaming Structured Outputs**: Stream Pydantic-typed responses with `generate_pydantic_stream` or `agenerate_pydantic_stream`.
- **Async/Await Support**: All methods have asynchronous counterparts (e.g., `agenerate`, `adescribe_image`).
- **Structured Outputs**: Leverage Pydantic models for typed responses.
- **Image Description**: Generate descriptions for images with `describe_image`.
- **Code Execution**: Dynamically generate and execute Python code with `invoke_code_execution`.
- **Webpage to Markdown**: Convert webpages to Markdown with `PlayWrightBot`, including JavaScript rendering, element extraction, and LLM-based content filtering.
- **Citation Chunking**: Semantically chunk documents and extract citations with `citation_chunker`, including coverage analysis.
- **Interactive Shell**: Use `interactive_shell` for conversational AI with code execution.
- **Token Counting**: Retrieve input/output token counts with `get_num_tokens_from_message`.
- **Utilities**: Tools for content processing (e.g., `html_to_markdown`, `pdf_to_text`, `get_youtube_video_subtitle`, `citation_chunker`) are available in the `tools` module.

```python
# Example: Convert PDF to text
from chatterer.tools import pdf_to_text
text = pdf_to_text("example.pdf")
print(text)

# Example: Get YouTube subtitles
from chatterer.tools import get_youtube_video_subtitle
subtitles = get_youtube_video_subtitle("https://www.youtube.com/watch?v=example")
print(subtitles)

# Example: Get token counts
from chatterer.messages import HumanMessage
msg = HumanMessage(content="Hello, world!")
tokens = chatterer.get_num_tokens_from_message(msg)
if tokens:
    input_tokens, output_tokens = tokens
    print(f"Input: {input_tokens}, Output: {output_tokens}")
```
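
The plain-text streaming methods listed above (`generate_stream` / `agenerate_stream`) have no example elsewhere in this README; a hedged sketch, assuming they yield incremental text chunks in the same spirit as `generate_pydantic_stream`:

```python
# Print the response as it arrives instead of waiting for the full text
for chunk in chatterer.generate_stream("Summarize the Atom-of-Thought idea in two sentences."):
    print(chunk, end="", flush=True)
print()
```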

---

## Logging

Enable debugging with basic logging:

```python
import logging
logging.basicConfig(level=logging.DEBUG)
```

The AoT pipeline uses a custom color-coded logger for detailed step-by-step output.

---

## Contributing

We welcome contributions! Feel free to open an issue or submit a pull request on the repository.

---

## License

MIT License