chatterer 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {chatterer-0.1.4 → chatterer-0.1.6}/PKG-INFO +21 -5
  2. chatterer-0.1.6/chatterer/__init__.py +27 -0
  3. chatterer-0.1.6/chatterer/language_model.py +327 -0
  4. chatterer-0.1.6/chatterer/py.typed +0 -0
  5. chatterer-0.1.6/chatterer/strategies/__init__.py +13 -0
  6. chatterer-0.1.6/chatterer/strategies/atom_of_thoughts.py +976 -0
  7. chatterer-0.1.6/chatterer/tools/__init__.py +15 -0
  8. chatterer-0.1.6/chatterer/tools/convert_to_text.py +464 -0
  9. chatterer-0.1.6/chatterer/tools/webpage_to_markdown/__init__.py +4 -0
  10. chatterer-0.1.6/chatterer/tools/webpage_to_markdown/playwright_bot.py +631 -0
  11. chatterer-0.1.6/chatterer/tools/webpage_to_markdown/utils.py +556 -0
  12. {chatterer-0.1.4 → chatterer-0.1.6}/chatterer.egg-info/PKG-INFO +21 -5
  13. {chatterer-0.1.4 → chatterer-0.1.6}/chatterer.egg-info/SOURCES.txt +7 -1
  14. chatterer-0.1.6/chatterer.egg-info/requires.txt +27 -0
  15. chatterer-0.1.6/pyproject.toml +30 -0
  16. chatterer-0.1.4/chatterer/__init__.py +0 -21
  17. chatterer-0.1.4/chatterer/language_model.py +0 -608
  18. chatterer-0.1.4/chatterer/strategies/__init__.py +0 -19
  19. chatterer-0.1.4/chatterer/strategies/atom_of_thoughts.py +0 -594
  20. chatterer-0.1.4/chatterer.egg-info/requires.txt +0 -8
  21. chatterer-0.1.4/pyproject.toml +0 -15
  22. {chatterer-0.1.4 → chatterer-0.1.6}/README.md +0 -0
  23. {chatterer-0.1.4 → chatterer-0.1.6}/chatterer/strategies/base.py +0 -0
  24. {chatterer-0.1.4 → chatterer-0.1.6}/chatterer.egg-info/dependency_links.txt +0 -0
  25. {chatterer-0.1.4 → chatterer-0.1.6}/chatterer.egg-info/top_level.txt +0 -0
  26. {chatterer-0.1.4 → chatterer-0.1.6}/setup.cfg +0 -0
@@ -1,16 +1,32 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: chatterer
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: instructor>=1.7.2
8
8
  Requires-Dist: langchain>=0.3.19
9
+ Provides-Extra: dev
10
+ Requires-Dist: neo4j-extension>=0.1.14; extra == "dev"
11
+ Requires-Dist: colorama>=0.4.6; extra == "dev"
12
+ Requires-Dist: ipykernel>=6.29.5; extra == "dev"
13
+ Provides-Extra: conversion
14
+ Requires-Dist: markdownify>=1.1.0; extra == "conversion"
15
+ Requires-Dist: commonmark>=0.9.1; extra == "conversion"
16
+ Requires-Dist: playwright>=1.50.0; extra == "conversion"
17
+ Requires-Dist: pillow>=11.1.0; extra == "conversion"
18
+ Requires-Dist: mistune>=3.1.2; extra == "conversion"
19
+ Requires-Dist: markitdown>=0.0.2; extra == "conversion"
20
+ Requires-Dist: pymupdf>=1.25.4; extra == "conversion"
21
+ Provides-Extra: langchain-providers
22
+ Requires-Dist: langchain-openai>=0.3.7; extra == "langchain-providers"
23
+ Requires-Dist: langchain-anthropic>=0.3.8; extra == "langchain-providers"
24
+ Requires-Dist: langchain-google-genai>=2.0.10; extra == "langchain-providers"
25
+ Requires-Dist: langchain-ollama>=0.2.3; extra == "langchain-providers"
9
26
  Provides-Extra: all
10
- Requires-Dist: langchain-openai>=0.3.7; extra == "all"
11
- Requires-Dist: langchain-anthropic>=0.3.8; extra == "all"
12
- Requires-Dist: langchain-google-genai>=2.0.10; extra == "all"
13
- Requires-Dist: langchain-ollama>=0.2.3; extra == "all"
27
+ Requires-Dist: chatterer[langchain-providers]; extra == "all"
28
+ Requires-Dist: chatterer[conversion]; extra == "all"
29
+ Requires-Dist: chatterer[dev]; extra == "all"
14
30
 
15
31
  # Chatterer
16
32
 
@@ -0,0 +1,27 @@
1
"""Top-level package exports for chatterer."""

from .language_model import Chatterer
from .strategies import (
    AoTPipeline,
    AoTPrompter,
    AoTStrategy,
    BaseStrategy,
)
from .tools import (
    anything_to_markdown,
    get_default_html_to_markdown_options,
    html_to_markdown,
    pdf_to_text,
    pyscripts_to_snippets,
)

# Public API, kept alphabetical for easy scanning.
__all__ = [
    "AoTPipeline",
    "AoTPrompter",
    "AoTStrategy",
    "BaseStrategy",
    "Chatterer",
    "anything_to_markdown",
    "get_default_html_to_markdown_options",
    "html_to_markdown",
    "pdf_to_text",
    "pyscripts_to_snippets",
]
@@ -0,0 +1,327 @@
1
+ from typing import (
2
+ TYPE_CHECKING,
3
+ Any,
4
+ AsyncIterator,
5
+ Iterator,
6
+ Optional,
7
+ Self,
8
+ Type,
9
+ TypeAlias,
10
+ TypeVar,
11
+ )
12
+
13
+ from langchain_core.language_models.base import LanguageModelInput
14
+ from langchain_core.language_models.chat_models import BaseChatModel
15
+ from langchain_core.messages import HumanMessage
16
+ from langchain_core.runnables.base import Runnable
17
+ from langchain_core.runnables.config import RunnableConfig
18
+ from pydantic import BaseModel, Field
19
+
20
+ if TYPE_CHECKING:
21
+ from instructor import Partial
22
+
23
# Type variable bound to pydantic models; lets the generate_pydantic* helpers
# return exactly the model type the caller passed in.
PydanticModelT = TypeVar("PydanticModelT", bound=BaseModel)
# Raw shape produced by langchain's `with_structured_output` runnables:
# either a plain dict (JSON mode) or an already-validated pydantic model.
StructuredOutputType: TypeAlias = dict[object, object] | BaseModel

# Default prompt used by describe_image()/adescribe_image() when the caller
# supplies no instruction.
DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "Just describe all the details you see in the image in few sentences."
28
+
29
class Chatterer(BaseModel):
    """Language model wrapper for generating text from a given input.

    Wraps a langchain ``BaseChatModel`` and exposes synchronous/asynchronous
    text generation, streaming, structured output into pydantic models, and
    image-description helpers.

    Attributes:
        client: Underlying langchain chat model used for every call.
        structured_output_kwargs: Extra keyword arguments forwarded to
            ``client.with_structured_output`` (e.g. ``{"strict": True}``).
    """

    client: BaseChatModel
    structured_output_kwargs: dict[str, Any] = Field(default_factory=dict)

    def __call__(self, messages: LanguageModelInput) -> str:
        """
        Generate text from the given input messages.

        Args:
            messages (LanguageModelInput): Input messages for the language model.
                Can be one of the following types:
                - str: A single string message.
                - list[dict[str, str]]: A list of dictionaries with 'role' and 'content' keys.
                - tuple[str, str]: A tuple of strings representing the role and content of a single message.
                - list[BaseMessage]: A list of BaseMessage objects. (BaseMessage is a Pydantic model; e.g. can import AIMessage, HumanMessage, SystemMessage from langchain_core.messages)
        """
        return self.generate(messages)

    @classmethod
    def openai(
        cls,
        model: str = "gpt-4o-mini",
        structured_output_kwargs: Optional[dict[str, Any]] = {"strict": True},  # noqa: B006 -- default kept for interface compatibility; copied below, never mutated
    ) -> Self:
        """Build a Chatterer backed by an OpenAI chat model."""
        from langchain_openai import ChatOpenAI

        # Copy so the shared mutable default dict can never be aliased or
        # mutated through the constructed instance.
        return cls(client=ChatOpenAI(model=model), structured_output_kwargs=dict(structured_output_kwargs or {}))

    @classmethod
    def anthropic(
        cls,
        model_name: str = "claude-3-7-sonnet-20250219",
        structured_output_kwargs: Optional[dict[str, Any]] = None,
    ) -> Self:
        """Build a Chatterer backed by an Anthropic chat model."""
        from langchain_anthropic import ChatAnthropic

        return cls(
            client=ChatAnthropic(model_name=model_name, timeout=None, stop=None),
            structured_output_kwargs=structured_output_kwargs or {},
        )

    @classmethod
    def google(
        cls,
        model: str = "gemini-2.0-flash",
        structured_output_kwargs: Optional[dict[str, Any]] = None,
    ) -> Self:
        """Build a Chatterer backed by a Google Generative AI chat model."""
        from langchain_google_genai import ChatGoogleGenerativeAI

        return cls(
            client=ChatGoogleGenerativeAI(model=model),
            structured_output_kwargs=structured_output_kwargs or {},
        )

    @classmethod
    def ollama(
        cls,
        model: str = "deepseek-r1:1.5b",
        structured_output_kwargs: Optional[dict[str, Any]] = None,
    ) -> Self:
        """Build a Chatterer backed by a local Ollama chat model."""
        from langchain_ollama import ChatOllama

        return cls(
            client=ChatOllama(model=model),
            structured_output_kwargs=structured_output_kwargs or {},
        )

    def generate(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> str:
        """Invoke the model once and return the response text."""
        return self.client.invoke(input=messages, config=config, stop=stop, **kwargs).text()

    async def agenerate(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> str:
        """Async variant of :meth:`generate`."""
        return (await self.client.ainvoke(input=messages, config=config, stop=stop, **kwargs)).text()

    def generate_stream(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        """Stream the response, yielding text chunks as they arrive."""
        for chunk in self.client.stream(input=messages, config=config, stop=stop, **kwargs):
            yield chunk.text()

    async def agenerate_stream(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        """Async variant of :meth:`generate_stream`."""
        async for chunk in self.client.astream(input=messages, config=config, stop=stop, **kwargs):
            yield chunk.text()

    def generate_pydantic(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> PydanticModelT:
        """Invoke the model and parse the response into *response_model*.

        Raises pydantic's ``ValidationError`` if the raw output does not
        conform to the schema.
        """
        result: StructuredOutputType = with_structured_output(
            client=self.client,
            response_model=response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).invoke(input=messages, config=config, stop=stop, **kwargs)
        # Some providers already return a model instance, others a raw dict.
        if isinstance(result, response_model):
            return result
        else:
            return response_model.model_validate(result)

    async def agenerate_pydantic(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> PydanticModelT:
        """Async variant of :meth:`generate_pydantic`."""
        result: StructuredOutputType = await with_structured_output(
            client=self.client,
            response_model=response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).ainvoke(input=messages, config=config, stop=stop, **kwargs)
        if isinstance(result, response_model):
            return result
        else:
            return response_model.model_validate(result)

    def generate_pydantic_stream(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> Iterator[PydanticModelT]:
        """Stream partial structured outputs, yielding progressively filled models.

        Requires the optional ``instructor`` dependency for partial parsing.
        """
        try:
            import instructor
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.") from err

        partial_response_model = instructor.Partial[response_model]
        for chunk in with_structured_output(
            client=self.client,
            response_model=partial_response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).stream(input=messages, config=config, stop=stop, **kwargs):
            yield response_model.model_validate(chunk)

    async def agenerate_pydantic_stream(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> AsyncIterator[PydanticModelT]:
        """Async variant of :meth:`generate_pydantic_stream`."""
        try:
            import instructor
        except ImportError as err:
            raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.") from err

        partial_response_model = instructor.Partial[response_model]
        async for chunk in with_structured_output(
            client=self.client,
            response_model=partial_response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).astream(input=messages, config=config, stop=stop, **kwargs):
            yield response_model.model_validate(chunk)

    @staticmethod
    def _image_message(image_url: str, instruction: str) -> HumanMessage:
        # Single place that builds the multimodal (text + image) user message
        # shared by describe_image/adescribe_image.
        return HumanMessage(
            content=[{"type": "text", "text": instruction}, {"type": "image_url", "image_url": {"url": image_url}}],
        )

    def describe_image(self, image_url: str, instruction: str = DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION) -> str:
        """
        Create a detailed description of an image using the Vision Language Model.
        - image_url: Image URL to describe
        """
        return self.generate([self._image_message(image_url, instruction)])

    async def adescribe_image(self, image_url: str, instruction: str = DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION) -> str:
        """
        Create a detailed description of an image using the Vision Language Model asynchronously.
        - image_url: Image URL to describe
        """
        return await self.agenerate([self._image_message(image_url, instruction)])
235
+
236
+
237
def with_structured_output(
    client: BaseChatModel,
    response_model: Type["PydanticModelT | Partial[PydanticModelT]"],
    structured_output_kwargs: dict[str, Any],
) -> Runnable[LanguageModelInput, dict[object, object] | BaseModel]:
    """Bind *response_model* onto *client* as a structured-output runnable."""
    runnable = client.with_structured_output(schema=response_model, **structured_output_kwargs)  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
    return runnable
243
+
244
+
245
if __name__ == "__main__":
    # Manual smoke test: exercises every public generation method against a
    # live OpenAI model (requires network access and API credentials).
    import asyncio

    # Define a Pydantic model for testing structured output.
    class Propositions(BaseModel):
        proposition_topic: str
        proposition_content: str

    chatterer = Chatterer.openai()
    prompt = "What is the meaning of life?"

    # === Synchronous Tests ===

    # 1. generate
    print("=== Synchronous generate ===")
    result_sync = chatterer.generate(prompt)
    print("Result (generate):", result_sync)

    # 2. __call__
    print("\n=== Synchronous __call__ ===")
    result_call = chatterer(prompt)
    print("Result (__call__):", result_call)

    # 3. generate_stream
    print("\n=== Synchronous generate_stream ===")
    for i, chunk in enumerate(chatterer.generate_stream(prompt)):
        print(f"Chunk {i}:", chunk)

    # 4. generate_pydantic
    print("\n=== Synchronous generate_pydantic ===")
    try:
        result_pydantic = chatterer.generate_pydantic(Propositions, prompt)
        print("Result (generate_pydantic):", result_pydantic)
    except Exception as e:
        print("Error in generate_pydantic:", e)

    # 5. generate_pydantic_stream
    print("\n=== Synchronous generate_pydantic_stream ===")
    try:
        for i, chunk in enumerate(chatterer.generate_pydantic_stream(Propositions, prompt)):
            print(f"Pydantic Chunk {i}:", chunk)
    except Exception as e:
        print("Error in generate_pydantic_stream:", e)

    # === Asynchronous Tests ===

    # Async helper function to enumerate async iterator
    async def async_enumerate(aiter: AsyncIterator[Any], start: int = 0) -> AsyncIterator[tuple[int, Any]]:
        i = start
        async for item in aiter:
            yield i, item
            i += 1

    async def run_async_tests() -> None:
        # 6. agenerate
        print("\n=== Asynchronous agenerate ===")
        result_async = await chatterer.agenerate(prompt)
        print("Result (agenerate):", result_async)

        # 7. agenerate_stream
        print("\n=== Asynchronous agenerate_stream ===")
        async for i, chunk in async_enumerate(chatterer.agenerate_stream(prompt)):
            print(f"Async Chunk {i}:", chunk)

        # 8. agenerate_pydantic
        print("\n=== Asynchronous agenerate_pydantic ===")
        try:
            result_async_pydantic = await chatterer.agenerate_pydantic(Propositions, prompt)
            print("Result (agenerate_pydantic):", result_async_pydantic)
        except Exception as e:
            print("Error in agenerate_pydantic:", e)

        # 9. agenerate_pydantic_stream
        print("\n=== Asynchronous agenerate_pydantic_stream ===")
        try:
            # Manual counter: async generators can't be wrapped by enumerate().
            i = 0
            async for chunk in chatterer.agenerate_pydantic_stream(Propositions, prompt):
                print(f"Async Pydantic Chunk {i}:", chunk)
                i += 1
        except Exception as e:
            print("Error in agenerate_pydantic_stream:", e)

    asyncio.run(run_async_tests())
File without changes
@@ -0,0 +1,13 @@
1
"""Strategy exports: the Atom-of-Thoughts pipeline plus the strategy base class."""

from .atom_of_thoughts import (
    AoTPipeline,
    AoTPrompter,
    AoTStrategy,
)
from .base import BaseStrategy

# Public API, kept alphabetical for easy scanning.
__all__ = [
    "AoTPipeline",
    "AoTPrompter",
    "AoTStrategy",
    "BaseStrategy",
]