chatterer-0.1.7-py3-none-any.whl → chatterer-0.1.8-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- chatterer/__init__.py +39 -39
- chatterer/language_model.py +371 -371
- chatterer/messages.py +8 -8
- chatterer/strategies/__init__.py +13 -13
- chatterer/strategies/atom_of_thoughts.py +975 -975
- chatterer/strategies/base.py +14 -14
- chatterer/tools/__init__.py +17 -17
- chatterer/tools/citation_chunking/__init__.py +3 -3
- chatterer/tools/citation_chunking/chunks.py +53 -53
- chatterer/tools/citation_chunking/citation_chunker.py +118 -118
- chatterer/tools/citation_chunking/citations.py +285 -285
- chatterer/tools/citation_chunking/prompt.py +157 -157
- chatterer/tools/citation_chunking/reference.py +26 -26
- chatterer/tools/citation_chunking/utils.py +138 -138
- chatterer/tools/convert_to_text.py +463 -466
- chatterer/tools/webpage_to_markdown/__init__.py +4 -4
- chatterer/tools/webpage_to_markdown/playwright_bot.py +649 -649
- chatterer/tools/webpage_to_markdown/utils.py +329 -329
- chatterer/utils/image.py +288 -284
- {chatterer-0.1.7.dist-info → chatterer-0.1.8.dist-info}/METADATA +166 -166
- chatterer-0.1.8.dist-info/RECORD +24 -0
- {chatterer-0.1.7.dist-info → chatterer-0.1.8.dist-info}/WHEEL +1 -1
- chatterer-0.1.7.dist-info/RECORD +0 -24
- {chatterer-0.1.7.dist-info → chatterer-0.1.8.dist-info}/top_level.txt +0 -0
chatterer/language_model.py
CHANGED
@@ -1,371 +1,371 @@
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Iterator,
    Optional,
    Self,
    Type,
    TypeAlias,
    TypeVar,
    cast,
    overload,
)

from langchain_core.language_models.base import LanguageModelInput
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.runnables.base import Runnable
from langchain_core.runnables.config import RunnableConfig
from pydantic import BaseModel, Field

from .messages import AIMessage, BaseMessage, HumanMessage

if TYPE_CHECKING:
    from instructor import Partial

PydanticModelT = TypeVar("PydanticModelT", bound=BaseModel)
StructuredOutputType: TypeAlias = dict[object, object] | BaseModel

DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "Just describe all the details you see in the image in few sentences."


class Chatterer(BaseModel):
    """Language model for generating text from a given input."""

    client: BaseChatModel
    structured_output_kwargs: dict[str, Any] = Field(default_factory=dict)

    @overload
    def __call__(
        self,
        messages: LanguageModelInput,
        response_model: Type[PydanticModelT],
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> PydanticModelT: ...

    @overload
    def __call__(
        self,
        messages: LanguageModelInput,
        response_model: None = None,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> str: ...

    def __call__(
        self,
        messages: LanguageModelInput,
        response_model: Optional[Type[PydanticModelT]] = None,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> str | PydanticModelT:
        if response_model:
            return self.generate_pydantic(response_model, messages, config, stop, **kwargs)
        return self.client.invoke(input=messages, config=config, stop=stop, **kwargs).text()

    @classmethod
    def openai(
        cls,
        model: str = "gpt-4o-mini",
        structured_output_kwargs: Optional[dict[str, Any]] = {"strict": True},
    ) -> Self:
        from langchain_openai import ChatOpenAI

        return cls(client=ChatOpenAI(model=model), structured_output_kwargs=structured_output_kwargs or {})

    @classmethod
    def anthropic(
        cls,
        model_name: str = "claude-3-7-sonnet-20250219",
        structured_output_kwargs: Optional[dict[str, Any]] = None,
    ) -> Self:
        from langchain_anthropic import ChatAnthropic

        return cls(
            client=ChatAnthropic(model_name=model_name, timeout=None, stop=None),
            structured_output_kwargs=structured_output_kwargs or {},
        )

    @classmethod
    def google(
        cls,
        model: str = "gemini-2.0-flash",
        structured_output_kwargs: Optional[dict[str, Any]] = None,
    ) -> Self:
        from langchain_google_genai import ChatGoogleGenerativeAI

        return cls(
            client=ChatGoogleGenerativeAI(model=model),
            structured_output_kwargs=structured_output_kwargs or {},
        )

    @classmethod
    def ollama(
        cls,
        model: str = "deepseek-r1:1.5b",
        structured_output_kwargs: Optional[dict[str, Any]] = None,
    ) -> Self:
        from langchain_ollama import ChatOllama

        return cls(
            client=ChatOllama(model=model),
            structured_output_kwargs=structured_output_kwargs or {},
        )

    def generate(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> str:
        return self.client.invoke(input=messages, config=config, stop=stop, **kwargs).text()

    async def agenerate(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> str:
        return (await self.client.ainvoke(input=messages, config=config, stop=stop, **kwargs)).text()

    def generate_stream(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        for chunk in self.client.stream(input=messages, config=config, stop=stop, **kwargs):
            yield chunk.text()

    async def agenerate_stream(
        self,
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> AsyncIterator[str]:
        async for chunk in self.client.astream(input=messages, config=config, stop=stop, **kwargs):
            yield chunk.text()

    def generate_pydantic(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> PydanticModelT:
        result: StructuredOutputType = with_structured_output(
            client=self.client,
            response_model=response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).invoke(input=messages, config=config, stop=stop, **kwargs)
        if isinstance(result, response_model):
            return result
        else:
            return response_model.model_validate(result)

    async def agenerate_pydantic(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> PydanticModelT:
        result: StructuredOutputType = await with_structured_output(
            client=self.client,
            response_model=response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).ainvoke(input=messages, config=config, stop=stop, **kwargs)
        if isinstance(result, response_model):
            return result
        else:
            return response_model.model_validate(result)

    def generate_pydantic_stream(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> Iterator[PydanticModelT]:
        try:
            import instructor
        except ImportError:
            raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")

        partial_response_model = instructor.Partial[response_model]
        for chunk in with_structured_output(
            client=self.client,
            response_model=partial_response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).stream(input=messages, config=config, stop=stop, **kwargs):
            yield response_model.model_validate(chunk)

    async def agenerate_pydantic_stream(
        self,
        response_model: Type[PydanticModelT],
        messages: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> AsyncIterator[PydanticModelT]:
        try:
            import instructor
        except ImportError:
            raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")

        partial_response_model = instructor.Partial[response_model]
        async for chunk in with_structured_output(
            client=self.client,
            response_model=partial_response_model,
            structured_output_kwargs=self.structured_output_kwargs,
        ).astream(input=messages, config=config, stop=stop, **kwargs):
            yield response_model.model_validate(chunk)

    def describe_image(self, image_url: str, instruction: str = DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION) -> str:
        """
        Create a detailed description of an image using the Vision Language Model.
        - image_url: Image URL to describe
        """
        return self.generate([
            HumanMessage(
                content=[{"type": "text", "text": instruction}, {"type": "image_url", "image_url": {"url": image_url}}],
            )
        ])

    async def adescribe_image(self, image_url: str, instruction: str = DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION) -> str:
        """
        Create a detailed description of an image using the Vision Language Model asynchronously.
        - image_url: Image URL to describe
        """
        return await self.agenerate([
            HumanMessage(
                content=[{"type": "text", "text": instruction}, {"type": "image_url", "image_url": {"url": image_url}}],
            )
        ])

    @staticmethod
    def get_num_tokens_from_message(message: BaseMessage) -> Optional[tuple[int, int]]:
        try:
            if isinstance(message, AIMessage) and (usage_metadata := message.usage_metadata):
                input_tokens = int(usage_metadata["input_tokens"])
                output_tokens = int(usage_metadata["output_tokens"])
            else:
                # Dynamic extraction for unknown structures
                input_tokens: Optional[int] = None
                output_tokens: Optional[int] = None

                def _find_tokens(obj: object) -> None:
                    nonlocal input_tokens, output_tokens
                    if isinstance(obj, dict):
                        for key, value in cast(dict[object, object], obj).items():
                            if isinstance(value, int):
                                if "input" in str(key) or "prompt" in str(key):
                                    input_tokens = value
                                elif "output" in str(key) or "completion" in str(key):
                                    output_tokens = value
                            else:
                                _find_tokens(value)
                    elif isinstance(obj, list):
                        for item in cast(list[object], obj):
                            _find_tokens(item)

                _find_tokens(message.model_dump())

            if input_tokens is None or output_tokens is None:
                return None
            return input_tokens, output_tokens
        except Exception:
            return None


def with_structured_output(
    client: BaseChatModel,
    response_model: Type["PydanticModelT | Partial[PydanticModelT]"],
    structured_output_kwargs: dict[str, Any],
) -> Runnable[LanguageModelInput, dict[object, object] | BaseModel]:
    return client.with_structured_output(schema=response_model, **structured_output_kwargs)  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]


if __name__ == "__main__":
    import asyncio

    # Define a Pydantic model for testing
    class Propositions(BaseModel):
        proposition_topic: str
        proposition_content: str

    chatterer = Chatterer.openai()
    prompt = "What is the meaning of life?"

    # === Synchronous Tests ===

    # generate
    print("=== Synchronous generate ===")
    result_sync = chatterer(prompt)
    print("Result (generate):", result_sync)

    # generate_stream
    print("\n=== Synchronous generate_stream ===")
    for i, chunk in enumerate(chatterer.generate_stream(prompt)):
        print(f"Chunk {i}:", chunk)

    # generate_pydantic
    print("\n=== Synchronous generate_pydantic ===")
    result_pydantic = chatterer(prompt, Propositions)
    print("Result (generate_pydantic):", result_pydantic)

    # generate_pydantic_stream
    print("\n=== Synchronous generate_pydantic_stream ===")
    for i, chunk in enumerate(chatterer.generate_pydantic_stream(Propositions, prompt)):
        print(f"Pydantic Chunk {i}:", chunk)

    # === Asynchronous Tests ===

    # Async helper function to enumerate async iterator
    async def async_enumerate(aiter: AsyncIterator[Any], start: int = 0) -> AsyncIterator[tuple[int, Any]]:
        i = start
        async for item in aiter:
            yield i, item
            i += 1

    async def run_async_tests():
        # 6. agenerate
        print("\n=== Asynchronous agenerate ===")
        result_async = await chatterer.agenerate(prompt)
        print("Result (agenerate):", result_async)

        # 7. agenerate_stream
        print("\n=== Asynchronous agenerate_stream ===")
        async for i, chunk in async_enumerate(chatterer.agenerate_stream(prompt)):
            print(f"Async Chunk {i}:", chunk)

        # 8. agenerate_pydantic
        print("\n=== Asynchronous agenerate_pydantic ===")
        try:
            result_async_pydantic = await chatterer.agenerate_pydantic(Propositions, prompt)
            print("Result (agenerate_pydantic):", result_async_pydantic)
        except Exception as e:
            print("Error in agenerate_pydantic:", e)

        # 9. agenerate_pydantic_stream
        print("\n=== Asynchronous agenerate_pydantic_stream ===")
        try:
            i = 0
            async for chunk in chatterer.agenerate_pydantic_stream(Propositions, prompt):
                print(f"Async Pydantic Chunk {i}:", chunk)
                i += 1
        except Exception as e:
            print("Error in agenerate_pydantic_stream:", e)

    asyncio.run(run_async_tests())
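
As a quick orientation to the API above, here is a minimal usage sketch; it is not part of the package diff. It assumes langchain-openai is installed and OPENAI_API_KEY is set, that Chatterer is re-exported from the package root (otherwise import it from chatterer.language_model), and the image URL is a placeholder.

    from chatterer import Chatterer

    chatterer = Chatterer.openai(model="gpt-4o-mini")

    # Vision helper: sends one HumanMessage combining the instruction text
    # and the image URL, and returns the model's description as a string.
    print(chatterer.describe_image("https://example.com/photo.png"))

    # Token accounting: invoke the underlying client to get an AIMessage, then
    # read (input_tokens, output_tokens) from its usage metadata; the helper
    # falls back to a recursive search and returns None if nothing is found.
    message = chatterer.client.invoke("Say hello.")
    print(Chatterer.get_num_tokens_from_message(message))

The sketch exercises the two pieces the module's own __main__ block does not: the vision helper and the token-usage extractor.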