langchain-ollama 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions.
- langchain_ollama/__init__.py +1 -1
- langchain_ollama/_utils.py +39 -0
- langchain_ollama/chat_models.py +218 -120
- langchain_ollama/embeddings.py +55 -12
- langchain_ollama/llms.py +119 -32
- langchain_ollama-0.3.4.dist-info/METADATA +68 -0
- langchain_ollama-0.3.4.dist-info/RECORD +11 -0
- {langchain_ollama-0.3.2.dist-info → langchain_ollama-0.3.4.dist-info}/WHEEL +1 -1
- langchain_ollama-0.3.2.dist-info/METADATA +0 -57
- langchain_ollama-0.3.2.dist-info/RECORD +0 -10
- {langchain_ollama-0.3.2.dist-info → langchain_ollama-0.3.4.dist-info}/entry_points.txt +0 -0
- {langchain_ollama-0.3.2.dist-info → langchain_ollama-0.3.4.dist-info}/licenses/LICENSE +0 -0
langchain_ollama/__init__.py
CHANGED

langchain_ollama/_utils.py
ADDED
@@ -0,0 +1,39 @@
+"""Utility functions for validating Ollama models."""
+
+from httpx import ConnectError
+from ollama import Client, ResponseError
+
+
+def validate_model(client: Client, model_name: str) -> None:
+    """Validate that a model exists in the Ollama instance.
+
+    Args:
+        client: The Ollama client.
+        model_name: The name of the model to validate.
+
+    Raises:
+        ValueError: If the model is not found or if there's a connection issue.
+    """
+    try:
+        response = client.list()
+
+        model_names: list[str] = [model["model"] for model in response["models"]]
+
+        if not any(
+            model_name == m or m.startswith(f"{model_name}:") for m in model_names
+        ):
+            msg = (
+                f"Model `{model_name}` not found in Ollama. Please pull the "
+                f"model (using `ollama pull {model_name}`) or specify a valid "
+                f"model name. Available local models: {', '.join(model_names)}"
+            )
+            raise ValueError(msg)
+    except ConnectError as e:
+        msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
+        raise ValueError(msg) from e
+    except ResponseError as e:
+        msg = (
+            "Received an error from the Ollama API. "
+            "Please check your Ollama server logs."
+        )
+        raise ValueError(msg) from e
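For orientation, here is a minimal usage sketch of the new helper (assumptions: a local Ollama server on the default port, an illustrative model name, and note that `_utils` is a private module of the package):

```python
# Minimal sketch: exercising the new validate_model helper directly.
# Assumes a local Ollama server on the default port; "llama3.1" is illustrative.
from ollama import Client

from langchain_ollama._utils import validate_model  # private module shown above

client = Client(host="http://localhost:11434")
try:
    validate_model(client, "llama3.1")
    print("model is available locally")
except ValueError as err:
    # Raised when the model is missing, the server is unreachable,
    # or the Ollama API returns an error (see the except clauses above).
    print(f"validation failed: {err}")
```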
langchain_ollama/chat_models.py
CHANGED
@@ -1,12 +1,13 @@
 """Ollama chat models."""

+from __future__ import annotations
+
 import json
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
     Any,
     Callable,
-    Final,
     Literal,
     Optional,
     Union,
@@ -25,7 +26,7 @@ from langchain_core.messages import (
     AIMessage,
     AIMessageChunk,
     BaseMessage,
-    BaseMessageChunk,
+    ChatMessage,
     HumanMessage,
     SystemMessage,
     ToolCall,
@@ -54,8 +55,7 @@ from pydantic.json_schema import JsonSchemaValue
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self, is_typeddict

-DEFAULT_THINK_TOKEN_START: Final[str] = "<think>"
-DEFAULT_THINK_TOKEN_END: Final[str] = "</think>"
+from ._utils import validate_model


 def _get_usage_metadata_from_generation_info(
@@ -76,7 +76,9 @@ def _get_usage_metadata_from_generation_info(


 def _parse_json_string(
-    json_string: str, raw_tool_call: dict[str, Any], skip: bool
+    json_string: str,
+    raw_tool_call: dict[str, Any],
+    skip: bool,  # noqa: FBT001
 ) -> Any:
     """Attempt to parse a JSON string for tool calling.

@@ -150,26 +152,30 @@ def _get_tool_calls_from_response(
 ) -> list[ToolCall]:
     """Get tool calls from ollama response."""
     tool_calls = []
-    if "message" in response:
-        if raw_tool_calls := response["message"].get("tool_calls"):
-            for tc in raw_tool_calls:
-                tool_calls.append(
-                    tool_call(
-                        id=str(uuid4()),
-                        name=tc["function"]["name"],
-                        args=_parse_arguments_from_tool_call(tc) or {},
-                    )
+    if "message" in response and (
+        raw_tool_calls := response["message"].get("tool_calls")
+    ):
+        tool_calls.extend(
+            [
+                tool_call(
+                    id=str(uuid4()),
+                    name=tc["function"]["name"],
+                    args=_parse_arguments_from_tool_call(tc) or {},
                 )
+                for tc in raw_tool_calls
+            ]
+        )
     return tool_calls


-def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
+def _lc_tool_call_to_openai_tool_call(tool_call_: ToolCall) -> dict:
+    """Convert a LangChain tool call to an OpenAI tool call format."""
     return {
         "type": "function",
-        "id": tool_call["id"],
+        "id": tool_call_["id"],
         "function": {
-            "name": tool_call["name"],
-            "arguments": tool_call["args"],
+            "name": tool_call_["name"],
+            "arguments": tool_call_["args"],
         },
     }

@@ -179,14 +185,12 @@ def _get_image_from_data_content_block(block: dict) -> str:
     if block["type"] == "image":
         if block["source_type"] == "base64":
             return block["data"]
-        else:
-            error_message = "Image data only supported through in-line base64 format."
-            raise ValueError(error_message)
-
-    else:
-        error_message = f"Blocks of type {block['type']} not supported."
+        error_message = "Image data only supported through in-line base64 format."
         raise ValueError(error_message)

+    error_message = f"Blocks of type {block['type']} not supported."
+    raise ValueError(error_message)
+

 def _is_pydantic_class(obj: Any) -> bool:
     return isinstance(obj, type) and is_basemodel_subclass(obj)
@@ -208,8 +212,22 @@ class ChatOllama(BaseChatModel):
     Key init args — completion params:
         model: str
            Name of Ollama model to use.
+        reasoning: Optional[bool]
+            Controls the reasoning/thinking mode for
+            `supported models <https://ollama.com/search?c=thinking>`__.
+
+            - ``True``: Enables reasoning mode. The model's reasoning process will be
+              captured and returned separately in the ``additional_kwargs`` of the
+              response message, under ``reasoning_content``. The main response
+              content will not include the reasoning tags.
+            - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+              and the response will not include any reasoning content.
+            - ``None`` (Default): The model will use its default reasoning behavior. Note
+              however, if the model's default behavior *is* to perform reasoning, think tags
+              (``<think>`` and ``</think>``) will be present within the main response content
+              unless you set ``reasoning`` to ``True``.
         temperature: float
-            Sampling temperature. Ranges from 0.0 to 1.0
+            Sampling temperature. Ranges from ``0.0`` to ``1.0``.
         num_predict: Optional[int]
             Max number of tokens to generate.

@@ -325,7 +343,6 @@ class ChatOllama(BaseChatModel):
             '{"location": "Pune, India", "time_of_day": "morning"}'

     Tool Calling:
-
         .. code-block:: python

             from langchain_ollama import ChatOllama
@@ -344,17 +361,70 @@ class ChatOllama(BaseChatModel):
             'args': {'a': 45, 'b': 67},
             'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
             'type': 'tool_call'}]
-    """  # noqa: E501
+
+    Thinking / Reasoning:
+        You can enable reasoning mode for models that support it by setting
+        the ``reasoning`` parameter to ``True`` in either the constructor or
+        the ``invoke``/``stream`` methods. This will enable the model to think
+        through the problem and return the reasoning process separately in the
+        ``additional_kwargs`` of the response message, under ``reasoning_content``.
+
+        If ``reasoning`` is set to ``None``, the model will use its default reasoning
+        behavior, and any reasoning content will *not* be captured under the
+        ``reasoning_content`` key, but will be present within the main response content
+        as think tags (``<think>`` and ``</think>``).
+
+        .. note::
+            This feature is only available for `models that support reasoning <https://ollama.com/search?c=thinking>`__.
+
+        .. code-block:: python
+
+            from langchain_ollama import ChatOllama
+
+            llm = ChatOllama(
+                model = "deepseek-r1:8b",
+                reasoning= True,
+            )
+
+            user_message = HumanMessage(content="how many r in the word strawberry?")
+            messages: List[Any] = [user_message]
+            llm.invoke(messages)
+
+            # or, on an invocation basis:
+
+            llm.invoke(messages, reasoning=True)
+            # or llm.stream(messages, reasoning=True)
+
+            # If not provided, the invocation will default to the ChatOllama reasoning
+            # param provided (None by default).
+
+        .. code-block:: python
+
+            AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})
+
+
+    """  # noqa: E501, pylint: disable=line-too-long

     model: str
     """Model name to use."""

-    extract_reasoning: Optional[Union[bool, tuple[str, str]]] = False
-    """
-    Whether to extract the reasoning tokens in think blocks.
-    Extracts `chunk.content` to `message.additional_kwargs.reasoning_content`
-    and returns the remaining content in `chunk.content`.
-    """
+    reasoning: Optional[bool] = None
+    """Controls the reasoning/thinking mode for
+    `supported models <https://ollama.com/search?c=thinking>`__.
+
+    - ``True``: Enables reasoning mode. The model's reasoning process will be
+      captured and returned separately in the ``additional_kwargs`` of the
+      response message, under ``reasoning_content``. The main response
+      content will not include the reasoning tags.
+    - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+      and the response will not include any reasoning content.
+    - ``None`` (Default): The model will use its default reasoning behavior. Note
+      however, if the model's default behavior *is* to perform reasoning, think tags
+      (``<think>`` and ``</think>``) will be present within the main response content
+      unless you set ``reasoning`` to ``True``."""
+
+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in Ollama locally on initialization."""

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
@@ -435,16 +505,30 @@ class ChatOllama(BaseChatModel):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client.
-    For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
+    """Additional kwargs to pass to the httpx clients.
+    These arguments are passed to both synchronous and async clients.
+    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+    to synchronous and asynchronous clients.
+    """
+
+    async_client_kwargs: Optional[dict] = {}
+    """Additional kwargs to merge with client_kwargs before
+    passing to the httpx AsyncClient.
+    `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
     """

-    _client: Client = PrivateAttr(default=None)
+    sync_client_kwargs: Optional[dict] = {}
+    """Additional kwargs to merge with client_kwargs before
+    passing to the httpx Client.
+    `Full list of params. <https://www.python-httpx.org/api/#client>`__
+    """
+
+    _client: Client = PrivateAttr()
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(default=None)
+    _async_client: AsyncClient = PrivateAttr()
     """
     The async client to use for making requests.
     """
@@ -458,8 +542,9 @@ class ChatOllama(BaseChatModel):
         ollama_messages = self._convert_messages_to_ollama_messages(messages)

         if self.stop is not None and stop is not None:
-            raise ValueError("`stop` found in both the input and default params.")
-        elif self.stop is not None:
+            msg = "`stop` found in both the input and default params."
+            raise ValueError(msg)
+        if self.stop is not None:
             stop = self.stop

         options_dict = kwargs.pop(
@@ -487,6 +572,7 @@ class ChatOllama(BaseChatModel):
             "messages": ollama_messages,
             "stream": kwargs.pop("stream", True),
             "model": kwargs.pop("model", self.model),
+            "think": kwargs.pop("reasoning", self.reasoning),
             "format": kwargs.pop("format", self.format),
             "options": Options(**options_dict),
             "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
@@ -502,8 +588,19 @@ class ChatOllama(BaseChatModel):
     def _set_clients(self) -> Self:
         """Set clients to use for ollama."""
         client_kwargs = self.client_kwargs or {}
-        self._client = Client(host=self.base_url, **client_kwargs)
-        self._async_client = AsyncClient(host=self.base_url, **client_kwargs)
+
+        sync_client_kwargs = client_kwargs
+        if self.sync_client_kwargs:
+            sync_client_kwargs = {**sync_client_kwargs, **self.sync_client_kwargs}
+
+        async_client_kwargs = client_kwargs
+        if self.async_client_kwargs:
+            async_client_kwargs = {**async_client_kwargs, **self.async_client_kwargs}
+
+        self._client = Client(host=self.base_url, **sync_client_kwargs)
+        self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     def _convert_messages_to_ollama_messages(
@@ -511,7 +608,7 @@ class ChatOllama(BaseChatModel):
     ) -> Sequence[Message]:
         ollama_messages: list = []
         for message in messages:
-            role: Literal["user", "assistant", "system", "tool"]
+            role: str
             tool_call_id: Optional[str] = None
             tool_calls: Optional[list[dict[str, Any]]] = None
             if isinstance(message, HumanMessage):
@@ -528,11 +625,14 @@ class ChatOllama(BaseChatModel):
                 )
             elif isinstance(message, SystemMessage):
                 role = "system"
+            elif isinstance(message, ChatMessage):
+                role = message.role
             elif isinstance(message, ToolMessage):
                 role = "tool"
                 tool_call_id = message.tool_call_id
             else:
-                raise ValueError("Received unsupported message type for Ollama.")
+                msg = "Received unsupported message type for Ollama."
+                raise ValueError(msg)

             content = ""
             images = []
@@ -556,10 +656,11 @@ class ChatOllama(BaseChatModel):
                     ):
                         image_url = temp_image_url["url"]
                     else:
-                        raise ValueError(
+                        msg = (
                             "Only string image_url or dict with string 'url' "
                             "inside content parts are supported."
                         )
+                        raise ValueError(msg)

                 image_url_components = image_url.split(",")
                 # Support data:image/jpeg;base64,<image> format
@@ -572,47 +673,27 @@ class ChatOllama(BaseChatModel):
                     image = _get_image_from_data_content_block(content_part)
                     images.append(image)
                 else:
-                    raise ValueError(
+                    msg = (
                         "Unsupported message content type. "
                         "Must either have type 'text' or type 'image_url' "
                         "with a string 'image_url' field."
                     )
-            # Should convert to ollama.Message once role includes tool, and tool_call_id is in Message  # noqa: E501
-            msg: dict = {
+                    raise ValueError(msg)
+            # Should convert to ollama.Message once role includes tool,
+            # and tool_call_id is in Message
+            msg_: dict = {
                 "role": role,
                 "content": content,
                 "images": images,
             }
             if tool_calls:
-                msg["tool_calls"] = tool_calls
+                msg_["tool_calls"] = tool_calls
             if tool_call_id:
-                msg["tool_call_id"] = tool_call_id
-            ollama_messages.append(msg)
+                msg_["tool_call_id"] = tool_call_id
+            ollama_messages.append(msg_)

         return ollama_messages

-    def _extract_reasoning(
-        self, message_chunk: BaseMessageChunk, is_thinking: bool
-    ) -> tuple[BaseMessageChunk, bool]:
-        """Mutate a message chunk to extract reasoning content."""
-        if not self.extract_reasoning:
-            return message_chunk, is_thinking
-        elif self.extract_reasoning is True:
-            start_token = DEFAULT_THINK_TOKEN_START
-            end_token = DEFAULT_THINK_TOKEN_END
-        else:
-            start_token, end_token = cast(tuple, self.extract_reasoning)
-        if start_token in message_chunk.content:
-            is_thinking = True
-        content = message_chunk.content
-        if is_thinking:
-            message_chunk.additional_kwargs["reasoning_content"] = content
-            message_chunk.content = ""
-        if end_token in content:
-            is_thinking = False
-
-        return message_chunk, is_thinking
-
     async def _acreate_chat_stream(
         self,
         messages: list[BaseMessage],
@@ -636,16 +717,18 @@ class ChatOllama(BaseChatModel):
         chat_params = self._chat_params(messages, stop, **kwargs)

         if chat_params["stream"]:
-            yield from self._client.chat(**chat_params)
+            if self._client:
+                yield from self._client.chat(**chat_params)
         else:
-            yield self._client.chat(**chat_params)
+            if self._client:
+                yield self._client.chat(**chat_params)

     def _chat_stream_with_aggregation(
         self,
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> ChatGenerationChunk:
         final_chunk = None
@@ -661,7 +744,8 @@ class ChatOllama(BaseChatModel):
                 verbose=verbose,
             )
         if final_chunk is None:
-            raise ValueError("No data received from Ollama stream.")
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)

         return final_chunk

@@ -670,7 +754,7 @@ class ChatOllama(BaseChatModel):
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> ChatGenerationChunk:
         final_chunk = None
@@ -686,7 +770,8 @@ class ChatOllama(BaseChatModel):
                 verbose=verbose,
             )
         if final_chunk is None:
-            raise ValueError("No data received from Ollama stream.")
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)

         return final_chunk

@@ -733,22 +818,35 @@ class ChatOllama(BaseChatModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
-        is_thinking = False
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
+                    if "model" in generation_info:
+                        generation_info["model_name"] = generation_info["model"]
                     _ = generation_info.pop("message", None)
                 else:
                     generation_info = None
+
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                additional_kwargs = {}
+                if (
+                    reasoning
+                    and "message" in stream_resp
+                    and (thinking_content := stream_resp["message"].get("thinking"))
+                ):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = ChatGenerationChunk(
                     message=AIMessageChunk(
-                        content=(
-                            stream_resp["message"]["content"]
-                            if "message" in stream_resp
-                            and "content" in stream_resp["message"]
-                            else ""
-                        ),
+                        content=content,
+                        additional_kwargs=additional_kwargs,
                         usage_metadata=_get_usage_metadata_from_generation_info(
                             stream_resp
                         ),
@@ -756,15 +854,7 @@ class ChatOllama(BaseChatModel):
                     ),
                     generation_info=generation_info,
                 )
-                if chunk.generation_info and (
-                    model := chunk.generation_info.get("model")
-                ):
-                    chunk.generation_info["model_name"] = model  # backwards compat
-                if self.extract_reasoning:
-                    message, is_thinking = self._extract_reasoning(
-                        chunk.message, is_thinking
-                    )
-                    chunk.message = message
+
                 yield chunk

     def _stream(
@@ -788,22 +878,35 @@ class ChatOllama(BaseChatModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
-        is_thinking = False
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
+                    if "model" in generation_info:
+                        generation_info["model_name"] = generation_info["model"]
                     _ = generation_info.pop("message", None)
                 else:
                     generation_info = None
+
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                additional_kwargs = {}
+                if (
+                    reasoning
+                    and "message" in stream_resp
+                    and (thinking_content := stream_resp["message"].get("thinking"))
+                ):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = ChatGenerationChunk(
                     message=AIMessageChunk(
-                        content=(
-                            stream_resp["message"]["content"]
-                            if "message" in stream_resp
-                            and "content" in stream_resp["message"]
-                            else ""
-                        ),
+                        content=content,
+                        additional_kwargs=additional_kwargs,
                         usage_metadata=_get_usage_metadata_from_generation_info(
                             stream_resp
                         ),
@@ -811,15 +914,7 @@ class ChatOllama(BaseChatModel):
                     ),
                     generation_info=generation_info,
                 )
-                if chunk.generation_info and (
-                    model := chunk.generation_info.get("model")
-                ):
-                    chunk.generation_info["model_name"] = model  # backwards compat
-                if self.extract_reasoning:
-                    message, is_thinking = self._extract_reasoning(
-                        chunk.message, is_thinking
-                    )
-                    chunk.message = message
+
                 yield chunk

     async def _astream(
@@ -868,7 +963,7 @@ class ChatOllama(BaseChatModel):
         self,
         tools: Sequence[Union[dict[str, Any], type, Callable, BaseTool]],
         *,
-        tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,
+        tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,  # noqa: PYI051
         **kwargs: Any,
     ) -> Runnable[LanguageModelInput, BaseMessage]:
         """Bind tool-like objects to this chat model.
@@ -883,7 +978,7 @@ class ChatOllama(BaseChatModel):
             is currently ignored as it is not supported by Ollama.**
             kwargs: Any additional parameters are passed directly to
                 ``self.bind(**kwargs)``.
-        """
+        """
         formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
         return super().bind(tools=formatted_tools, **kwargs)

@@ -916,7 +1011,7 @@ class ChatOllama(BaseChatModel):
             method: The method for steering model generation, one of:

                 - "json_schema":
                    Uses Ollama's structured output API
-                    Uses Ollama's structured output API: https://ollama.com/blog/structured-outputs
+                    Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
                 - "function_calling":
                     Uses Ollama's tool-calling API
                 - "json_mode":
@@ -1140,14 +1235,16 @@ class ChatOllama(BaseChatModel):
         """  # noqa: E501, D301
         _ = kwargs.pop("strict", None)
         if kwargs:
-            raise ValueError(f"Received unsupported arguments {kwargs}")
+            msg = f"Received unsupported arguments {kwargs}"
+            raise ValueError(msg)
         is_pydantic_schema = _is_pydantic_class(schema)
         if method == "function_calling":
             if schema is None:
-                raise ValueError(
+                msg = (
                     "schema must be specified when method is not 'json_mode'. "
                     "Received None."
                 )
+                raise ValueError(msg)
             formatted_tool = convert_to_openai_tool(schema)
             tool_name = formatted_tool["function"]["name"]
             llm = self.bind_tools(
@@ -1182,10 +1279,11 @@ class ChatOllama(BaseChatModel):
             )
         elif method == "json_schema":
             if schema is None:
-                raise ValueError(
+                msg = (
                     "schema must be specified when method is not 'json_mode'. "
                     "Received None."
                 )
+                raise ValueError(msg)
             if is_pydantic_schema:
                 schema = cast(TypeBaseModel, schema)
                 if issubclass(schema, BaseModelV1):
@@ -1199,7 +1297,7 @@ class ChatOllama(BaseChatModel):
                         "schema": schema,
                     },
                 )
-                output_parser = PydanticOutputParser(pydantic_object=schema)
+                output_parser = PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]
             else:
                 if is_typeddict(schema):
                     response_format = convert_to_json_schema(schema)
@@ -1219,10 +1317,11 @@ class ChatOllama(BaseChatModel):
                 )
                 output_parser = JsonOutputParser()
         else:
-            raise ValueError(
+            msg = (
                 f"Unrecognized method argument. Expected one of 'function_calling', "
                 f"'json_schema', or 'json_mode'. Received: '{method}'"
             )
+            raise ValueError(msg)

         if include_raw:
             parser_assign = RunnablePassthrough.assign(
@@ -1233,5 +1332,4 @@ class ChatOllama(BaseChatModel):
                 [parser_none], exception_key="parsing_error"
             )
             return RunnableMap(raw=llm) | parser_with_fallback
-        else:
-            return llm | output_parser
+        return llm | output_parser
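Taken together, the chat-model changes replace the old ``extract_reasoning`` think-tag parsing with Ollama's native ``think`` option and add separate sync/async httpx client kwargs. A minimal sketch of the new surface, assuming a locally pulled reasoning-capable model (the model name and timeout values are illustrative):

```python
# Minimal sketch of the 0.3.4 ChatOllama surface shown in the diff above.
# Assumes Ollama is running locally and the model has been pulled.
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="deepseek-r1:8b",                # illustrative model name
    reasoning=True,                        # new field; sent to Ollama as `think`
    validate_model_on_init=True,           # new field; runs validate_model() at init
    client_kwargs={"timeout": 30},         # shared httpx kwargs
    async_client_kwargs={"timeout": 120},  # async-only overrides, merged on top
)

msg = llm.invoke("How many r's are in the word strawberry?")
print(msg.content)                                     # answer without <think> tags
print(msg.additional_kwargs.get("reasoning_content"))  # captured reasoning, if any

# reasoning can also be toggled per invocation:
msg = llm.invoke("Same question, but answer directly.", reasoning=False)
```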
langchain_ollama/embeddings.py
CHANGED
@@ -1,5 +1,7 @@
 """Ollama embeddings models."""

+from __future__ import annotations
+
 from typing import Any, Optional

 from langchain_core.embeddings import Embeddings
@@ -12,6 +14,8 @@ from pydantic import (
 )
 from typing_extensions import Self

+from ._utils import validate_model
+

 class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.
@@ -95,7 +99,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     Embed multiple texts:
         .. code-block:: python

-            input_texts = ["Document 1...", "Document 2..."]
+            input_texts = ["Document 1...", "Document 2..."]
             vectors = embed.embed_documents(input_texts)
             print(len(vectors))
             # The first 3 coordinates for the first vector
@@ -110,7 +114,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         .. code-block:: python

             vector = await embed.aembed_query(input_text)
-            print(vector[:3])
+            print(vector[:3])

             # multiple:
             # await embed.aembed_documents(input_texts)
@@ -123,20 +127,38 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     model: str
     """Model name to use."""

+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in ollama locally on initialization."""
+
     base_url: Optional[str] = None
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client.
-    For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
+    """Additional kwargs to pass to the httpx clients.
+    These arguments are passed to both synchronous and async clients.
+    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+    to synchronous and asynchronous clients.
     """

-    _client: Client = PrivateAttr(default=None)
+    async_client_kwargs: Optional[dict] = {}
+    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    AsyncClient.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+    """
+
+    sync_client_kwargs: Optional[dict] = {}
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+    """
+
+    _client: Optional[Client] = PrivateAttr(default=None)
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(default=None)
+    _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
     """
     The async client to use for making requests.
     """
@@ -233,16 +255,32 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     def _set_clients(self) -> Self:
         """Set clients to use for ollama."""
         client_kwargs = self.client_kwargs or {}
-        self._client = Client(host=self.base_url, **client_kwargs)
-        self._async_client = AsyncClient(host=self.base_url, **client_kwargs)
+
+        sync_client_kwargs = client_kwargs
+        if self.sync_client_kwargs:
+            sync_client_kwargs = {**sync_client_kwargs, **self.sync_client_kwargs}
+
+        async_client_kwargs = client_kwargs
+        if self.async_client_kwargs:
+            async_client_kwargs = {**async_client_kwargs, **self.async_client_kwargs}
+
+        self._client = Client(host=self.base_url, **sync_client_kwargs)
+        self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     def embed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
-        embedded_docs = self._client.embed(
+        if not self._client:
+            msg = (
+                "Ollama client is not initialized. "
+                "Please ensure Ollama is running and the model is loaded."
+            )
+            raise ValueError(msg)
+        return self._client.embed(
             self.model, texts, options=self._default_params, keep_alive=self.keep_alive
         )["embeddings"]
-        return embedded_docs

     def embed_query(self, text: str) -> list[float]:
         """Embed query text."""
@@ -250,12 +288,17 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
-        embedded_docs = (
+        if not self._async_client:
+            msg = (
+                "Ollama client is not initialized. "
+                "Please ensure Ollama is running and the model is loaded."
+            )
+            raise ValueError(msg)
+        return (
             await self._async_client.embed(
                 self.model, texts, keep_alive=self.keep_alive
             )
         )["embeddings"]
-        return embedded_docs

     async def aembed_query(self, text: str) -> list[float]:
         """Embed query text."""
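The embeddings changes mirror the chat-model ones: optional model validation at init plus separate sync/async client kwargs. A short sketch, assuming a local server and an illustrative embedding model:

```python
# Minimal sketch of the new OllamaEmbeddings options from the diff above.
# Assumes a local Ollama server; the model name and timeouts are illustrative.
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="nomic-embed-text",            # illustrative embedding model
    validate_model_on_init=True,         # new: fail fast if the model is missing
    client_kwargs={"timeout": 30},       # shared httpx kwargs
    sync_client_kwargs={"timeout": 10},  # sync-only overrides, merged on top
)

vector = embeddings.embed_query("What is the meaning of life?")
print(len(vector))
```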
langchain_ollama/llms.py
CHANGED
@@ -1,5 +1,7 @@
 """Ollama large language models."""

+from __future__ import annotations
+
 from collections.abc import AsyncIterator, Iterator, Mapping
 from typing import (
     Any,
@@ -18,6 +20,8 @@ from ollama import AsyncClient, Client, Options
 from pydantic import PrivateAttr, model_validator
 from typing_extensions import Self

+from ._utils import validate_model
+

 class OllamaLLM(BaseLLM):
     """OllamaLLM large language models.
@@ -28,12 +32,29 @@ class OllamaLLM(BaseLLM):
             from langchain_ollama import OllamaLLM

             model = OllamaLLM(model="llama3")
-            model.invoke("Come up with 10 names for a song about parrots")
+            print(model.invoke("Come up with 10 names for a song about parrots"))
     """

     model: str
     """Model name to use."""

+    reasoning: Optional[bool] = None
+    """Controls the reasoning/thinking mode for
+    `supported models <https://ollama.com/search?c=thinking>`__.
+
+    - ``True``: Enables reasoning mode. The model's reasoning process will be
+      captured and returned separately in the ``additional_kwargs`` of the
+      response message, under ``reasoning_content``. The main response
+      content will not include the reasoning tags.
+    - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+      and the response will not include any reasoning content.
+    - ``None`` (Default): The model will use its default reasoning behavior. If
+      the model performs reasoning, the ``<think>`` and ``</think>`` tags will
+      be present directly within the main response content."""
+
+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in ollama locally on initialization."""
+
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
     (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
@@ -51,7 +72,7 @@ class OllamaLLM(BaseLLM):

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: 2048)"""

     num_gpu: Optional[int] = None
     """The number of GPUs to use. On macOS it defaults to 1 to
@@ -113,16 +134,31 @@ class OllamaLLM(BaseLLM):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx Client.
-    For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
+    """Additional kwargs to pass to the httpx clients.
+    These arguments are passed to both synchronous and async clients.
+    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+    to synchronous and asynchronous clients.
+    """
+
+    async_client_kwargs: Optional[dict] = {}
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    AsyncClient.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+    """
+
+    sync_client_kwargs: Optional[dict] = {}
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
     """

-    _client: Client = PrivateAttr(default=None)
+    _client: Optional[Client] = PrivateAttr(default=None)
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(default=None)
+    _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
     """
     The async client to use for making requests.
     """
@@ -134,8 +170,9 @@ class OllamaLLM(BaseLLM):
         **kwargs: Any,
     ) -> dict[str, Any]:
         if self.stop is not None and stop is not None:
-            raise ValueError("`stop` found in both the input and default params.")
-        elif self.stop is not None:
+            msg = "`stop` found in both the input and default params."
+            raise ValueError(msg)
+        if self.stop is not None:
             stop = self.stop

         options_dict = kwargs.pop(
@@ -159,18 +196,17 @@ class OllamaLLM(BaseLLM):
             },
         )

-        params = {
+        return {
             "prompt": prompt,
             "stream": kwargs.pop("stream", True),
             "model": kwargs.pop("model", self.model),
+            "think": kwargs.pop("reasoning", self.reasoning),
             "format": kwargs.pop("format", self.format),
             "options": Options(**options_dict),
             "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
             **kwargs,
         }

-        return params
-
     @property
     def _llm_type(self) -> str:
         """Return type of LLM."""
@@ -189,8 +225,19 @@ class OllamaLLM(BaseLLM):
     def _set_clients(self) -> Self:
         """Set clients to use for ollama."""
         client_kwargs = self.client_kwargs or {}
-        self._client = Client(host=self.base_url, **client_kwargs)
-        self._async_client = AsyncClient(host=self.base_url, **client_kwargs)
+
+        sync_client_kwargs = client_kwargs
+        if self.sync_client_kwargs:
+            sync_client_kwargs = {**sync_client_kwargs, **self.sync_client_kwargs}
+
+        async_client_kwargs = client_kwargs
+        if self.async_client_kwargs:
+            async_client_kwargs = {**async_client_kwargs, **self.async_client_kwargs}
+
+        self._client = Client(host=self.base_url, **sync_client_kwargs)
+        self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     async def _acreate_generate_stream(
@@ -199,10 +246,11 @@ class OllamaLLM(BaseLLM):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
-        async for part in await self._async_client.generate(
-            **self._generate_params(prompt, stop=stop, **kwargs)
-        ):
-            yield part
+        if self._async_client:
+            async for part in await self._async_client.generate(
+                **self._generate_params(prompt, stop=stop, **kwargs)
+            ):
+                yield part

     def _create_generate_stream(
         self,
@@ -210,23 +258,27 @@ class OllamaLLM(BaseLLM):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> Iterator[Union[Mapping[str, Any], str]]:
-        yield from self._client.generate(
-            **self._generate_params(prompt, stop=stop, **kwargs)
-        )
+        if self._client:
+            yield from self._client.generate(
+                **self._generate_params(prompt, stop=stop, **kwargs)
+            )

     async def _astream_with_aggregation(
         self,
         prompt: str,
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
-                    text=stream_resp["response"] if "response" in stream_resp else "",
+                    text=stream_resp.get("response", ""),
                     generation_info=(
                         dict(stream_resp) if stream_resp.get("done") is True else None
                     ),
@@ -242,7 +294,14 @@ class OllamaLLM(BaseLLM):
                 verbose=verbose,
             )
         if final_chunk is None:
-            raise ValueError("No data received from Ollama stream.")
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)
+
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}

         return final_chunk

@@ -251,14 +310,17 @@ class OllamaLLM(BaseLLM):
         prompt: str,
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
-                    text=stream_resp["response"] if "response" in stream_resp else "",
+                    text=stream_resp.get("response", ""),
                     generation_info=(
                         dict(stream_resp) if stream_resp.get("done") is True else None
                     ),
@@ -274,7 +336,14 @@ class OllamaLLM(BaseLLM):
                 verbose=verbose,
             )
         if final_chunk is None:
-            raise ValueError("No data received from Ollama stream.")
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)
+
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}

         return final_chunk

@@ -323,13 +392,22 @@ class OllamaLLM(BaseLLM):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                additional_kwargs = {}
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = GenerationChunk(
                     text=(stream_resp.get("response", "")),
-                    generation_info=(
-                        dict(stream_resp) if stream_resp.get("done") is True else None
-                    ),
+                    generation_info={
+                        "finish_reason": self.stop,
+                        **additional_kwargs,
+                        **(
+                            dict(stream_resp) if stream_resp.get("done") is True else {}
+                        ),
+                    },
                 )
                 if run_manager:
                     run_manager.on_llm_new_token(
@@ -345,13 +423,22 @@ class OllamaLLM(BaseLLM):
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                additional_kwargs = {}
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = GenerationChunk(
                     text=(stream_resp.get("response", "")),
-                    generation_info=(
-                        dict(stream_resp) if stream_resp.get("done") is True else None
-                    ),
+                    generation_info={
+                        "finish_reason": self.stop,
+                        **additional_kwargs,
+                        **(
+                            dict(stream_resp) if stream_resp.get("done") is True else {}
+                        ),
+                    },
                 )
                 if run_manager:
                     await run_manager.on_llm_new_token(
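The completion-model (``OllamaLLM``) changes follow the same pattern: a ``reasoning`` flag forwarded to Ollama as the ``think`` option, ``validate_model_on_init``, and split sync/async client kwargs. A minimal sketch with an illustrative model name:

```python
# Minimal sketch of the new OllamaLLM options from the diff above.
# Assumes a local Ollama server with a reasoning-capable model pulled.
from langchain_ollama import OllamaLLM

llm = OllamaLLM(
    model="deepseek-r1:1.5b",     # illustrative model name
    reasoning=True,               # forwarded to Ollama as the `think` option
    validate_model_on_init=True,  # new: validate the model exists locally
)

print(llm.invoke("How many r's are in the word strawberry?"))
```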
langchain_ollama-0.3.4.dist-info/METADATA
ADDED
@@ -0,0 +1,68 @@
+Metadata-Version: 2.1
+Name: langchain-ollama
+Version: 0.3.4
+Summary: An integration package connecting Ollama and LangChain
+License: MIT
+Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
+Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
+Project-URL: repository, https://github.com/langchain-ai/langchain
+Requires-Python: >=3.9
+Requires-Dist: ollama<1.0.0,>=0.5.1
+Requires-Dist: langchain-core<1.0.0,>=0.3.68
+Description-Content-Type: text/markdown
+
+# langchain-ollama
+
+This package contains the LangChain integration with Ollama
+
+## Installation
+
+```bash
+pip install -U langchain-ollama
+```
+
+For the package to work, you will need to install and run the Ollama server locally ([download](https://ollama.com/download)).
+
+To run integration tests (`make integration_tests`), you will need the following models installed in your Ollama server:
+
+- `llama3.1`
+- `deepseek-r1:1.5b`
+
+Install these models by running:
+
+```bash
+ollama pull <name-of-model>
+```
+
+## [Chat Models](https://python.langchain.com/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html#chatollama)
+
+`ChatOllama` class exposes chat models from Ollama.
+
+```python
+from langchain_ollama import ChatOllama
+
+llm = ChatOllama(model="llama3.1")
+llm.invoke("Sing a ballad of LangChain.")
+```
+
+## [Embeddings](https://python.langchain.com/api_reference/ollama/embeddings/langchain_ollama.embeddings.OllamaEmbeddings.html#ollamaembeddings)
+
+`OllamaEmbeddings` class exposes embeddings from Ollama.
+
+```python
+from langchain_ollama import OllamaEmbeddings
+
+embeddings = OllamaEmbeddings(model="llama3.1")
+embeddings.embed_query("What is the meaning of life?")
+```
+
+## [LLMs](https://python.langchain.com/api_reference/ollama/llms/langchain_ollama.llms.OllamaLLM.html#ollamallm)
+
+`OllamaLLM` class exposes traditional LLMs from Ollama.
+
+```python
+from langchain_ollama import OllamaLLM
+
+llm = OllamaLLM(model="llama3.1")
+llm.invoke("The meaning of life is")
+```
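The new metadata raises the dependency floors (``ollama`` >= 0.5.1, ``langchain-core`` >= 0.3.68) and drops the ``<4.0`` Python cap. A small sanity check of an installed environment, assuming the third-party ``packaging`` library is available:

```python
# Minimal sketch: verify an installed environment meets the 0.3.4 floors above.
# importlib.metadata is stdlib; packaging is a separate (but very common) dependency.
from importlib.metadata import version

from packaging.version import Version

assert Version(version("langchain-ollama")) >= Version("0.3.4")
assert Version(version("ollama")) >= Version("0.5.1")
assert Version(version("langchain-core")) >= Version("0.3.68")
```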
langchain_ollama-0.3.4.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+langchain_ollama-0.3.4.dist-info/METADATA,sha256=wM54qEosykpO89kExse0V4Y3K3ncspLP_mFNKsBxTNY,2072
+langchain_ollama-0.3.4.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+langchain_ollama-0.3.4.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+langchain_ollama-0.3.4.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+langchain_ollama/__init__.py,sha256=TI1gI0Wpg7mRXehGpxrJG2flF_t4Ev-aIJlLKV-CgL0,633
+langchain_ollama/_utils.py,sha256=dmFO4tSvDTeMALc89QnTBLNWPMZL0eNAq1EDwuMjRA8,1416
+langchain_ollama/chat_models.py,sha256=olz3KJeLG1vk47Xl38nN9bP4bcol5cBQnPnu5MyP8k8,55539
+langchain_ollama/embeddings.py,sha256=VprOFiBRuUPGEygoIfxvAZStUsqRj65ZNMpkvCAo_9Y,10239
+langchain_ollama/llms.py,sha256=PSJ-VQMocp1nm-pgtnKnozidt66RKJiEnhdzftoLNNc,16778
+langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ollama-0.3.4.dist-info/RECORD,,
langchain_ollama-0.3.2.dist-info/METADATA
REMOVED
@@ -1,57 +0,0 @@
-Metadata-Version: 2.1
-Name: langchain-ollama
-Version: 0.3.2
-Summary: An integration package connecting Ollama and LangChain
-License: MIT
-Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
-Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
-Project-URL: repository, https://github.com/langchain-ai/langchain
-Requires-Python: <4.0,>=3.9
-Requires-Dist: ollama<1,>=0.4.4
-Requires-Dist: langchain-core<1.0.0,>=0.3.52
-Description-Content-Type: text/markdown
-
-# langchain-ollama
-
-This package contains the LangChain integration with Ollama
-
-## Installation
-
-```bash
-pip install -U langchain-ollama
-```
-
-You will also need to run the Ollama server locally.
-You can download it [here](https://ollama.com/download).
-
-## Chat Models
-
-`ChatOllama` class exposes chat models from Ollama.
-
-```python
-from langchain_ollama import ChatOllama
-
-llm = ChatOllama(model="llama3-groq-tool-use")
-llm.invoke("Sing a ballad of LangChain.")
-```
-
-## Embeddings
-
-`OllamaEmbeddings` class exposes embeddings from Ollama.
-
-```python
-from langchain_ollama import OllamaEmbeddings
-
-embeddings = OllamaEmbeddings(model="llama3")
-embeddings.embed_query("What is the meaning of life?")
-```
-
-## LLMs
-`OllamaLLM` class exposes LLMs from Ollama.
-
-```python
-from langchain_ollama import OllamaLLM
-
-llm = OllamaLLM(model="llama3")
-llm.invoke("The meaning of life is")
-```
langchain_ollama-0.3.2.dist-info/RECORD
REMOVED
@@ -1,10 +0,0 @@
-langchain_ollama-0.3.2.dist-info/METADATA,sha256=58k8ADvokbZrjkTN5_-DRJWHYxZI6A1IbYO7rJ2DWc8,1463
-langchain_ollama-0.3.2.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
-langchain_ollama-0.3.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-langchain_ollama-0.3.2.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
-langchain_ollama/__init__.py,sha256=1f8Cyf1_bS0CT16U8-Os1P1Oa3erIDtIBTH4KVmBLvY,633
-langchain_ollama/chat_models.py,sha256=3ZvSHz-14idWKykyQgMV2i84bFrXVRjpU9dbGTz4_hs,50735
-langchain_ollama/embeddings.py,sha256=2G0gfnUbPBpVv9oBzL7C3z3FI_VumQ2WCYCf_-LMz-Q,8621
-langchain_ollama/llms.py,sha256=DiCWKLX2JPZAoVoRTKKQ2yOuoXbVStg0wkS1p6IruQU,13007
-langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-langchain_ollama-0.3.2.dist-info/RECORD,,
{langchain_ollama-0.3.2.dist-info → langchain_ollama-0.3.4.dist-info}/entry_points.txt
File without changes

{langchain_ollama-0.3.2.dist-info → langchain_ollama-0.3.4.dist-info}/licenses/LICENSE
File without changes