langchain-ollama 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_ollama/__init__.py +1 -1
- langchain_ollama/_utils.py +39 -0
- langchain_ollama/chat_models.py +190 -118
- langchain_ollama/embeddings.py +35 -15
- langchain_ollama/llms.py +99 -35
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/METADATA +23 -12
- langchain_ollama-0.3.4.dist-info/RECORD +11 -0
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/WHEEL +1 -1
- langchain_ollama-0.3.3.dist-info/RECORD +0 -10
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/entry_points.txt +0 -0
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/licenses/LICENSE +0 -0
langchain_ollama/_utils.py
ADDED
@@ -0,0 +1,39 @@
+"""Utility functions for validating Ollama models."""
+
+from httpx import ConnectError
+from ollama import Client, ResponseError
+
+
+def validate_model(client: Client, model_name: str) -> None:
+    """Validate that a model exists in the Ollama instance.
+
+    Args:
+        client: The Ollama client.
+        model_name: The name of the model to validate.
+
+    Raises:
+        ValueError: If the model is not found or if there's a connection issue.
+    """
+    try:
+        response = client.list()
+
+        model_names: list[str] = [model["model"] for model in response["models"]]
+
+        if not any(
+            model_name == m or m.startswith(f"{model_name}:") for m in model_names
+        ):
+            msg = (
+                f"Model `{model_name}` not found in Ollama. Please pull the "
+                f"model (using `ollama pull {model_name}`) or specify a valid "
+                f"model name. Available local models: {', '.join(model_names)}"
+            )
+            raise ValueError(msg)
+    except ConnectError as e:
+        msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
+        raise ValueError(msg) from e
+    except ResponseError as e:
+        msg = (
+            "Received an error from the Ollama API. "
+            "Please check your Ollama server logs."
+        )
+        raise ValueError(msg) from e
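The new helper can be exercised on its own before it is wired into a model class. A minimal sketch, assuming a local Ollama server on the default port and that the `langchain-ollama` 0.3.4 wheel is installed (the model name is only an example):

```python
from ollama import Client

from langchain_ollama._utils import validate_model

# Assumes an Ollama server is reachable at the default host/port.
client = Client(host="http://localhost:11434")

try:
    # Passes silently when "llama3.1" (or any "llama3.1:<tag>") is pulled locally.
    validate_model(client, "llama3.1")
except ValueError as err:
    # Raised when the model is missing, the server is unreachable,
    # or the Ollama API returned an error.
    print(f"Model validation failed: {err}")
```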
langchain_ollama/chat_models.py
CHANGED
@@ -1,12 +1,13 @@
 """Ollama chat models."""

+from __future__ import annotations
+
 import json
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
     Any,
     Callable,
-    Final,
     Literal,
     Optional,
     Union,
@@ -25,7 +26,6 @@ from langchain_core.messages import (
     AIMessage,
     AIMessageChunk,
     BaseMessage,
-    BaseMessageChunk,
     ChatMessage,
     HumanMessage,
     SystemMessage,
@@ -55,8 +55,7 @@ from pydantic.json_schema import JsonSchemaValue
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self, is_typeddict

-
-DEFAULT_THINK_TOKEN_END: Final[str] = "</think>"
+from ._utils import validate_model


 def _get_usage_metadata_from_generation_info(
@@ -77,7 +76,9 @@ def _get_usage_metadata_from_generation_info(


 def _parse_json_string(
-    json_string: str,
+    json_string: str,
+    raw_tool_call: dict[str, Any],
+    skip: bool,  # noqa: FBT001
 ) -> Any:
     """Attempt to parse a JSON string for tool calling.

@@ -151,26 +152,30 @@ def _get_tool_calls_from_response(
 ) -> list[ToolCall]:
     """Get tool calls from ollama response."""
     tool_calls = []
-    if "message" in response
-            )
+    if "message" in response and (
+        raw_tool_calls := response["message"].get("tool_calls")
+    ):
+        tool_calls.extend(
+            [
+                tool_call(
+                    id=str(uuid4()),
+                    name=tc["function"]["name"],
+                    args=_parse_arguments_from_tool_call(tc) or {},
                 )
+                for tc in raw_tool_calls
+            ]
+        )
     return tool_calls


-def _lc_tool_call_to_openai_tool_call(
+def _lc_tool_call_to_openai_tool_call(tool_call_: ToolCall) -> dict:
+    """Convert a LangChain tool call to an OpenAI tool call format."""
     return {
         "type": "function",
-        "id":
+        "id": tool_call_["id"],
         "function": {
-            "name":
-            "arguments":
+            "name": tool_call_["name"],
+            "arguments": tool_call_["args"],
         },
     }

@@ -180,14 +185,12 @@ def _get_image_from_data_content_block(block: dict) -> str:
     if block["type"] == "image":
         if block["source_type"] == "base64":
             return block["data"]
-
-        error_message = "Image data only supported through in-line base64 format."
-        raise ValueError(error_message)
-
-    else:
-        error_message = f"Blocks of type {block['type']} not supported."
+        error_message = "Image data only supported through in-line base64 format."
         raise ValueError(error_message)

+    error_message = f"Blocks of type {block['type']} not supported."
+    raise ValueError(error_message)
+

 def _is_pydantic_class(obj: Any) -> bool:
     return isinstance(obj, type) and is_basemodel_subclass(obj)
@@ -209,8 +212,22 @@ class ChatOllama(BaseChatModel):
     Key init args — completion params:
         model: str
            Name of Ollama model to use.
+        reasoning: Optional[bool]
+            Controls the reasoning/thinking mode for
+            `supported models <https://ollama.com/search?c=thinking>`__.
+
+            - ``True``: Enables reasoning mode. The model's reasoning process will be
+              captured and returned separately in the ``additional_kwargs`` of the
+              response message, under ``reasoning_content``. The main response
+              content will not include the reasoning tags.
+            - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+              and the response will not include any reasoning content.
+            - ``None`` (Default): The model will use its default reasoning behavior. Note
+              however, if the model's default behavior *is* to perform reasoning, think tags
+              (``<think>`` and ``</think>``) will be present within the main response content
+              unless you set ``reasoning`` to ``True``.
         temperature: float
-            Sampling temperature. Ranges from 0.0 to 1.0
+            Sampling temperature. Ranges from ``0.0`` to ``1.0``.
         num_predict: Optional[int]
             Max number of tokens to generate.

@@ -326,7 +343,6 @@ class ChatOllama(BaseChatModel):
             '{"location": "Pune, India", "time_of_day": "morning"}'

     Tool Calling:
-
         .. code-block:: python

             from langchain_ollama import ChatOllama
@@ -345,17 +361,70 @@ class ChatOllama(BaseChatModel):
             'args': {'a': 45, 'b': 67},
             'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
             'type': 'tool_call'}]
-
+
+    Thinking / Reasoning:
+        You can enable reasoning mode for models that support it by setting
+        the ``reasoning`` parameter to ``True`` in either the constructor or
+        the ``invoke``/``stream`` methods. This will enable the model to think
+        through the problem and return the reasoning process separately in the
+        ``additional_kwargs`` of the response message, under ``reasoning_content``.
+
+        If ``reasoning`` is set to ``None``, the model will use its default reasoning
+        behavior, and any reasoning content will *not* be captured under the
+        ``reasoning_content`` key, but will be present within the main response content
+        as think tags (``<think>`` and ``</think>``).
+
+        .. note::
+            This feature is only available for `models that support reasoning <https://ollama.com/search?c=thinking>`__.
+
+        .. code-block:: python
+
+            from langchain_ollama import ChatOllama
+
+            llm = ChatOllama(
+                model = "deepseek-r1:8b",
+                reasoning= True,
+            )
+
+            user_message = HumanMessage(content="how many r in the word strawberry?")
+            messages: List[Any] = [user_message]
+            llm.invoke(messages)
+
+            # or, on an invocation basis:
+
+            llm.invoke(messages, reasoning=True)
+            # or llm.stream(messages, reasoning=True)
+
+            # If not provided, the invocation will default to the ChatOllama reasoning
+            # param provided (None by default).
+
+        .. code-block:: python
+
+            AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})
+
+
+    """  # noqa: E501, pylint: disable=line-too-long

     model: str
     """Model name to use."""

-
-    """
+    reasoning: Optional[bool] = None
+    """Controls the reasoning/thinking mode for
+    `supported models <https://ollama.com/search?c=thinking>`__.
+
+    - ``True``: Enables reasoning mode. The model's reasoning process will be
+      captured and returned separately in the ``additional_kwargs`` of the
+      response message, under ``reasoning_content``. The main response
+      content will not include the reasoning tags.
+    - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+      and the response will not include any reasoning content.
+    - ``None`` (Default): The model will use its default reasoning behavior. Note
+      however, if the model's default behavior *is* to perform reasoning, think tags
+      ()``<think>`` and ``</think>``) will be present within the main response content
+      unless you set ``reasoning`` to ``True``."""
+
+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in Ollama locally on initialization."""

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
@@ -436,7 +505,7 @@ class ChatOllama(BaseChatModel):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx clients.
+    """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
     Use sync_client_kwargs and async_client_kwargs to pass different arguments
     to synchronous and asynchronous clients.
@@ -445,21 +514,21 @@ class ChatOllama(BaseChatModel):
     async_client_kwargs: Optional[dict] = {}
     """Additional kwargs to merge with client_kwargs before
     passing to the httpx AsyncClient.
-
+    `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
     """

     sync_client_kwargs: Optional[dict] = {}
     """Additional kwargs to merge with client_kwargs before
     passing to the httpx Client.
-
+    `Full list of params. <https://www.python-httpx.org/api/#client>`__
     """

-    _client: Client = PrivateAttr(
+    _client: Client = PrivateAttr()
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(
+    _async_client: AsyncClient = PrivateAttr()
     """
     The async client to use for making requests.
     """
@@ -473,8 +542,9 @@ class ChatOllama(BaseChatModel):
         ollama_messages = self._convert_messages_to_ollama_messages(messages)

         if self.stop is not None and stop is not None:
-
-
+            msg = "`stop` found in both the input and default params."
+            raise ValueError(msg)
+        if self.stop is not None:
             stop = self.stop

         options_dict = kwargs.pop(
@@ -502,6 +572,7 @@ class ChatOllama(BaseChatModel):
             "messages": ollama_messages,
             "stream": kwargs.pop("stream", True),
             "model": kwargs.pop("model", self.model),
+            "think": kwargs.pop("reasoning", self.reasoning),
             "format": kwargs.pop("format", self.format),
             "options": Options(**options_dict),
             "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
@@ -528,6 +599,8 @@ class ChatOllama(BaseChatModel):

         self._client = Client(host=self.base_url, **sync_client_kwargs)
         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     def _convert_messages_to_ollama_messages(
@@ -558,7 +631,8 @@ class ChatOllama(BaseChatModel):
                 role = "tool"
                 tool_call_id = message.tool_call_id
             else:
-
+                msg = "Received unsupported message type for Ollama."
+                raise ValueError(msg)

             content = ""
             images = []
@@ -582,10 +656,11 @@ class ChatOllama(BaseChatModel):
                         ):
                             image_url = temp_image_url["url"]
                         else:
-
+                            msg = (
                                 "Only string image_url or dict with string 'url' "
                                 "inside content parts are supported."
                             )
+                            raise ValueError(msg)

                         image_url_components = image_url.split(",")
                         # Support data:image/jpeg;base64,<image> format
@@ -598,47 +673,27 @@ class ChatOllama(BaseChatModel):
                         image = _get_image_from_data_content_block(content_part)
                         images.append(image)
                     else:
-
+                        msg = (
                             "Unsupported message content type. "
                             "Must either have type 'text' or type 'image_url' "
                             "with a string 'image_url' field."
                         )
-
-
+                        raise ValueError(msg)
+            # Should convert to ollama.Message once role includes tool,
+            # and tool_call_id is in Message
+            msg_: dict = {
                 "role": role,
                 "content": content,
                 "images": images,
             }
             if tool_calls:
-
+                msg_["tool_calls"] = tool_calls
             if tool_call_id:
-
-            ollama_messages.append(
+                msg_["tool_call_id"] = tool_call_id
+            ollama_messages.append(msg_)

         return ollama_messages

-    def _extract_reasoning(
-        self, message_chunk: BaseMessageChunk, is_thinking: bool
-    ) -> tuple[BaseMessageChunk, bool]:
-        """Mutate a message chunk to extract reasoning content."""
-        if not self.extract_reasoning:
-            return message_chunk, is_thinking
-        elif self.extract_reasoning is True:
-            start_token = DEFAULT_THINK_TOKEN_START
-            end_token = DEFAULT_THINK_TOKEN_END
-        else:
-            start_token, end_token = cast(tuple, self.extract_reasoning)
-        if start_token in message_chunk.content:
-            is_thinking = True
-        content = message_chunk.content
-        if is_thinking:
-            message_chunk.additional_kwargs["reasoning_content"] = content
-            message_chunk.content = ""
-        if end_token in content:
-            is_thinking = False
-
-        return message_chunk, is_thinking
-
     async def _acreate_chat_stream(
         self,
         messages: list[BaseMessage],
@@ -662,16 +717,18 @@ class ChatOllama(BaseChatModel):
         chat_params = self._chat_params(messages, stop, **kwargs)

         if chat_params["stream"]:
-
+            if self._client:
+                yield from self._client.chat(**chat_params)
         else:
-
+            if self._client:
+                yield self._client.chat(**chat_params)

     def _chat_stream_with_aggregation(
         self,
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> ChatGenerationChunk:
         final_chunk = None
@@ -687,7 +744,8 @@ class ChatOllama(BaseChatModel):
                 verbose=verbose,
             )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)

         return final_chunk

@@ -696,7 +754,7 @@ class ChatOllama(BaseChatModel):
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> ChatGenerationChunk:
         final_chunk = None
@@ -712,7 +770,8 @@ class ChatOllama(BaseChatModel):
                 verbose=verbose,
             )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)

         return final_chunk

@@ -759,22 +818,35 @@ class ChatOllama(BaseChatModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
-
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
+                    if "model" in generation_info:
+                        generation_info["model_name"] = generation_info["model"]
                     _ = generation_info.pop("message", None)
                 else:
                     generation_info = None
+
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                additional_kwargs = {}
+                if (
+                    reasoning
+                    and "message" in stream_resp
+                    and (thinking_content := stream_resp["message"].get("thinking"))
+                ):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = ChatGenerationChunk(
                     message=AIMessageChunk(
-                        content=
-
-                        if "message" in stream_resp
-                        and "content" in stream_resp["message"]
-                        else ""
-                        ),
+                        content=content,
+                        additional_kwargs=additional_kwargs,
                         usage_metadata=_get_usage_metadata_from_generation_info(
                             stream_resp
                         ),
@@ -782,15 +854,7 @@ class ChatOllama(BaseChatModel):
                     ),
                     generation_info=generation_info,
                 )
-
-                    model := chunk.generation_info.get("model")
-                ):
-                    chunk.generation_info["model_name"] = model  # backwards compat
-                if self.extract_reasoning:
-                    message, is_thinking = self._extract_reasoning(
-                        chunk.message, is_thinking
-                    )
-                    chunk.message = message
+
                 yield chunk

     def _stream(
@@ -814,22 +878,35 @@ class ChatOllama(BaseChatModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
-
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
+                    if "model" in generation_info:
+                        generation_info["model_name"] = generation_info["model"]
                     _ = generation_info.pop("message", None)
                 else:
                     generation_info = None
+
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                additional_kwargs = {}
+                if (
+                    reasoning
+                    and "message" in stream_resp
+                    and (thinking_content := stream_resp["message"].get("thinking"))
+                ):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = ChatGenerationChunk(
                     message=AIMessageChunk(
-                        content=
-
-                        if "message" in stream_resp
-                        and "content" in stream_resp["message"]
-                        else ""
-                        ),
+                        content=content,
+                        additional_kwargs=additional_kwargs,
                         usage_metadata=_get_usage_metadata_from_generation_info(
                             stream_resp
                         ),
@@ -837,15 +914,7 @@ class ChatOllama(BaseChatModel):
                     ),
                     generation_info=generation_info,
                 )
-
-                    model := chunk.generation_info.get("model")
-                ):
-                    chunk.generation_info["model_name"] = model  # backwards compat
-                if self.extract_reasoning:
-                    message, is_thinking = self._extract_reasoning(
-                        chunk.message, is_thinking
-                    )
-                    chunk.message = message
+
                 yield chunk

     async def _astream(
@@ -894,7 +963,7 @@ class ChatOllama(BaseChatModel):
         self,
         tools: Sequence[Union[dict[str, Any], type, Callable, BaseTool]],
         *,
-        tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,
+        tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,  # noqa: PYI051
         **kwargs: Any,
     ) -> Runnable[LanguageModelInput, BaseMessage]:
         """Bind tool-like objects to this chat model.
@@ -909,7 +978,7 @@ class ChatOllama(BaseChatModel):
                 is currently ignored as it is not supported by Ollama.**
             kwargs: Any additional parameters are passed directly to
                 ``self.bind(**kwargs)``.
-        """
+        """
         formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
         return super().bind(tools=formatted_tools, **kwargs)

@@ -942,7 +1011,7 @@ class ChatOllama(BaseChatModel):
             method: The method for steering model generation, one of:

                 - "json_schema":
-                    Uses Ollama's structured output API
+                    Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
                 - "function_calling":
                     Uses Ollama's tool-calling API
                 - "json_mode":
@@ -1166,14 +1235,16 @@ class ChatOllama(BaseChatModel):
         """  # noqa: E501, D301
         _ = kwargs.pop("strict", None)
         if kwargs:
-
+            msg = f"Received unsupported arguments {kwargs}"
+            raise ValueError(msg)
         is_pydantic_schema = _is_pydantic_class(schema)
         if method == "function_calling":
             if schema is None:
-
+                msg = (
                     "schema must be specified when method is not 'json_mode'. "
                     "Received None."
                 )
+                raise ValueError(msg)
             formatted_tool = convert_to_openai_tool(schema)
             tool_name = formatted_tool["function"]["name"]
             llm = self.bind_tools(
@@ -1208,10 +1279,11 @@ class ChatOllama(BaseChatModel):
             )
         elif method == "json_schema":
             if schema is None:
-
+                msg = (
                     "schema must be specified when method is not 'json_mode'. "
                     "Received None."
                 )
+                raise ValueError(msg)
             if is_pydantic_schema:
                 schema = cast(TypeBaseModel, schema)
                 if issubclass(schema, BaseModelV1):
@@ -1225,7 +1297,7 @@ class ChatOllama(BaseChatModel):
                         "schema": schema,
                     },
                 )
-                output_parser = PydanticOutputParser(pydantic_object=schema)
+                output_parser = PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]
             else:
                 if is_typeddict(schema):
                     response_format = convert_to_json_schema(schema)
@@ -1245,10 +1317,11 @@ class ChatOllama(BaseChatModel):
                 )
                 output_parser = JsonOutputParser()
         else:
-
+            msg = (
                 f"Unrecognized method argument. Expected one of 'function_calling', "
                 f"'json_schema', or 'json_mode'. Received: '{method}'"
             )
+            raise ValueError(msg)

         if include_raw:
             parser_assign = RunnablePassthrough.assign(
@@ -1259,5 +1332,4 @@ class ChatOllama(BaseChatModel):
                 [parser_none], exception_key="parsing_error"
             )
             return RunnableMap(raw=llm) | parser_with_fallback
-
-        return llm | output_parser
+        return llm | output_parser
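Taken together, the chat-model changes mean reasoning output now arrives through Ollama's native `think` field instead of being scraped from `<think>` tags. A short sketch of reading it back, assuming a thinking-capable model such as `deepseek-r1:8b` has been pulled locally:

```python
from langchain_ollama import ChatOllama

# validate_model_on_init fails fast if the model has not been pulled;
# reasoning=True is forwarded to Ollama as the `think` option.
llm = ChatOllama(
    model="deepseek-r1:8b",
    reasoning=True,
    validate_model_on_init=True,
)

response = llm.invoke("How many r's are in the word strawberry?")

print(response.content)  # final answer, without <think> tags
print(response.additional_kwargs.get("reasoning_content"))  # captured reasoning
```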
langchain_ollama/embeddings.py
CHANGED
@@ -1,5 +1,7 @@
 """Ollama embeddings models."""

+from __future__ import annotations
+
 from typing import Any, Optional

 from langchain_core.embeddings import Embeddings
@@ -12,6 +14,8 @@ from pydantic import (
 )
 from typing_extensions import Self

+from ._utils import validate_model
+

 class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.
@@ -95,7 +99,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     Embed multiple texts:
         .. code-block:: python

-
+            input_texts = ["Document 1...", "Document 2..."]
             vectors = embed.embed_documents(input_texts)
             print(len(vectors))
             # The first 3 coordinates for the first vector
@@ -110,7 +114,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         .. code-block:: python

             vector = await embed.aembed_query(input_text)
-
+            print(vector[:3])

             # multiple:
             # await embed.aembed_documents(input_texts)
@@ -123,34 +127,38 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     model: str
     """Model name to use."""

+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in ollama locally on initialization."""
+
     base_url: Optional[str] = None
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx clients.
+    """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
     Use sync_client_kwargs and async_client_kwargs to pass different arguments
     to synchronous and asynchronous clients.
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-
+    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    AsyncClient.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-    For a full list of the params, see
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
     """

-    _client: Client = PrivateAttr(default=None)
+    _client: Optional[Client] = PrivateAttr(default=None)
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(default=None)
+    _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
     """
     The async client to use for making requests.
     """
@@ -258,14 +266,21 @@ class OllamaEmbeddings(BaseModel, Embeddings):

         self._client = Client(host=self.base_url, **sync_client_kwargs)
         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     def embed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
-
+        if not self._client:
+            msg = (
+                "Ollama client is not initialized. "
+                "Please ensure Ollama is running and the model is loaded."
+            )
+            raise ValueError(msg)
+        return self._client.embed(
             self.model, texts, options=self._default_params, keep_alive=self.keep_alive
         )["embeddings"]
-        return embedded_docs

     def embed_query(self, text: str) -> list[float]:
         """Embed query text."""
@@ -273,12 +288,17 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
-
+        if not self._async_client:
+            msg = (
+                "Ollama client is not initialized. "
+                "Please ensure Ollama is running and the model is loaded."
+            )
+            raise ValueError(msg)
+        return (
             await self._async_client.embed(
                 self.model, texts, keep_alive=self.keep_alive
             )
         )["embeddings"]
-        return embedded_docs

     async def aembed_query(self, text: str) -> list[float]:
         """Embed query text."""
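The embeddings class gains the same opt-in validation, and its private clients are now `Optional`, so calls raise a clear `ValueError` if the client was never initialized. A brief sketch, assuming some locally pulled embedding model (the `nomic-embed-text` name below is only an illustrative choice):

```python
from langchain_ollama import OllamaEmbeddings

# validate_model_on_init raises at construction time (via the new
# _utils.validate_model helper) if the model is not available locally.
embed = OllamaEmbeddings(
    model="nomic-embed-text",
    validate_model_on_init=True,
)

vectors = embed.embed_documents(["Document 1...", "Document 2..."])
print(len(vectors), len(vectors[0]))  # number of documents, embedding width
```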
langchain_ollama/llms.py
CHANGED
@@ -1,5 +1,7 @@
 """Ollama large language models."""

+from __future__ import annotations
+
 from collections.abc import AsyncIterator, Iterator, Mapping
 from typing import (
     Any,
@@ -18,6 +20,8 @@ from ollama import AsyncClient, Client, Options
 from pydantic import PrivateAttr, model_validator
 from typing_extensions import Self

+from ._utils import validate_model
+

 class OllamaLLM(BaseLLM):
     """OllamaLLM large language models.
@@ -28,12 +32,29 @@ class OllamaLLM(BaseLLM):
             from langchain_ollama import OllamaLLM

             model = OllamaLLM(model="llama3")
-            model.invoke("Come up with 10 names for a song about parrots")
+            print(model.invoke("Come up with 10 names for a song about parrots"))
     """

     model: str
     """Model name to use."""

+    reasoning: Optional[bool] = None
+    """Controls the reasoning/thinking mode for
+    `supported models <https://ollama.com/search?c=thinking>`__.
+
+    - ``True``: Enables reasoning mode. The model's reasoning process will be
+      captured and returned separately in the ``additional_kwargs`` of the
+      response message, under ``reasoning_content``. The main response
+      content will not include the reasoning tags.
+    - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+      and the response will not include any reasoning content.
+    - ``None`` (Default): The model will use its default reasoning behavior. If
+      the model performs reasoning, the ``<think>`` and ``</think>`` tags will
+      be present directly within the main response content."""
+
+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in ollama locally on initialization."""
+
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
     (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
@@ -51,7 +72,7 @@ class OllamaLLM(BaseLLM):

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048)
+    next token. (Default: 2048)"""

     num_gpu: Optional[int] = None
     """The number of GPUs to use. On macOS it defaults to 1 to
@@ -113,30 +134,31 @@ class OllamaLLM(BaseLLM):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx clients.
+    """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
     Use sync_client_kwargs and async_client_kwargs to pass different arguments
     to synchronous and asynchronous clients.
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    AsyncClient.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-    For a full list of the params, see
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
     """

-    _client: Client = PrivateAttr(default=None)
+    _client: Optional[Client] = PrivateAttr(default=None)
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(default=None)
+    _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
     """
     The async client to use for making requests.
     """
@@ -148,8 +170,9 @@ class OllamaLLM(BaseLLM):
         **kwargs: Any,
     ) -> dict[str, Any]:
         if self.stop is not None and stop is not None:
-
-
+            msg = "`stop` found in both the input and default params."
+            raise ValueError(msg)
+        if self.stop is not None:
             stop = self.stop

         options_dict = kwargs.pop(
@@ -173,18 +196,17 @@ class OllamaLLM(BaseLLM):
             },
         )

-
+        return {
             "prompt": prompt,
             "stream": kwargs.pop("stream", True),
             "model": kwargs.pop("model", self.model),
+            "think": kwargs.pop("reasoning", self.reasoning),
             "format": kwargs.pop("format", self.format),
             "options": Options(**options_dict),
             "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
             **kwargs,
         }

-        return params
-
     @property
     def _llm_type(self) -> str:
         """Return type of LLM."""
@@ -214,6 +236,8 @@ class OllamaLLM(BaseLLM):

         self._client = Client(host=self.base_url, **sync_client_kwargs)
         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     async def _acreate_generate_stream(
@@ -222,10 +246,11 @@ class OllamaLLM(BaseLLM):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
+        if self._async_client:
+            async for part in await self._async_client.generate(
+                **self._generate_params(prompt, stop=stop, **kwargs)
+            ):
+                yield part

     def _create_generate_stream(
         self,
@@ -233,23 +258,27 @@ class OllamaLLM(BaseLLM):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> Iterator[Union[Mapping[str, Any], str]]:
+        if self._client:
+            yield from self._client.generate(
+                **self._generate_params(prompt, stop=stop, **kwargs)
+            )

     async def _astream_with_aggregation(
         self,
         prompt: str,
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
-                    text=stream_resp
+                    text=stream_resp.get("response", ""),
                     generation_info=(
                         dict(stream_resp) if stream_resp.get("done") is True else None
                     ),
@@ -265,7 +294,14 @@ class OllamaLLM(BaseLLM):
                     verbose=verbose,
                 )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)
+
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}

         return final_chunk

@@ -274,14 +310,17 @@ class OllamaLLM(BaseLLM):
         prompt: str,
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
-                    text=stream_resp
+                    text=stream_resp.get("response", ""),
                     generation_info=(
                         dict(stream_resp) if stream_resp.get("done") is True else None
                     ),
@@ -297,7 +336,14 @@ class OllamaLLM(BaseLLM):
                     verbose=verbose,
                 )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)
+
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}

         return final_chunk

@@ -346,13 +392,22 @@ class OllamaLLM(BaseLLM):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                additional_kwargs = {}
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = GenerationChunk(
                     text=(stream_resp.get("response", "")),
-                    generation_info=
-
-
+                    generation_info={
+                        "finish_reason": self.stop,
+                        **additional_kwargs,
+                        **(
+                            dict(stream_resp) if stream_resp.get("done") is True else {}
+                        ),
+                    },
                 )
                 if run_manager:
                     run_manager.on_llm_new_token(
@@ -368,13 +423,22 @@ class OllamaLLM(BaseLLM):
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                additional_kwargs = {}
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = GenerationChunk(
                     text=(stream_resp.get("response", "")),
-                    generation_info=
-
-
+                    generation_info={
+                        "finish_reason": self.stop,
+                        **additional_kwargs,
+                        **(
+                            dict(stream_resp) if stream_resp.get("done") is True else {}
+                        ),
+                    },
                 )
                 if run_manager:
                     await run_manager.on_llm_new_token(
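For the completion-style `OllamaLLM`, the same `reasoning` flag maps onto Ollama's `think` option: the streaming paths attach `reasoning_content` to each chunk's `generation_info`, while the aggregating helpers collect the thinking under a `thinking` key. A hedged sketch of reading the aggregated result, assuming `deepseek-r1:1.5b` is pulled locally and that the non-streaming path still routes through the aggregation helpers shown in this diff:

```python
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="deepseek-r1:1.5b", reasoning=True)

# generate() exposes generation_info, which is where the aggregated
# thinking ends up (per _stream_with_aggregation above).
result = llm.generate(["How many r's are in the word strawberry?"])
generation = result.generations[0][0]

print(generation.text)  # the answer text
print((generation.generation_info or {}).get("thinking"))  # aggregated reasoning
```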
{langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/METADATA
CHANGED
@@ -1,14 +1,14 @@
 Metadata-Version: 2.1
 Name: langchain-ollama
-Version: 0.3.3
+Version: 0.3.4
 Summary: An integration package connecting Ollama and LangChain
 License: MIT
 Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
 Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
 Project-URL: repository, https://github.com/langchain-ai/langchain
 Requires-Python: >=3.9
-Requires-Dist: ollama<1.0.0,>=0.
-Requires-Dist: langchain-core<1.0.0,>=0.3.
+Requires-Dist: ollama<1.0.0,>=0.5.1
+Requires-Dist: langchain-core<1.0.0,>=0.3.68
 Description-Content-Type: text/markdown

 # langchain-ollama
@@ -21,37 +21,48 @@ This package contains the LangChain integration with Ollama
 pip install -U langchain-ollama
 ```

-
-You can download it [here](https://ollama.com/download).
+For the package to work, you will need to install and run the Ollama server locally ([download](https://ollama.com/download)).

-
+To run integration tests (`make integration_tests`), you will need the following models installed in your Ollama server:
+
+- `llama3.1`
+- `deepseek-r1:1.5b`
+
+Install these models by running:
+
+```bash
+ollama pull <name-of-model>
+```
+
+## [Chat Models](https://python.langchain.com/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html#chatollama)

 `ChatOllama` class exposes chat models from Ollama.

 ```python
 from langchain_ollama import ChatOllama

-llm = ChatOllama(model="llama3
+llm = ChatOllama(model="llama3.1")
 llm.invoke("Sing a ballad of LangChain.")
 ```

-## Embeddings
+## [Embeddings](https://python.langchain.com/api_reference/ollama/embeddings/langchain_ollama.embeddings.OllamaEmbeddings.html#ollamaembeddings)

 `OllamaEmbeddings` class exposes embeddings from Ollama.

 ```python
 from langchain_ollama import OllamaEmbeddings

-embeddings = OllamaEmbeddings(model="llama3")
+embeddings = OllamaEmbeddings(model="llama3.1")
 embeddings.embed_query("What is the meaning of life?")
 ```

-## LLMs
-
+## [LLMs](https://python.langchain.com/api_reference/ollama/llms/langchain_ollama.llms.OllamaLLM.html#ollamallm)
+
+`OllamaLLM` class exposes traditional LLMs from Ollama.

 ```python
 from langchain_ollama import OllamaLLM

-llm = OllamaLLM(model="llama3")
+llm = OllamaLLM(model="llama3.1")
 llm.invoke("The meaning of life is")
 ```
langchain_ollama-0.3.4.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+langchain_ollama-0.3.4.dist-info/METADATA,sha256=wM54qEosykpO89kExse0V4Y3K3ncspLP_mFNKsBxTNY,2072
+langchain_ollama-0.3.4.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+langchain_ollama-0.3.4.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+langchain_ollama-0.3.4.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+langchain_ollama/__init__.py,sha256=TI1gI0Wpg7mRXehGpxrJG2flF_t4Ev-aIJlLKV-CgL0,633
+langchain_ollama/_utils.py,sha256=dmFO4tSvDTeMALc89QnTBLNWPMZL0eNAq1EDwuMjRA8,1416
+langchain_ollama/chat_models.py,sha256=olz3KJeLG1vk47Xl38nN9bP4bcol5cBQnPnu5MyP8k8,55539
+langchain_ollama/embeddings.py,sha256=VprOFiBRuUPGEygoIfxvAZStUsqRj65ZNMpkvCAo_9Y,10239
+langchain_ollama/llms.py,sha256=PSJ-VQMocp1nm-pgtnKnozidt66RKJiEnhdzftoLNNc,16778
+langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ollama-0.3.4.dist-info/RECORD,,
langchain_ollama-0.3.3.dist-info/RECORD
REMOVED
@@ -1,10 +0,0 @@
-langchain_ollama-0.3.3.dist-info/METADATA,sha256=K2QhMD3eEMIMegVdXf6ZyQ7C5fbl2wQ1CvvqtUOmyug,1462
-langchain_ollama-0.3.3.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
-langchain_ollama-0.3.3.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-langchain_ollama-0.3.3.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
-langchain_ollama/__init__.py,sha256=1f8Cyf1_bS0CT16U8-Os1P1Oa3erIDtIBTH4KVmBLvY,633
-langchain_ollama/chat_models.py,sha256=Z2wzR5R568aNyH1LKN84kUdNZFOvvgY-csE626_sBVc,51723
-langchain_ollama/embeddings.py,sha256=udL26XHdUMybQogY9Gj3vlJXxxkVAVZ-9He2U8wlJ3k,9547
-langchain_ollama/llms.py,sha256=Rin6HVZvrH1epRsjhojSmOBFWAaU0cfOU1gV6I0bqJE,13933
-langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-langchain_ollama-0.3.3.dist-info/RECORD,,
{langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/entry_points.txt
File without changes
{langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/licenses/LICENSE
File without changes