langchain-ollama 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langchain_ollama/__init__.py CHANGED
@@ -18,7 +18,7 @@ del metadata # optional, avoids polluting the results of dir(__package__)
 
  __all__ = [
      "ChatOllama",
-     "OllamaLLM",
      "OllamaEmbeddings",
+     "OllamaLLM",
      "__version__",
  ]
langchain_ollama/_utils.py ADDED
@@ -0,0 +1,39 @@
+ """Utility functions for validating Ollama models."""
+
+ from httpx import ConnectError
+ from ollama import Client, ResponseError
+
+
+ def validate_model(client: Client, model_name: str) -> None:
+     """Validate that a model exists in the Ollama instance.
+
+     Args:
+         client: The Ollama client.
+         model_name: The name of the model to validate.
+
+     Raises:
+         ValueError: If the model is not found or if there's a connection issue.
+     """
+     try:
+         response = client.list()
+
+         model_names: list[str] = [model["model"] for model in response["models"]]
+
+         if not any(
+             model_name == m or m.startswith(f"{model_name}:") for m in model_names
+         ):
+             msg = (
+                 f"Model `{model_name}` not found in Ollama. Please pull the "
+                 f"model (using `ollama pull {model_name}`) or specify a valid "
+                 f"model name. Available local models: {', '.join(model_names)}"
+             )
+             raise ValueError(msg)
+     except ConnectError as e:
+         msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
+         raise ValueError(msg) from e
+     except ResponseError as e:
+         msg = (
+             "Received an error from the Ollama API. "
+             "Please check your Ollama server logs."
+         )
+         raise ValueError(msg) from e
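
For orientation, a minimal sketch (not part of the diff) of how this new helper could be called directly; the host and model name are illustrative, and most users will instead rely on the `validate_model_on_init` flag added to the public classes in the hunks below.

```python
from ollama import Client

from langchain_ollama._utils import validate_model

client = Client(host="http://localhost:11434")  # assumes a locally running Ollama server
try:
    validate_model(client, "llama3.1")  # illustrative model name
except ValueError as err:
    # Raised when the model has not been pulled, the server is unreachable,
    # or the Ollama API reports an error.
    print(err)
```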
langchain_ollama/chat_models.py CHANGED
@@ -1,12 +1,13 @@
  """Ollama chat models."""
 
+ from __future__ import annotations
+
  import json
  from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
  from operator import itemgetter
  from typing import (
      Any,
      Callable,
-     Final,
      Literal,
      Optional,
      Union,
@@ -25,7 +26,7 @@ from langchain_core.messages import (
      AIMessage,
      AIMessageChunk,
      BaseMessage,
-     BaseMessageChunk,
+     ChatMessage,
      HumanMessage,
      SystemMessage,
      ToolCall,
@@ -54,8 +55,7 @@ from pydantic.json_schema import JsonSchemaValue
  from pydantic.v1 import BaseModel as BaseModelV1
  from typing_extensions import Self, is_typeddict
 
- DEFAULT_THINK_TOKEN_START: Final[str] = "<think>"
- DEFAULT_THINK_TOKEN_END: Final[str] = "</think>"
+ from ._utils import validate_model
 
 
  def _get_usage_metadata_from_generation_info(
@@ -76,7 +76,9 @@ def _get_usage_metadata_from_generation_info(
 
 
  def _parse_json_string(
-     json_string: str, raw_tool_call: dict[str, Any], skip: bool
+     json_string: str,
+     raw_tool_call: dict[str, Any],
+     skip: bool,  # noqa: FBT001
  ) -> Any:
      """Attempt to parse a JSON string for tool calling.
 
@@ -150,26 +152,30 @@ def _get_tool_calls_from_response(
  ) -> list[ToolCall]:
      """Get tool calls from ollama response."""
      tool_calls = []
-     if "message" in response:
-         if raw_tool_calls := response["message"].get("tool_calls"):
-             for tc in raw_tool_calls:
-                 tool_calls.append(
-                     tool_call(
-                         id=str(uuid4()),
-                         name=tc["function"]["name"],
-                         args=_parse_arguments_from_tool_call(tc) or {},
-                     )
+     if "message" in response and (
+         raw_tool_calls := response["message"].get("tool_calls")
+     ):
+         tool_calls.extend(
+             [
+                 tool_call(
+                     id=str(uuid4()),
+                     name=tc["function"]["name"],
+                     args=_parse_arguments_from_tool_call(tc) or {},
                  )
+                 for tc in raw_tool_calls
+             ]
+         )
      return tool_calls
 
 
- def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
+ def _lc_tool_call_to_openai_tool_call(tool_call_: ToolCall) -> dict:
+     """Convert a LangChain tool call to an OpenAI tool call format."""
      return {
          "type": "function",
-         "id": tool_call["id"],
+         "id": tool_call_["id"],
          "function": {
-             "name": tool_call["name"],
-             "arguments": tool_call["args"],
+             "name": tool_call_["name"],
+             "arguments": tool_call_["args"],
          },
      }
 
@@ -179,14 +185,12 @@ def _get_image_from_data_content_block(block: dict) -> str:
      if block["type"] == "image":
          if block["source_type"] == "base64":
              return block["data"]
-         else:
-             error_message = "Image data only supported through in-line base64 format."
-             raise ValueError(error_message)
-
-     else:
-         error_message = f"Blocks of type {block['type']} not supported."
+         error_message = "Image data only supported through in-line base64 format."
          raise ValueError(error_message)
 
+     error_message = f"Blocks of type {block['type']} not supported."
+     raise ValueError(error_message)
+
 
  def _is_pydantic_class(obj: Any) -> bool:
      return isinstance(obj, type) and is_basemodel_subclass(obj)
@@ -208,8 +212,22 @@ class ChatOllama(BaseChatModel):
      Key init args — completion params:
          model: str
              Name of Ollama model to use.
+         reasoning: Optional[bool]
+             Controls the reasoning/thinking mode for
+             `supported models <https://ollama.com/search?c=thinking>`__.
+
+             - ``True``: Enables reasoning mode. The model's reasoning process will be
+               captured and returned separately in the ``additional_kwargs`` of the
+               response message, under ``reasoning_content``. The main response
+               content will not include the reasoning tags.
+             - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+               and the response will not include any reasoning content.
+             - ``None`` (Default): The model will use its default reasoning behavior. Note
+               however, if the model's default behavior *is* to perform reasoning, think tags
+               (``<think>`` and ``</think>``) will be present within the main response content
+               unless you set ``reasoning`` to ``True``.
          temperature: float
-             Sampling temperature. Ranges from 0.0 to 1.0.
+             Sampling temperature. Ranges from ``0.0`` to ``1.0``.
          num_predict: Optional[int]
              Max number of tokens to generate.
 
@@ -325,7 +343,6 @@ class ChatOllama(BaseChatModel):
              '{"location": "Pune, India", "time_of_day": "morning"}'
 
      Tool Calling:
-
          .. code-block:: python
 
              from langchain_ollama import ChatOllama
@@ -344,17 +361,70 @@ class ChatOllama(BaseChatModel):
               'args': {'a': 45, 'b': 67},
               'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
               'type': 'tool_call'}]
-     """  # noqa: E501
+
+     Thinking / Reasoning:
+         You can enable reasoning mode for models that support it by setting
+         the ``reasoning`` parameter to ``True`` in either the constructor or
+         the ``invoke``/``stream`` methods. This will enable the model to think
+         through the problem and return the reasoning process separately in the
+         ``additional_kwargs`` of the response message, under ``reasoning_content``.
+
+         If ``reasoning`` is set to ``None``, the model will use its default reasoning
+         behavior, and any reasoning content will *not* be captured under the
+         ``reasoning_content`` key, but will be present within the main response content
+         as think tags (``<think>`` and ``</think>``).
+
+         .. note::
+             This feature is only available for `models that support reasoning <https://ollama.com/search?c=thinking>`__.
+
+         .. code-block:: python
+
+             from langchain_ollama import ChatOllama
+
+             llm = ChatOllama(
+                 model="deepseek-r1:8b",
+                 reasoning=True,
+             )
+
+             user_message = HumanMessage(content="how many r in the word strawberry?")
+             messages: List[Any] = [user_message]
+             llm.invoke(messages)
+
+             # or, on an invocation basis:
+
+             llm.invoke(messages, reasoning=True)
+             # or llm.stream(messages, reasoning=True)
+
+             # If not provided, the invocation will default to the ChatOllama reasoning
+             # param provided (None by default).
+
+         .. code-block:: python
+
+             AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})
+
+
+     """  # noqa: E501, pylint: disable=line-too-long
 
      model: str
      """Model name to use."""
 
-     extract_reasoning: Optional[Union[bool, tuple[str, str]]] = False
-     """Whether to extract the reasoning tokens in think blocks.
-     Extracts `chunk.content` to `chunk.additional_kwargs.reasoning_content`.
-     If a tuple is supplied, they are assumed to be the (start, end) tokens.
-     If `extract_reasoning=True`, the tokens will default to (<think>, </think>).
-     """
+     reasoning: Optional[bool] = None
+     """Controls the reasoning/thinking mode for
+     `supported models <https://ollama.com/search?c=thinking>`__.
+
+     - ``True``: Enables reasoning mode. The model's reasoning process will be
+       captured and returned separately in the ``additional_kwargs`` of the
+       response message, under ``reasoning_content``. The main response
+       content will not include the reasoning tags.
+     - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+       and the response will not include any reasoning content.
+     - ``None`` (Default): The model will use its default reasoning behavior. Note
+       however, if the model's default behavior *is* to perform reasoning, think tags
+       (``<think>`` and ``</think>``) will be present within the main response content
+       unless you set ``reasoning`` to ``True``."""
+
+     validate_model_on_init: bool = False
+     """Whether to validate the model exists in Ollama locally on initialization."""
 
      mirostat: Optional[int] = None
      """Enable Mirostat sampling for controlling perplexity.
@@ -435,16 +505,30 @@ class ChatOllama(BaseChatModel):
      """Base url the model is hosted under."""
 
      client_kwargs: Optional[dict] = {}
-     """Additional kwargs to pass to the httpx Client.
-     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
+     """Additional kwargs to pass to the httpx clients.
+     These arguments are passed to both synchronous and async clients.
+     Use sync_client_kwargs and async_client_kwargs to pass different arguments
+     to synchronous and asynchronous clients.
+     """
+
+     async_client_kwargs: Optional[dict] = {}
+     """Additional kwargs to merge with client_kwargs before
+     passing to the httpx AsyncClient.
+     `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
      """
 
-     _client: Client = PrivateAttr(default=None)  # type: ignore
+     sync_client_kwargs: Optional[dict] = {}
+     """Additional kwargs to merge with client_kwargs before
+     passing to the httpx Client.
+     `Full list of params. <https://www.python-httpx.org/api/#client>`__
+     """
+
+     _client: Client = PrivateAttr()
      """
      The client to use for making requests.
      """
 
-     _async_client: AsyncClient = PrivateAttr(default=None)  # type: ignore
+     _async_client: AsyncClient = PrivateAttr()
      """
      The async client to use for making requests.
      """
@@ -458,8 +542,9 @@ class ChatOllama(BaseChatModel):
          ollama_messages = self._convert_messages_to_ollama_messages(messages)
 
          if self.stop is not None and stop is not None:
-             raise ValueError("`stop` found in both the input and default params.")
-         elif self.stop is not None:
+             msg = "`stop` found in both the input and default params."
+             raise ValueError(msg)
+         if self.stop is not None:
              stop = self.stop
 
          options_dict = kwargs.pop(
@@ -487,6 +572,7 @@ class ChatOllama(BaseChatModel):
              "messages": ollama_messages,
              "stream": kwargs.pop("stream", True),
              "model": kwargs.pop("model", self.model),
+             "think": kwargs.pop("reasoning", self.reasoning),
              "format": kwargs.pop("format", self.format),
              "options": Options(**options_dict),
              "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
@@ -502,8 +588,19 @@ class ChatOllama(BaseChatModel):
      def _set_clients(self) -> Self:
          """Set clients to use for ollama."""
          client_kwargs = self.client_kwargs or {}
-         self._client = Client(host=self.base_url, **client_kwargs)
-         self._async_client = AsyncClient(host=self.base_url, **client_kwargs)
+
+         sync_client_kwargs = client_kwargs
+         if self.sync_client_kwargs:
+             sync_client_kwargs = {**sync_client_kwargs, **self.sync_client_kwargs}
+
+         async_client_kwargs = client_kwargs
+         if self.async_client_kwargs:
+             async_client_kwargs = {**async_client_kwargs, **self.async_client_kwargs}
+
+         self._client = Client(host=self.base_url, **sync_client_kwargs)
+         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+         if self.validate_model_on_init:
+             validate_model(self._client, self.model)
          return self
 
      def _convert_messages_to_ollama_messages(
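
A sketch of how the three client-kwargs fields interact after this change; the specific httpx options shown (`timeout`, `verify`, `headers`) are illustrative:

```python
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="llama3.1",
    client_kwargs={"timeout": 30},         # shared by both httpx clients
    sync_client_kwargs={"verify": False},  # merged on top for the sync Client only
    async_client_kwargs={"headers": {"X-Request-Source": "batch-job"}},  # AsyncClient only
)
```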
@@ -511,7 +608,7 @@ class ChatOllama(BaseChatModel):
      ) -> Sequence[Message]:
          ollama_messages: list = []
          for message in messages:
-             role: Literal["user", "assistant", "system", "tool"]
+             role: str
              tool_call_id: Optional[str] = None
              tool_calls: Optional[list[dict[str, Any]]] = None
              if isinstance(message, HumanMessage):
@@ -528,11 +625,14 @@ class ChatOllama(BaseChatModel):
                  )
              elif isinstance(message, SystemMessage):
                  role = "system"
+             elif isinstance(message, ChatMessage):
+                 role = message.role
              elif isinstance(message, ToolMessage):
                  role = "tool"
                  tool_call_id = message.tool_call_id
              else:
-                 raise ValueError("Received unsupported message type for Ollama.")
+                 msg = "Received unsupported message type for Ollama."
+                 raise ValueError(msg)
 
              content = ""
              images = []
@@ -556,10 +656,11 @@ class ChatOllama(BaseChatModel):
                          ):
                              image_url = temp_image_url["url"]
                          else:
-                             raise ValueError(
+                             msg = (
                                  "Only string image_url or dict with string 'url' "
                                  "inside content parts are supported."
                              )
+                             raise ValueError(msg)
 
                          image_url_components = image_url.split(",")
                          # Support data:image/jpeg;base64,<image> format
@@ -572,47 +673,27 @@ class ChatOllama(BaseChatModel):
                      image = _get_image_from_data_content_block(content_part)
                      images.append(image)
                  else:
-                     raise ValueError(
+                     msg = (
                          "Unsupported message content type. "
                          "Must either have type 'text' or type 'image_url' "
                          "with a string 'image_url' field."
                      )
-             # Should convert to ollama.Message once role includes tool, and tool_call_id is in Message  # noqa: E501
-             msg: dict = {
+                     raise ValueError(msg)
+             # Should convert to ollama.Message once role includes tool,
+             # and tool_call_id is in Message
+             msg_: dict = {
                  "role": role,
                  "content": content,
                  "images": images,
              }
              if tool_calls:
-                 msg["tool_calls"] = tool_calls  # type: ignore
+                 msg_["tool_calls"] = tool_calls
              if tool_call_id:
-                 msg["tool_call_id"] = tool_call_id
-             ollama_messages.append(msg)
+                 msg_["tool_call_id"] = tool_call_id
+             ollama_messages.append(msg_)
 
          return ollama_messages
 
-     def _extract_reasoning(
-         self, message_chunk: BaseMessageChunk, is_thinking: bool
-     ) -> tuple[BaseMessageChunk, bool]:
-         """Mutate a message chunk to extract reasoning content."""
-         if not self.extract_reasoning:
-             return message_chunk, is_thinking
-         elif self.extract_reasoning is True:
-             start_token = DEFAULT_THINK_TOKEN_START
-             end_token = DEFAULT_THINK_TOKEN_END
-         else:
-             start_token, end_token = cast(tuple, self.extract_reasoning)
-         if start_token in message_chunk.content:
-             is_thinking = True
-         content = message_chunk.content
-         if is_thinking:
-             message_chunk.additional_kwargs["reasoning_content"] = content
-             message_chunk.content = ""
-         if end_token in content:
-             is_thinking = False
-
-         return message_chunk, is_thinking
-
      async def _acreate_chat_stream(
          self,
          messages: list[BaseMessage],
@@ -636,16 +717,18 @@ class ChatOllama(BaseChatModel):
          chat_params = self._chat_params(messages, stop, **kwargs)
 
          if chat_params["stream"]:
-             yield from self._client.chat(**chat_params)
+             if self._client:
+                 yield from self._client.chat(**chat_params)
          else:
-             yield self._client.chat(**chat_params)
+             if self._client:
+                 yield self._client.chat(**chat_params)
 
      def _chat_stream_with_aggregation(
          self,
          messages: list[BaseMessage],
          stop: Optional[list[str]] = None,
          run_manager: Optional[CallbackManagerForLLMRun] = None,
-         verbose: bool = False,
+         verbose: bool = False,  # noqa: FBT001, FBT002
          **kwargs: Any,
      ) -> ChatGenerationChunk:
          final_chunk = None
@@ -661,7 +744,8 @@ class ChatOllama(BaseChatModel):
                  verbose=verbose,
              )
          if final_chunk is None:
-             raise ValueError("No data received from Ollama stream.")
+             msg = "No data received from Ollama stream."
+             raise ValueError(msg)
 
          return final_chunk
 
@@ -670,7 +754,7 @@ class ChatOllama(BaseChatModel):
          messages: list[BaseMessage],
          stop: Optional[list[str]] = None,
          run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-         verbose: bool = False,
+         verbose: bool = False,  # noqa: FBT001, FBT002
          **kwargs: Any,
      ) -> ChatGenerationChunk:
          final_chunk = None
@@ -686,7 +770,8 @@ class ChatOllama(BaseChatModel):
                  verbose=verbose,
              )
          if final_chunk is None:
-             raise ValueError("No data received from Ollama stream.")
+             msg = "No data received from Ollama stream."
+             raise ValueError(msg)
 
          return final_chunk
 
@@ -733,22 +818,35 @@ class ChatOllama(BaseChatModel):
          stop: Optional[list[str]] = None,
          **kwargs: Any,
      ) -> Iterator[ChatGenerationChunk]:
-         is_thinking = False
+         reasoning = kwargs.get("reasoning", self.reasoning)
          for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
              if not isinstance(stream_resp, str):
                  if stream_resp.get("done") is True:
                      generation_info = dict(stream_resp)
+                     if "model" in generation_info:
+                         generation_info["model_name"] = generation_info["model"]
                      _ = generation_info.pop("message", None)
                  else:
                      generation_info = None
+
+                 content = (
+                     stream_resp["message"]["content"]
+                     if "message" in stream_resp and "content" in stream_resp["message"]
+                     else ""
+                 )
+
+                 additional_kwargs = {}
+                 if (
+                     reasoning
+                     and "message" in stream_resp
+                     and (thinking_content := stream_resp["message"].get("thinking"))
+                 ):
+                     additional_kwargs["reasoning_content"] = thinking_content
+
                  chunk = ChatGenerationChunk(
                      message=AIMessageChunk(
-                         content=(
-                             stream_resp["message"]["content"]
-                             if "message" in stream_resp
-                             and "content" in stream_resp["message"]
-                             else ""
-                         ),
+                         content=content,
+                         additional_kwargs=additional_kwargs,
                          usage_metadata=_get_usage_metadata_from_generation_info(
                              stream_resp
                          ),
@@ -756,15 +854,7 @@ class ChatOllama(BaseChatModel):
                      ),
                      generation_info=generation_info,
                  )
-                 if chunk.generation_info and (
-                     model := chunk.generation_info.get("model")
-                 ):
-                     chunk.generation_info["model_name"] = model  # backwards compat
-                 if self.extract_reasoning:
-                     message, is_thinking = self._extract_reasoning(
-                         chunk.message, is_thinking
-                     )
-                     chunk.message = message
+
                  yield chunk
 
      def _stream(
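
A sketch of consuming the reworked stream with reasoning enabled per invocation; concatenating `chunk.content` as a string assumes the text-only chunks Ollama returns:

```python
from langchain_ollama import ChatOllama

llm = ChatOllama(model="deepseek-r1:8b")  # a thinking-capable model is assumed

thinking, answer = "", ""
for chunk in llm.stream("how many r in the word strawberry?", reasoning=True):
    thinking += chunk.additional_kwargs.get("reasoning_content", "")
    answer += chunk.content  # the reasoning trace stays out of the main content

print(thinking)
print(answer)
```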
@@ -788,22 +878,35 @@ class ChatOllama(BaseChatModel):
          stop: Optional[list[str]] = None,
          **kwargs: Any,
      ) -> AsyncIterator[ChatGenerationChunk]:
-         is_thinking = False
+         reasoning = kwargs.get("reasoning", self.reasoning)
          async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
              if not isinstance(stream_resp, str):
                  if stream_resp.get("done") is True:
                      generation_info = dict(stream_resp)
+                     if "model" in generation_info:
+                         generation_info["model_name"] = generation_info["model"]
                      _ = generation_info.pop("message", None)
                  else:
                      generation_info = None
+
+                 content = (
+                     stream_resp["message"]["content"]
+                     if "message" in stream_resp and "content" in stream_resp["message"]
+                     else ""
+                 )
+
+                 additional_kwargs = {}
+                 if (
+                     reasoning
+                     and "message" in stream_resp
+                     and (thinking_content := stream_resp["message"].get("thinking"))
+                 ):
+                     additional_kwargs["reasoning_content"] = thinking_content
+
                  chunk = ChatGenerationChunk(
                      message=AIMessageChunk(
-                         content=(
-                             stream_resp["message"]["content"]
-                             if "message" in stream_resp
-                             and "content" in stream_resp["message"]
-                             else ""
-                         ),
+                         content=content,
+                         additional_kwargs=additional_kwargs,
                          usage_metadata=_get_usage_metadata_from_generation_info(
                              stream_resp
                          ),
@@ -811,15 +914,7 @@ class ChatOllama(BaseChatModel):
                      ),
                      generation_info=generation_info,
                  )
-                 if chunk.generation_info and (
-                     model := chunk.generation_info.get("model")
-                 ):
-                     chunk.generation_info["model_name"] = model  # backwards compat
-                 if self.extract_reasoning:
-                     message, is_thinking = self._extract_reasoning(
-                         chunk.message, is_thinking
-                     )
-                     chunk.message = message
+
                  yield chunk
 
      async def _astream(
@@ -868,7 +963,7 @@ class ChatOllama(BaseChatModel):
          self,
          tools: Sequence[Union[dict[str, Any], type, Callable, BaseTool]],
          *,
-         tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,
+         tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,  # noqa: PYI051
          **kwargs: Any,
      ) -> Runnable[LanguageModelInput, BaseMessage]:
          """Bind tool-like objects to this chat model.
@@ -883,7 +978,7 @@ class ChatOllama(BaseChatModel):
                  is currently ignored as it is not supported by Ollama.**
              kwargs: Any additional parameters are passed directly to
                  ``self.bind(**kwargs)``.
-         """  # noqa: E501
+         """
          formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
          return super().bind(tools=formatted_tools, **kwargs)
 
@@ -916,7 +1011,7 @@ class ChatOllama(BaseChatModel):
          method: The method for steering model generation, one of:
 
              - "json_schema":
-                 Uses Ollama's structured output API: https://ollama.com/blog/structured-outputs
+                 Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
              - "function_calling":
                  Uses Ollama's tool-calling API
              - "json_mode":
@@ -1140,14 +1235,16 @@ class ChatOllama(BaseChatModel):
          """  # noqa: E501, D301
          _ = kwargs.pop("strict", None)
          if kwargs:
-             raise ValueError(f"Received unsupported arguments {kwargs}")
+             msg = f"Received unsupported arguments {kwargs}"
+             raise ValueError(msg)
          is_pydantic_schema = _is_pydantic_class(schema)
          if method == "function_calling":
              if schema is None:
-                 raise ValueError(
+                 msg = (
                      "schema must be specified when method is not 'json_mode'. "
                      "Received None."
                  )
+                 raise ValueError(msg)
              formatted_tool = convert_to_openai_tool(schema)
              tool_name = formatted_tool["function"]["name"]
              llm = self.bind_tools(
@@ -1182,10 +1279,11 @@ class ChatOllama(BaseChatModel):
              )
          elif method == "json_schema":
              if schema is None:
-                 raise ValueError(
+                 msg = (
                      "schema must be specified when method is not 'json_mode'. "
                      "Received None."
                  )
+                 raise ValueError(msg)
              if is_pydantic_schema:
                  schema = cast(TypeBaseModel, schema)
                  if issubclass(schema, BaseModelV1):
@@ -1199,7 +1297,7 @@ class ChatOllama(BaseChatModel):
                          "schema": schema,
                      },
                  )
-                 output_parser = PydanticOutputParser(pydantic_object=schema)
+                 output_parser = PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]
              else:
                  if is_typeddict(schema):
                      response_format = convert_to_json_schema(schema)
@@ -1219,10 +1317,11 @@ class ChatOllama(BaseChatModel):
                  )
                  output_parser = JsonOutputParser()
          else:
-             raise ValueError(
+             msg = (
                  f"Unrecognized method argument. Expected one of 'function_calling', "
                  f"'json_schema', or 'json_mode'. Received: '{method}'"
              )
+             raise ValueError(msg)
 
          if include_raw:
              parser_assign = RunnablePassthrough.assign(
@@ -1233,5 +1332,4 @@ class ChatOllama(BaseChatModel):
                  [parser_none], exception_key="parsing_error"
              )
              return RunnableMap(raw=llm) | parser_with_fallback
-         else:
-             return llm | output_parser
+         return llm | output_parser
langchain_ollama/embeddings.py CHANGED
@@ -1,5 +1,7 @@
  """Ollama embeddings models."""
 
+ from __future__ import annotations
+
  from typing import Any, Optional
 
  from langchain_core.embeddings import Embeddings
@@ -12,6 +14,8 @@ from pydantic import (
  )
  from typing_extensions import Self
 
+ from ._utils import validate_model
+
 
  class OllamaEmbeddings(BaseModel, Embeddings):
      """Ollama embedding model integration.
@@ -95,7 +99,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
      Embed multiple texts:
          .. code-block:: python
 
-              input_texts = ["Document 1...", "Document 2..."]
+             input_texts = ["Document 1...", "Document 2..."]
              vectors = embed.embed_documents(input_texts)
              print(len(vectors))
              # The first 3 coordinates for the first vector
@@ -110,7 +114,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
          .. code-block:: python
 
              vector = await embed.aembed_query(input_text)
-              print(vector[:3])
+             print(vector[:3])
 
              # multiple:
              # await embed.aembed_documents(input_texts)
@@ -123,20 +127,38 @@ class OllamaEmbeddings(BaseModel, Embeddings):
      model: str
      """Model name to use."""
 
+     validate_model_on_init: bool = False
+     """Whether to validate the model exists in ollama locally on initialization."""
+
      base_url: Optional[str] = None
      """Base url the model is hosted under."""
 
      client_kwargs: Optional[dict] = {}
-     """Additional kwargs to pass to the httpx Client.
-     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
+     """Additional kwargs to pass to the httpx clients.
+     These arguments are passed to both synchronous and async clients.
+     Use sync_client_kwargs and async_client_kwargs to pass different arguments
+     to synchronous and asynchronous clients.
      """
 
-     _client: Client = PrivateAttr(default=None)  # type: ignore
+     async_client_kwargs: Optional[dict] = {}
+     """Additional kwargs to merge with client_kwargs before passing to the httpx
+     AsyncClient.
+
+     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+     """
+
+     sync_client_kwargs: Optional[dict] = {}
+     """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+     """
+
+     _client: Optional[Client] = PrivateAttr(default=None)
      """
      The client to use for making requests.
      """
 
-     _async_client: AsyncClient = PrivateAttr(default=None)  # type: ignore
+     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
      """
      The async client to use for making requests.
      """
@@ -233,16 +255,32 @@ class OllamaEmbeddings(BaseModel, Embeddings):
      def _set_clients(self) -> Self:
          """Set clients to use for ollama."""
          client_kwargs = self.client_kwargs or {}
-         self._client = Client(host=self.base_url, **client_kwargs)
-         self._async_client = AsyncClient(host=self.base_url, **client_kwargs)
+
+         sync_client_kwargs = client_kwargs
+         if self.sync_client_kwargs:
+             sync_client_kwargs = {**sync_client_kwargs, **self.sync_client_kwargs}
+
+         async_client_kwargs = client_kwargs
+         if self.async_client_kwargs:
+             async_client_kwargs = {**async_client_kwargs, **self.async_client_kwargs}
+
+         self._client = Client(host=self.base_url, **sync_client_kwargs)
+         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+         if self.validate_model_on_init:
+             validate_model(self._client, self.model)
          return self
 
      def embed_documents(self, texts: list[str]) -> list[list[float]]:
          """Embed search docs."""
-         embedded_docs = self._client.embed(
+         if not self._client:
+             msg = (
+                 "Ollama client is not initialized. "
+                 "Please ensure Ollama is running and the model is loaded."
+             )
+             raise ValueError(msg)
+         return self._client.embed(
              self.model, texts, options=self._default_params, keep_alive=self.keep_alive
          )["embeddings"]
-         return embedded_docs
 
      def embed_query(self, text: str) -> list[float]:
          """Embed query text."""
@@ -250,12 +288,17 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
      async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
          """Embed search docs."""
-         embedded_docs = (
+         if not self._async_client:
+             msg = (
+                 "Ollama client is not initialized. "
+                 "Please ensure Ollama is running and the model is loaded."
+             )
+             raise ValueError(msg)
+         return (
              await self._async_client.embed(
                  self.model, texts, keep_alive=self.keep_alive
              )
          )["embeddings"]
-         return embedded_docs
 
      async def aembed_query(self, text: str) -> list[float]:
          """Embed query text."""
langchain_ollama/llms.py CHANGED
@@ -1,5 +1,7 @@
  """Ollama large language models."""
 
+ from __future__ import annotations
+
  from collections.abc import AsyncIterator, Iterator, Mapping
  from typing import (
      Any,
@@ -18,6 +20,8 @@ from ollama import AsyncClient, Client, Options
  from pydantic import PrivateAttr, model_validator
  from typing_extensions import Self
 
+ from ._utils import validate_model
+
 
  class OllamaLLM(BaseLLM):
      """OllamaLLM large language models.
@@ -28,12 +32,29 @@ class OllamaLLM(BaseLLM):
              from langchain_ollama import OllamaLLM
 
              model = OllamaLLM(model="llama3")
-             model.invoke("Come up with 10 names for a song about parrots")
+             print(model.invoke("Come up with 10 names for a song about parrots"))
      """
 
      model: str
      """Model name to use."""
 
+     reasoning: Optional[bool] = None
+     """Controls the reasoning/thinking mode for
+     `supported models <https://ollama.com/search?c=thinking>`__.
+
+     - ``True``: Enables reasoning mode. The model's reasoning process will be
+       captured and returned separately in the ``additional_kwargs`` of the
+       response message, under ``reasoning_content``. The main response
+       content will not include the reasoning tags.
+     - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+       and the response will not include any reasoning content.
+     - ``None`` (Default): The model will use its default reasoning behavior. If
+       the model performs reasoning, the ``<think>`` and ``</think>`` tags will
+       be present directly within the main response content."""
+
+     validate_model_on_init: bool = False
+     """Whether to validate the model exists in ollama locally on initialization."""
+
      mirostat: Optional[int] = None
      """Enable Mirostat sampling for controlling perplexity.
      (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
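
A sketch of the equivalent flags on the completion-style class; as the `_generate_params` hunk further down shows, `reasoning` is forwarded to Ollama as the `think` option:

```python
from langchain_ollama import OllamaLLM

llm = OllamaLLM(
    model="deepseek-r1:8b",      # a thinking-capable model is assumed
    reasoning=True,
    validate_model_on_init=True,
)
print(llm.invoke("how many r in the word strawberry?"))
```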
@@ -51,7 +72,7 @@ class OllamaLLM(BaseLLM):
 
      num_ctx: Optional[int] = None
      """Sets the size of the context window used to generate the
-     next token. (Default: 2048) """
+     next token. (Default: 2048)"""
 
      num_gpu: Optional[int] = None
      """The number of GPUs to use. On macOS it defaults to 1 to
@@ -113,16 +134,31 @@ class OllamaLLM(BaseLLM):
      """Base url the model is hosted under."""
 
      client_kwargs: Optional[dict] = {}
-     """Additional kwargs to pass to the httpx Client.
-     For a full list of the params, see [this link](https://pydoc.dev/httpx/latest/httpx.Client.html)
+     """Additional kwargs to pass to the httpx clients.
+     These arguments are passed to both synchronous and async clients.
+     Use sync_client_kwargs and async_client_kwargs to pass different arguments
+     to synchronous and asynchronous clients.
+     """
+
+     async_client_kwargs: Optional[dict] = {}
+     """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+     AsyncClient.
+
+     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+     """
+
+     sync_client_kwargs: Optional[dict] = {}
+     """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
      """
 
-     _client: Client = PrivateAttr(default=None)  # type: ignore
+     _client: Optional[Client] = PrivateAttr(default=None)
      """
      The client to use for making requests.
      """
 
-     _async_client: AsyncClient = PrivateAttr(default=None)  # type: ignore
+     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
      """
      The async client to use for making requests.
      """
@@ -134,8 +170,9 @@ class OllamaLLM(BaseLLM):
          **kwargs: Any,
      ) -> dict[str, Any]:
          if self.stop is not None and stop is not None:
-             raise ValueError("`stop` found in both the input and default params.")
-         elif self.stop is not None:
+             msg = "`stop` found in both the input and default params."
+             raise ValueError(msg)
+         if self.stop is not None:
              stop = self.stop
 
          options_dict = kwargs.pop(
@@ -159,18 +196,17 @@ class OllamaLLM(BaseLLM):
              },
          )
 
-         params = {
+         return {
              "prompt": prompt,
              "stream": kwargs.pop("stream", True),
              "model": kwargs.pop("model", self.model),
+             "think": kwargs.pop("reasoning", self.reasoning),
              "format": kwargs.pop("format", self.format),
              "options": Options(**options_dict),
              "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
              **kwargs,
          }
 
-         return params
-
      @property
      def _llm_type(self) -> str:
          """Return type of LLM."""
@@ -189,8 +225,19 @@ class OllamaLLM(BaseLLM):
      def _set_clients(self) -> Self:
          """Set clients to use for ollama."""
          client_kwargs = self.client_kwargs or {}
-         self._client = Client(host=self.base_url, **client_kwargs)
-         self._async_client = AsyncClient(host=self.base_url, **client_kwargs)
+
+         sync_client_kwargs = client_kwargs
+         if self.sync_client_kwargs:
+             sync_client_kwargs = {**sync_client_kwargs, **self.sync_client_kwargs}
+
+         async_client_kwargs = client_kwargs
+         if self.async_client_kwargs:
+             async_client_kwargs = {**async_client_kwargs, **self.async_client_kwargs}
+
+         self._client = Client(host=self.base_url, **sync_client_kwargs)
+         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+         if self.validate_model_on_init:
+             validate_model(self._client, self.model)
          return self
 
      async def _acreate_generate_stream(
@@ -199,10 +246,11 @@ class OllamaLLM(BaseLLM):
          stop: Optional[list[str]] = None,
          **kwargs: Any,
      ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
-         async for part in await self._async_client.generate(
-             **self._generate_params(prompt, stop=stop, **kwargs)
-         ):  # type: ignore
-             yield part  # type: ignore
+         if self._async_client:
+             async for part in await self._async_client.generate(
+                 **self._generate_params(prompt, stop=stop, **kwargs)
+             ):
+                 yield part
 
      def _create_generate_stream(
          self,
@@ -210,23 +258,27 @@ class OllamaLLM(BaseLLM):
          stop: Optional[list[str]] = None,
          **kwargs: Any,
      ) -> Iterator[Union[Mapping[str, Any], str]]:
-         yield from self._client.generate(
-             **self._generate_params(prompt, stop=stop, **kwargs)
-         )  # type: ignore
+         if self._client:
+             yield from self._client.generate(
+                 **self._generate_params(prompt, stop=stop, **kwargs)
+             )
 
      async def _astream_with_aggregation(
          self,
          prompt: str,
          stop: Optional[list[str]] = None,
          run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-         verbose: bool = False,
+         verbose: bool = False,  # noqa: FBT001, FBT002
          **kwargs: Any,
      ) -> GenerationChunk:
          final_chunk = None
+         thinking_content = ""
          async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
              if not isinstance(stream_resp, str):
+                 if stream_resp.get("thinking"):
+                     thinking_content += stream_resp["thinking"]
                  chunk = GenerationChunk(
-                     text=stream_resp["response"] if "response" in stream_resp else "",
+                     text=stream_resp.get("response", ""),
                      generation_info=(
                          dict(stream_resp) if stream_resp.get("done") is True else None
                      ),
@@ -242,7 +294,14 @@ class OllamaLLM(BaseLLM):
                  verbose=verbose,
              )
          if final_chunk is None:
-             raise ValueError("No data received from Ollama stream.")
+             msg = "No data received from Ollama stream."
+             raise ValueError(msg)
+
+         if thinking_content:
+             if final_chunk.generation_info:
+                 final_chunk.generation_info["thinking"] = thinking_content
+             else:
+                 final_chunk.generation_info = {"thinking": thinking_content}
 
          return final_chunk
 
@@ -251,14 +310,17 @@ class OllamaLLM(BaseLLM):
          prompt: str,
          stop: Optional[list[str]] = None,
          run_manager: Optional[CallbackManagerForLLMRun] = None,
-         verbose: bool = False,
+         verbose: bool = False,  # noqa: FBT001, FBT002
          **kwargs: Any,
      ) -> GenerationChunk:
          final_chunk = None
+         thinking_content = ""
          for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
              if not isinstance(stream_resp, str):
+                 if stream_resp.get("thinking"):
+                     thinking_content += stream_resp["thinking"]
                  chunk = GenerationChunk(
-                     text=stream_resp["response"] if "response" in stream_resp else "",
+                     text=stream_resp.get("response", ""),
                      generation_info=(
                          dict(stream_resp) if stream_resp.get("done") is True else None
                      ),
@@ -274,7 +336,14 @@ class OllamaLLM(BaseLLM):
                  verbose=verbose,
              )
          if final_chunk is None:
-             raise ValueError("No data received from Ollama stream.")
+             msg = "No data received from Ollama stream."
+             raise ValueError(msg)
+
+         if thinking_content:
+             if final_chunk.generation_info:
+                 final_chunk.generation_info["thinking"] = thinking_content
+             else:
+                 final_chunk.generation_info = {"thinking": thinking_content}
 
          return final_chunk
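
Based on the aggregation change above, the accumulated trace should be retrievable from a non-streaming call roughly as follows; the exact placement under `generation_info["thinking"]` is inferred from this hunk, so treat the sketch as an assumption:

```python
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="deepseek-r1:8b", reasoning=True)  # thinking-capable model assumed
result = llm.generate(["how many r in the word strawberry?"])

gen = result.generations[0][0]
print((gen.generation_info or {}).get("thinking", ""))  # accumulated thinking trace
print(gen.text)                                         # final answer text
```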
 
@@ -323,13 +392,22 @@ class OllamaLLM(BaseLLM):
          run_manager: Optional[CallbackManagerForLLMRun] = None,
          **kwargs: Any,
      ) -> Iterator[GenerationChunk]:
+         reasoning = kwargs.get("reasoning", self.reasoning)
          for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
              if not isinstance(stream_resp, str):
+                 additional_kwargs = {}
+                 if reasoning and (thinking_content := stream_resp.get("thinking")):
+                     additional_kwargs["reasoning_content"] = thinking_content
+
                  chunk = GenerationChunk(
                      text=(stream_resp.get("response", "")),
-                     generation_info=(
-                         dict(stream_resp) if stream_resp.get("done") is True else None
-                     ),
+                     generation_info={
+                         "finish_reason": self.stop,
+                         **additional_kwargs,
+                         **(
+                             dict(stream_resp) if stream_resp.get("done") is True else {}
+                         ),
+                     },
                  )
                  if run_manager:
                      run_manager.on_llm_new_token(
@@ -345,13 +423,22 @@ class OllamaLLM(BaseLLM):
          run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
          **kwargs: Any,
      ) -> AsyncIterator[GenerationChunk]:
+         reasoning = kwargs.get("reasoning", self.reasoning)
          async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
              if not isinstance(stream_resp, str):
+                 additional_kwargs = {}
+                 if reasoning and (thinking_content := stream_resp.get("thinking")):
+                     additional_kwargs["reasoning_content"] = thinking_content
+
                  chunk = GenerationChunk(
                      text=(stream_resp.get("response", "")),
-                     generation_info=(
-                         dict(stream_resp) if stream_resp.get("done") is True else None
-                     ),
+                     generation_info={
+                         "finish_reason": self.stop,
+                         **additional_kwargs,
+                         **(
+                             dict(stream_resp) if stream_resp.get("done") is True else {}
+                         ),
+                     },
                  )
                  if run_manager:
                      await run_manager.on_llm_new_token(
langchain_ollama-0.3.4.dist-info/METADATA ADDED
@@ -0,0 +1,68 @@
+ Metadata-Version: 2.1
+ Name: langchain-ollama
+ Version: 0.3.4
+ Summary: An integration package connecting Ollama and LangChain
+ License: MIT
+ Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
+ Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
+ Project-URL: repository, https://github.com/langchain-ai/langchain
+ Requires-Python: >=3.9
+ Requires-Dist: ollama<1.0.0,>=0.5.1
+ Requires-Dist: langchain-core<1.0.0,>=0.3.68
+ Description-Content-Type: text/markdown
+
+ # langchain-ollama
+
+ This package contains the LangChain integration with Ollama
+
+ ## Installation
+
+ ```bash
+ pip install -U langchain-ollama
+ ```
+
+ For the package to work, you will need to install and run the Ollama server locally ([download](https://ollama.com/download)).
+
+ To run integration tests (`make integration_tests`), you will need the following models installed in your Ollama server:
+
+ - `llama3.1`
+ - `deepseek-r1:1.5b`
+
+ Install these models by running:
+
+ ```bash
+ ollama pull <name-of-model>
+ ```
+
+ ## [Chat Models](https://python.langchain.com/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html#chatollama)
+
+ `ChatOllama` class exposes chat models from Ollama.
+
+ ```python
+ from langchain_ollama import ChatOllama
+
+ llm = ChatOllama(model="llama3.1")
+ llm.invoke("Sing a ballad of LangChain.")
+ ```
+
+ ## [Embeddings](https://python.langchain.com/api_reference/ollama/embeddings/langchain_ollama.embeddings.OllamaEmbeddings.html#ollamaembeddings)
+
+ `OllamaEmbeddings` class exposes embeddings from Ollama.
+
+ ```python
+ from langchain_ollama import OllamaEmbeddings
+
+ embeddings = OllamaEmbeddings(model="llama3.1")
+ embeddings.embed_query("What is the meaning of life?")
+ ```
+
+ ## [LLMs](https://python.langchain.com/api_reference/ollama/llms/langchain_ollama.llms.OllamaLLM.html#ollamallm)
+
+ `OllamaLLM` class exposes traditional LLMs from Ollama.
+
+ ```python
+ from langchain_ollama import OllamaLLM
+
+ llm = OllamaLLM(model="llama3.1")
+ llm.invoke("The meaning of life is")
+ ```
langchain_ollama-0.3.4.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+ langchain_ollama-0.3.4.dist-info/METADATA,sha256=wM54qEosykpO89kExse0V4Y3K3ncspLP_mFNKsBxTNY,2072
+ langchain_ollama-0.3.4.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+ langchain_ollama-0.3.4.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+ langchain_ollama-0.3.4.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+ langchain_ollama/__init__.py,sha256=TI1gI0Wpg7mRXehGpxrJG2flF_t4Ev-aIJlLKV-CgL0,633
+ langchain_ollama/_utils.py,sha256=dmFO4tSvDTeMALc89QnTBLNWPMZL0eNAq1EDwuMjRA8,1416
+ langchain_ollama/chat_models.py,sha256=olz3KJeLG1vk47Xl38nN9bP4bcol5cBQnPnu5MyP8k8,55539
+ langchain_ollama/embeddings.py,sha256=VprOFiBRuUPGEygoIfxvAZStUsqRj65ZNMpkvCAo_9Y,10239
+ langchain_ollama/llms.py,sha256=PSJ-VQMocp1nm-pgtnKnozidt66RKJiEnhdzftoLNNc,16778
+ langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ langchain_ollama-0.3.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: pdm-backend (2.4.4)
+ Generator: pdm-backend (2.4.5)
  Root-Is-Purelib: true
  Tag: py3-none-any
langchain_ollama-0.3.2.dist-info/METADATA REMOVED
@@ -1,57 +0,0 @@
- Metadata-Version: 2.1
- Name: langchain-ollama
- Version: 0.3.2
- Summary: An integration package connecting Ollama and LangChain
- License: MIT
- Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
- Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
- Project-URL: repository, https://github.com/langchain-ai/langchain
- Requires-Python: <4.0,>=3.9
- Requires-Dist: ollama<1,>=0.4.4
- Requires-Dist: langchain-core<1.0.0,>=0.3.52
- Description-Content-Type: text/markdown
-
- # langchain-ollama
-
- This package contains the LangChain integration with Ollama
-
- ## Installation
-
- ```bash
- pip install -U langchain-ollama
- ```
-
- You will also need to run the Ollama server locally.
- You can download it [here](https://ollama.com/download).
-
- ## Chat Models
-
- `ChatOllama` class exposes chat models from Ollama.
-
- ```python
- from langchain_ollama import ChatOllama
-
- llm = ChatOllama(model="llama3-groq-tool-use")
- llm.invoke("Sing a ballad of LangChain.")
- ```
-
- ## Embeddings
-
- `OllamaEmbeddings` class exposes embeddings from Ollama.
-
- ```python
- from langchain_ollama import OllamaEmbeddings
-
- embeddings = OllamaEmbeddings(model="llama3")
- embeddings.embed_query("What is the meaning of life?")
- ```
-
- ## LLMs
- `OllamaLLM` class exposes LLMs from Ollama.
-
- ```python
- from langchain_ollama import OllamaLLM
-
- llm = OllamaLLM(model="llama3")
- llm.invoke("The meaning of life is")
- ```
langchain_ollama-0.3.2.dist-info/RECORD REMOVED
@@ -1,10 +0,0 @@
- langchain_ollama-0.3.2.dist-info/METADATA,sha256=58k8ADvokbZrjkTN5_-DRJWHYxZI6A1IbYO7rJ2DWc8,1463
- langchain_ollama-0.3.2.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
- langchain_ollama-0.3.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
- langchain_ollama-0.3.2.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
- langchain_ollama/__init__.py,sha256=1f8Cyf1_bS0CT16U8-Os1P1Oa3erIDtIBTH4KVmBLvY,633
- langchain_ollama/chat_models.py,sha256=3ZvSHz-14idWKykyQgMV2i84bFrXVRjpU9dbGTz4_hs,50735
- langchain_ollama/embeddings.py,sha256=2G0gfnUbPBpVv9oBzL7C3z3FI_VumQ2WCYCf_-LMz-Q,8621
- langchain_ollama/llms.py,sha256=DiCWKLX2JPZAoVoRTKKQ2yOuoXbVStg0wkS1p6IruQU,13007
- langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- langchain_ollama-0.3.2.dist-info/RECORD,,