langchain-ollama 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_ollama/__init__.py +1 -1
- langchain_ollama/_utils.py +39 -0
- langchain_ollama/chat_models.py +190 -118
- langchain_ollama/embeddings.py +35 -15
- langchain_ollama/llms.py +99 -35
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/METADATA +23 -12
- langchain_ollama-0.3.4.dist-info/RECORD +11 -0
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/WHEEL +1 -1
- langchain_ollama-0.3.3.dist-info/RECORD +0 -10
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/entry_points.txt +0 -0
- {langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/licenses/LICENSE +0 -0
langchain_ollama/_utils.py
ADDED
@@ -0,0 +1,39 @@
+"""Utility functions for validating Ollama models."""
+
+from httpx import ConnectError
+from ollama import Client, ResponseError
+
+
+def validate_model(client: Client, model_name: str) -> None:
+    """Validate that a model exists in the Ollama instance.
+
+    Args:
+        client: The Ollama client.
+        model_name: The name of the model to validate.
+
+    Raises:
+        ValueError: If the model is not found or if there's a connection issue.
+    """
+    try:
+        response = client.list()
+
+        model_names: list[str] = [model["model"] for model in response["models"]]
+
+        if not any(
+            model_name == m or m.startswith(f"{model_name}:") for m in model_names
+        ):
+            msg = (
+                f"Model `{model_name}` not found in Ollama. Please pull the "
+                f"model (using `ollama pull {model_name}`) or specify a valid "
+                f"model name. Available local models: {', '.join(model_names)}"
+            )
+            raise ValueError(msg)
+    except ConnectError as e:
+        msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
+        raise ValueError(msg) from e
+    except ResponseError as e:
+        msg = (
+            "Received an error from the Ollama API. "
+            "Please check your Ollama server logs."
+        )
+        raise ValueError(msg) from e
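The new helper can be exercised on its own before it is wired into a model class. A minimal sketch, assuming a local Ollama server on the default port and that the `langchain-ollama` 0.3.4 wheel is installed (the model name is only an example):

```python
from ollama import Client

from langchain_ollama._utils import validate_model

# Assumes an Ollama server is reachable at the default host/port.
client = Client(host="http://localhost:11434")

try:
    # Passes silently when "llama3.1" (or any "llama3.1:<tag>") is pulled locally.
    validate_model(client, "llama3.1")
except ValueError as err:
    # Raised when the model is missing, the server is unreachable,
    # or the Ollama API returned an error.
    print(f"Model validation failed: {err}")
```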
langchain_ollama/chat_models.py
CHANGED
@@ -1,12 +1,13 @@
 """Ollama chat models."""

+from __future__ import annotations
+
 import json
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
     Any,
     Callable,
-    Final,
     Literal,
     Optional,
     Union,
@@ -25,7 +26,6 @@ from langchain_core.messages import (
     AIMessage,
     AIMessageChunk,
     BaseMessage,
-    BaseMessageChunk,
     ChatMessage,
     HumanMessage,
     SystemMessage,
@@ -55,8 +55,7 @@ from pydantic.json_schema import JsonSchemaValue
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self, is_typeddict

-
-DEFAULT_THINK_TOKEN_END: Final[str] = "</think>"
+from ._utils import validate_model


 def _get_usage_metadata_from_generation_info(
@@ -77,7 +76,9 @@ def _get_usage_metadata_from_generation_info(


 def _parse_json_string(
-    json_string: str,
+    json_string: str,
+    raw_tool_call: dict[str, Any],
+    skip: bool,  # noqa: FBT001
 ) -> Any:
     """Attempt to parse a JSON string for tool calling.

@@ -151,26 +152,30 @@ def _get_tool_calls_from_response(
 ) -> list[ToolCall]:
     """Get tool calls from ollama response."""
     tool_calls = []
-    if "message" in response
-            )
+    if "message" in response and (
+        raw_tool_calls := response["message"].get("tool_calls")
+    ):
+        tool_calls.extend(
+            [
+                tool_call(
+                    id=str(uuid4()),
+                    name=tc["function"]["name"],
+                    args=_parse_arguments_from_tool_call(tc) or {},
                 )
+                for tc in raw_tool_calls
+            ]
+        )
     return tool_calls


-def _lc_tool_call_to_openai_tool_call(
+def _lc_tool_call_to_openai_tool_call(tool_call_: ToolCall) -> dict:
+    """Convert a LangChain tool call to an OpenAI tool call format."""
     return {
         "type": "function",
-        "id":
+        "id": tool_call_["id"],
         "function": {
-            "name":
-            "arguments":
+            "name": tool_call_["name"],
+            "arguments": tool_call_["args"],
         },
     }

@@ -180,14 +185,12 @@ def _get_image_from_data_content_block(block: dict) -> str:
     if block["type"] == "image":
         if block["source_type"] == "base64":
             return block["data"]
-
-        error_message = "Image data only supported through in-line base64 format."
-        raise ValueError(error_message)
-
-    else:
-        error_message = f"Blocks of type {block['type']} not supported."
+        error_message = "Image data only supported through in-line base64 format."
         raise ValueError(error_message)

+    error_message = f"Blocks of type {block['type']} not supported."
+    raise ValueError(error_message)
+

 def _is_pydantic_class(obj: Any) -> bool:
     return isinstance(obj, type) and is_basemodel_subclass(obj)
@@ -209,8 +212,22 @@ class ChatOllama(BaseChatModel):
     Key init args — completion params:
         model: str
            Name of Ollama model to use.
+        reasoning: Optional[bool]
+            Controls the reasoning/thinking mode for
+            `supported models <https://ollama.com/search?c=thinking>`__.
+
+            - ``True``: Enables reasoning mode. The model's reasoning process will be
+              captured and returned separately in the ``additional_kwargs`` of the
+              response message, under ``reasoning_content``. The main response
+              content will not include the reasoning tags.
+            - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+              and the response will not include any reasoning content.
+            - ``None`` (Default): The model will use its default reasoning behavior. Note
+              however, if the model's default behavior *is* to perform reasoning, think tags
+              (``<think>`` and ``</think>``) will be present within the main response content
+              unless you set ``reasoning`` to ``True``.
         temperature: float
-            Sampling temperature. Ranges from 0.0 to 1.0
+            Sampling temperature. Ranges from ``0.0`` to ``1.0``.
         num_predict: Optional[int]
             Max number of tokens to generate.

@@ -326,7 +343,6 @@ class ChatOllama(BaseChatModel):
             '{"location": "Pune, India", "time_of_day": "morning"}'

     Tool Calling:
-
         .. code-block:: python

             from langchain_ollama import ChatOllama
@@ -345,17 +361,70 @@ class ChatOllama(BaseChatModel):
             'args': {'a': 45, 'b': 67},
             'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
             'type': 'tool_call'}]
-
+
+    Thinking / Reasoning:
+        You can enable reasoning mode for models that support it by setting
+        the ``reasoning`` parameter to ``True`` in either the constructor or
+        the ``invoke``/``stream`` methods. This will enable the model to think
+        through the problem and return the reasoning process separately in the
+        ``additional_kwargs`` of the response message, under ``reasoning_content``.
+
+        If ``reasoning`` is set to ``None``, the model will use its default reasoning
+        behavior, and any reasoning content will *not* be captured under the
+        ``reasoning_content`` key, but will be present within the main response content
+        as think tags (``<think>`` and ``</think>``).
+
+        .. note::
+            This feature is only available for `models that support reasoning <https://ollama.com/search?c=thinking>`__.
+
+        .. code-block:: python
+
+            from langchain_ollama import ChatOllama
+
+            llm = ChatOllama(
+                model = "deepseek-r1:8b",
+                reasoning= True,
+            )
+
+            user_message = HumanMessage(content="how many r in the word strawberry?")
+            messages: List[Any] = [user_message]
+            llm.invoke(messages)
+
+            # or, on an invocation basis:
+
+            llm.invoke(messages, reasoning=True)
+            # or llm.stream(messages, reasoning=True)
+
+            # If not provided, the invocation will default to the ChatOllama reasoning
+            # param provided (None by default).
+
+        .. code-block:: python
+
+            AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})
+
+
+    """  # noqa: E501, pylint: disable=line-too-long

     model: str
     """Model name to use."""

-
-    """
+    reasoning: Optional[bool] = None
+    """Controls the reasoning/thinking mode for
+    `supported models <https://ollama.com/search?c=thinking>`__.
+
+    - ``True``: Enables reasoning mode. The model's reasoning process will be
+      captured and returned separately in the ``additional_kwargs`` of the
+      response message, under ``reasoning_content``. The main response
+      content will not include the reasoning tags.
+    - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+      and the response will not include any reasoning content.
+    - ``None`` (Default): The model will use its default reasoning behavior. Note
+      however, if the model's default behavior *is* to perform reasoning, think tags
+      ()``<think>`` and ``</think>``) will be present within the main response content
+      unless you set ``reasoning`` to ``True``."""
+
+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in Ollama locally on initialization."""

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
@@ -436,7 +505,7 @@ class ChatOllama(BaseChatModel):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx clients.
+    """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
     Use sync_client_kwargs and async_client_kwargs to pass different arguments
     to synchronous and asynchronous clients.
@@ -445,21 +514,21 @@ class ChatOllama(BaseChatModel):
     async_client_kwargs: Optional[dict] = {}
     """Additional kwargs to merge with client_kwargs before
     passing to the httpx AsyncClient.
-
+    `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
     """

     sync_client_kwargs: Optional[dict] = {}
     """Additional kwargs to merge with client_kwargs before
     passing to the httpx Client.
-
+    `Full list of params. <https://www.python-httpx.org/api/#client>`__
     """

-    _client: Client = PrivateAttr(
+    _client: Client = PrivateAttr()
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(
+    _async_client: AsyncClient = PrivateAttr()
     """
     The async client to use for making requests.
     """
@@ -473,8 +542,9 @@ class ChatOllama(BaseChatModel):
         ollama_messages = self._convert_messages_to_ollama_messages(messages)

         if self.stop is not None and stop is not None:
-
-
+            msg = "`stop` found in both the input and default params."
+            raise ValueError(msg)
+        if self.stop is not None:
             stop = self.stop

         options_dict = kwargs.pop(
@@ -502,6 +572,7 @@ class ChatOllama(BaseChatModel):
             "messages": ollama_messages,
             "stream": kwargs.pop("stream", True),
             "model": kwargs.pop("model", self.model),
+            "think": kwargs.pop("reasoning", self.reasoning),
             "format": kwargs.pop("format", self.format),
             "options": Options(**options_dict),
             "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
@@ -528,6 +599,8 @@ class ChatOllama(BaseChatModel):

         self._client = Client(host=self.base_url, **sync_client_kwargs)
         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     def _convert_messages_to_ollama_messages(
@@ -558,7 +631,8 @@ class ChatOllama(BaseChatModel):
                 role = "tool"
                 tool_call_id = message.tool_call_id
             else:
-
+                msg = "Received unsupported message type for Ollama."
+                raise ValueError(msg)

             content = ""
             images = []
@@ -582,10 +656,11 @@ class ChatOllama(BaseChatModel):
                         ):
                             image_url = temp_image_url["url"]
                         else:
-
+                            msg = (
                                 "Only string image_url or dict with string 'url' "
                                 "inside content parts are supported."
                             )
+                            raise ValueError(msg)

                         image_url_components = image_url.split(",")
                         # Support data:image/jpeg;base64,<image> format
@@ -598,47 +673,27 @@ class ChatOllama(BaseChatModel):
                         image = _get_image_from_data_content_block(content_part)
                         images.append(image)
                     else:
-
+                        msg = (
                             "Unsupported message content type. "
                             "Must either have type 'text' or type 'image_url' "
                             "with a string 'image_url' field."
                         )
-
-
+                        raise ValueError(msg)
+            # Should convert to ollama.Message once role includes tool,
+            # and tool_call_id is in Message
+            msg_: dict = {
                 "role": role,
                 "content": content,
                 "images": images,
             }
             if tool_calls:
-
+                msg_["tool_calls"] = tool_calls
             if tool_call_id:
-
-            ollama_messages.append(
+                msg_["tool_call_id"] = tool_call_id
+            ollama_messages.append(msg_)

         return ollama_messages

-    def _extract_reasoning(
-        self, message_chunk: BaseMessageChunk, is_thinking: bool
-    ) -> tuple[BaseMessageChunk, bool]:
-        """Mutate a message chunk to extract reasoning content."""
-        if not self.extract_reasoning:
-            return message_chunk, is_thinking
-        elif self.extract_reasoning is True:
-            start_token = DEFAULT_THINK_TOKEN_START
-            end_token = DEFAULT_THINK_TOKEN_END
-        else:
-            start_token, end_token = cast(tuple, self.extract_reasoning)
-        if start_token in message_chunk.content:
-            is_thinking = True
-        content = message_chunk.content
-        if is_thinking:
-            message_chunk.additional_kwargs["reasoning_content"] = content
-            message_chunk.content = ""
-        if end_token in content:
-            is_thinking = False
-
-        return message_chunk, is_thinking
-
     async def _acreate_chat_stream(
         self,
         messages: list[BaseMessage],
@@ -662,16 +717,18 @@ class ChatOllama(BaseChatModel):
         chat_params = self._chat_params(messages, stop, **kwargs)

         if chat_params["stream"]:
-
+            if self._client:
+                yield from self._client.chat(**chat_params)
         else:
-
+            if self._client:
+                yield self._client.chat(**chat_params)

     def _chat_stream_with_aggregation(
         self,
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> ChatGenerationChunk:
         final_chunk = None
@@ -687,7 +744,8 @@ class ChatOllama(BaseChatModel):
                 verbose=verbose,
             )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)

         return final_chunk

@@ -696,7 +754,7 @@ class ChatOllama(BaseChatModel):
         messages: list[BaseMessage],
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> ChatGenerationChunk:
         final_chunk = None
@@ -712,7 +770,8 @@ class ChatOllama(BaseChatModel):
                 verbose=verbose,
             )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)

         return final_chunk

@@ -759,22 +818,35 @@ class ChatOllama(BaseChatModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
-
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
+                    if "model" in generation_info:
+                        generation_info["model_name"] = generation_info["model"]
                     _ = generation_info.pop("message", None)
                 else:
                     generation_info = None
+
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                additional_kwargs = {}
+                if (
+                    reasoning
+                    and "message" in stream_resp
+                    and (thinking_content := stream_resp["message"].get("thinking"))
+                ):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = ChatGenerationChunk(
                     message=AIMessageChunk(
-                        content=
-
-                        if "message" in stream_resp
-                        and "content" in stream_resp["message"]
-                        else ""
-                        ),
+                        content=content,
+                        additional_kwargs=additional_kwargs,
                         usage_metadata=_get_usage_metadata_from_generation_info(
                             stream_resp
                         ),
@@ -782,15 +854,7 @@ class ChatOllama(BaseChatModel):
                     ),
                     generation_info=generation_info,
                 )
-
-                    model := chunk.generation_info.get("model")
-                ):
-                    chunk.generation_info["model_name"] = model  # backwards compat
-                if self.extract_reasoning:
-                    message, is_thinking = self._extract_reasoning(
-                        chunk.message, is_thinking
-                    )
-                    chunk.message = message
+
                 yield chunk

     def _stream(
@@ -814,22 +878,35 @@ class ChatOllama(BaseChatModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
-
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
+                    if "model" in generation_info:
+                        generation_info["model_name"] = generation_info["model"]
                     _ = generation_info.pop("message", None)
                 else:
                     generation_info = None
+
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                additional_kwargs = {}
+                if (
+                    reasoning
+                    and "message" in stream_resp
+                    and (thinking_content := stream_resp["message"].get("thinking"))
+                ):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = ChatGenerationChunk(
                     message=AIMessageChunk(
-                        content=
-
-                        if "message" in stream_resp
-                        and "content" in stream_resp["message"]
-                        else ""
-                        ),
+                        content=content,
+                        additional_kwargs=additional_kwargs,
                         usage_metadata=_get_usage_metadata_from_generation_info(
                             stream_resp
                         ),
@@ -837,15 +914,7 @@ class ChatOllama(BaseChatModel):
                     ),
                     generation_info=generation_info,
                 )
-
-                    model := chunk.generation_info.get("model")
-                ):
-                    chunk.generation_info["model_name"] = model  # backwards compat
-                if self.extract_reasoning:
-                    message, is_thinking = self._extract_reasoning(
-                        chunk.message, is_thinking
-                    )
-                    chunk.message = message
+
                 yield chunk

     async def _astream(
@@ -894,7 +963,7 @@ class ChatOllama(BaseChatModel):
         self,
         tools: Sequence[Union[dict[str, Any], type, Callable, BaseTool]],
         *,
-        tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,
+        tool_choice: Optional[Union[dict, str, Literal["auto", "any"], bool]] = None,  # noqa: PYI051
         **kwargs: Any,
     ) -> Runnable[LanguageModelInput, BaseMessage]:
         """Bind tool-like objects to this chat model.
@@ -909,7 +978,7 @@ class ChatOllama(BaseChatModel):
                 is currently ignored as it is not supported by Ollama.**
             kwargs: Any additional parameters are passed directly to
                 ``self.bind(**kwargs)``.
-        """
+        """
         formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
         return super().bind(tools=formatted_tools, **kwargs)

@@ -942,7 +1011,7 @@ class ChatOllama(BaseChatModel):
             method: The method for steering model generation, one of:

                 - "json_schema":
-                    Uses Ollama's structured output API
+                    Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
                 - "function_calling":
                     Uses Ollama's tool-calling API
                 - "json_mode":
@@ -1166,14 +1235,16 @@ class ChatOllama(BaseChatModel):
         """  # noqa: E501, D301
         _ = kwargs.pop("strict", None)
         if kwargs:
-
+            msg = f"Received unsupported arguments {kwargs}"
+            raise ValueError(msg)
         is_pydantic_schema = _is_pydantic_class(schema)
         if method == "function_calling":
             if schema is None:
-
+                msg = (
                     "schema must be specified when method is not 'json_mode'. "
                     "Received None."
                 )
+                raise ValueError(msg)
             formatted_tool = convert_to_openai_tool(schema)
             tool_name = formatted_tool["function"]["name"]
             llm = self.bind_tools(
@@ -1208,10 +1279,11 @@ class ChatOllama(BaseChatModel):
             )
         elif method == "json_schema":
             if schema is None:
-
+                msg = (
                     "schema must be specified when method is not 'json_mode'. "
                     "Received None."
                 )
+                raise ValueError(msg)
             if is_pydantic_schema:
                 schema = cast(TypeBaseModel, schema)
                 if issubclass(schema, BaseModelV1):
@@ -1225,7 +1297,7 @@ class ChatOllama(BaseChatModel):
                         "schema": schema,
                     },
                 )
-                output_parser = PydanticOutputParser(pydantic_object=schema)
+                output_parser = PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]
             else:
                 if is_typeddict(schema):
                     response_format = convert_to_json_schema(schema)
@@ -1245,10 +1317,11 @@ class ChatOllama(BaseChatModel):
                 )
                 output_parser = JsonOutputParser()
         else:
-
+            msg = (
                 f"Unrecognized method argument. Expected one of 'function_calling', "
                 f"'json_schema', or 'json_mode'. Received: '{method}'"
             )
+            raise ValueError(msg)

         if include_raw:
             parser_assign = RunnablePassthrough.assign(
@@ -1259,5 +1332,4 @@ class ChatOllama(BaseChatModel):
                 [parser_none], exception_key="parsing_error"
             )
             return RunnableMap(raw=llm) | parser_with_fallback
-
-        return llm | output_parser
+        return llm | output_parser
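Taken together, the chat-model changes mean reasoning output now arrives through Ollama's native `think` field instead of being scraped from `<think>` tags. A short sketch of reading it back, assuming a thinking-capable model such as `deepseek-r1:8b` has been pulled locally:

```python
from langchain_ollama import ChatOllama

# validate_model_on_init fails fast if the model has not been pulled;
# reasoning=True is forwarded to Ollama as the `think` option.
llm = ChatOllama(
    model="deepseek-r1:8b",
    reasoning=True,
    validate_model_on_init=True,
)

response = llm.invoke("How many r's are in the word strawberry?")

print(response.content)  # final answer, without <think> tags
print(response.additional_kwargs.get("reasoning_content"))  # captured reasoning
```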
langchain_ollama/embeddings.py
CHANGED
@@ -1,5 +1,7 @@
 """Ollama embeddings models."""

+from __future__ import annotations
+
 from typing import Any, Optional

 from langchain_core.embeddings import Embeddings
@@ -12,6 +14,8 @@ from pydantic import (
 )
 from typing_extensions import Self

+from ._utils import validate_model
+

 class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.
@@ -95,7 +99,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     Embed multiple texts:
         .. code-block:: python

-
+            input_texts = ["Document 1...", "Document 2..."]
             vectors = embed.embed_documents(input_texts)
             print(len(vectors))
             # The first 3 coordinates for the first vector
@@ -110,7 +114,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         .. code-block:: python

             vector = await embed.aembed_query(input_text)
-
+            print(vector[:3])

             # multiple:
             # await embed.aembed_documents(input_texts)
@@ -123,34 +127,38 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     model: str
     """Model name to use."""

+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in ollama locally on initialization."""
+
     base_url: Optional[str] = None
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx clients.
+    """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
     Use sync_client_kwargs and async_client_kwargs to pass different arguments
     to synchronous and asynchronous clients.
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-
+    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    AsyncClient.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-    For a full list of the params, see
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
     """

-    _client: Client = PrivateAttr(default=None)
+    _client: Optional[Client] = PrivateAttr(default=None)
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(default=None)
+    _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
     """
     The async client to use for making requests.
     """
@@ -258,14 +266,21 @@ class OllamaEmbeddings(BaseModel, Embeddings):

         self._client = Client(host=self.base_url, **sync_client_kwargs)
         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     def embed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
-
+        if not self._client:
+            msg = (
+                "Ollama client is not initialized. "
+                "Please ensure Ollama is running and the model is loaded."
+            )
+            raise ValueError(msg)
+        return self._client.embed(
             self.model, texts, options=self._default_params, keep_alive=self.keep_alive
         )["embeddings"]
-        return embedded_docs

     def embed_query(self, text: str) -> list[float]:
         """Embed query text."""
@@ -273,12 +288,17 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     async def aembed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed search docs."""
-
+        if not self._async_client:
+            msg = (
+                "Ollama client is not initialized. "
+                "Please ensure Ollama is running and the model is loaded."
+            )
+            raise ValueError(msg)
+        return (
             await self._async_client.embed(
                 self.model, texts, keep_alive=self.keep_alive
             )
         )["embeddings"]
-        return embedded_docs

     async def aembed_query(self, text: str) -> list[float]:
         """Embed query text."""
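The embeddings class gains the same opt-in validation, and its private clients are now `Optional`, so calls raise a clear `ValueError` if the client was never initialized. A brief sketch, assuming some locally pulled embedding model (the `nomic-embed-text` name below is only an illustrative choice):

```python
from langchain_ollama import OllamaEmbeddings

# validate_model_on_init raises at construction time (via the new
# _utils.validate_model helper) if the model is not available locally.
embed = OllamaEmbeddings(
    model="nomic-embed-text",
    validate_model_on_init=True,
)

vectors = embed.embed_documents(["Document 1...", "Document 2..."])
print(len(vectors), len(vectors[0]))  # number of documents, embedding width
```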
langchain_ollama/llms.py
CHANGED
@@ -1,5 +1,7 @@
 """Ollama large language models."""

+from __future__ import annotations
+
 from collections.abc import AsyncIterator, Iterator, Mapping
 from typing import (
     Any,
@@ -18,6 +20,8 @@ from ollama import AsyncClient, Client, Options
 from pydantic import PrivateAttr, model_validator
 from typing_extensions import Self

+from ._utils import validate_model
+

 class OllamaLLM(BaseLLM):
     """OllamaLLM large language models.
@@ -28,12 +32,29 @@ class OllamaLLM(BaseLLM):
             from langchain_ollama import OllamaLLM

             model = OllamaLLM(model="llama3")
-            model.invoke("Come up with 10 names for a song about parrots")
+            print(model.invoke("Come up with 10 names for a song about parrots"))
     """

     model: str
     """Model name to use."""

+    reasoning: Optional[bool] = None
+    """Controls the reasoning/thinking mode for
+    `supported models <https://ollama.com/search?c=thinking>`__.
+
+    - ``True``: Enables reasoning mode. The model's reasoning process will be
+      captured and returned separately in the ``additional_kwargs`` of the
+      response message, under ``reasoning_content``. The main response
+      content will not include the reasoning tags.
+    - ``False``: Disables reasoning mode. The model will not perform any reasoning,
+      and the response will not include any reasoning content.
+    - ``None`` (Default): The model will use its default reasoning behavior. If
+      the model performs reasoning, the ``<think>`` and ``</think>`` tags will
+      be present directly within the main response content."""
+
+    validate_model_on_init: bool = False
+    """Whether to validate the model exists in ollama locally on initialization."""
+
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
     (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
@@ -51,7 +72,7 @@ class OllamaLLM(BaseLLM):

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048)
+    next token. (Default: 2048)"""

     num_gpu: Optional[int] = None
     """The number of GPUs to use. On macOS it defaults to 1 to
@@ -113,30 +134,31 @@ class OllamaLLM(BaseLLM):
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
-    """Additional kwargs to pass to the httpx clients.
+    """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
     Use sync_client_kwargs and async_client_kwargs to pass different arguments
     to synchronous and asynchronous clients.
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    AsyncClient.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
-
-    For a full list of the params, see
+    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+
+    For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
     """

-    _client: Client = PrivateAttr(default=None)
+    _client: Optional[Client] = PrivateAttr(default=None)
     """
     The client to use for making requests.
     """

-    _async_client: AsyncClient = PrivateAttr(default=None)
+    _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
     """
     The async client to use for making requests.
     """
@@ -148,8 +170,9 @@ class OllamaLLM(BaseLLM):
         **kwargs: Any,
     ) -> dict[str, Any]:
         if self.stop is not None and stop is not None:
-
-
+            msg = "`stop` found in both the input and default params."
+            raise ValueError(msg)
+        if self.stop is not None:
             stop = self.stop

         options_dict = kwargs.pop(
@@ -173,18 +196,17 @@ class OllamaLLM(BaseLLM):
             },
         )

-
+        return {
             "prompt": prompt,
             "stream": kwargs.pop("stream", True),
             "model": kwargs.pop("model", self.model),
+            "think": kwargs.pop("reasoning", self.reasoning),
             "format": kwargs.pop("format", self.format),
             "options": Options(**options_dict),
             "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
             **kwargs,
         }

-        return params
-
     @property
     def _llm_type(self) -> str:
         """Return type of LLM."""
@@ -214,6 +236,8 @@ class OllamaLLM(BaseLLM):

         self._client = Client(host=self.base_url, **sync_client_kwargs)
         self._async_client = AsyncClient(host=self.base_url, **async_client_kwargs)
+        if self.validate_model_on_init:
+            validate_model(self._client, self.model)
         return self

     async def _acreate_generate_stream(
@@ -222,10 +246,11 @@ class OllamaLLM(BaseLLM):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
+        if self._async_client:
+            async for part in await self._async_client.generate(
+                **self._generate_params(prompt, stop=stop, **kwargs)
+            ):
+                yield part

     def _create_generate_stream(
         self,
@@ -233,23 +258,27 @@ class OllamaLLM(BaseLLM):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> Iterator[Union[Mapping[str, Any], str]]:
+        if self._client:
+            yield from self._client.generate(
+                **self._generate_params(prompt, stop=stop, **kwargs)
+            )

     async def _astream_with_aggregation(
         self,
         prompt: str,
         stop: Optional[list[str]] = None,
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
-                    text=stream_resp
+                    text=stream_resp.get("response", ""),
                     generation_info=(
                         dict(stream_resp) if stream_resp.get("done") is True else None
                     ),
@@ -265,7 +294,14 @@ class OllamaLLM(BaseLLM):
                     verbose=verbose,
                 )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)
+
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}

         return final_chunk

@@ -274,14 +310,17 @@ class OllamaLLM(BaseLLM):
         prompt: str,
         stop: Optional[list[str]] = None,
         run_manager: Optional[CallbackManagerForLLMRun] = None,
-        verbose: bool = False,
+        verbose: bool = False,  # noqa: FBT001, FBT002
         **kwargs: Any,
     ) -> GenerationChunk:
         final_chunk = None
+        thinking_content = ""
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                if stream_resp.get("thinking"):
+                    thinking_content += stream_resp["thinking"]
                 chunk = GenerationChunk(
-                    text=stream_resp
+                    text=stream_resp.get("response", ""),
                     generation_info=(
                         dict(stream_resp) if stream_resp.get("done") is True else None
                     ),
@@ -297,7 +336,14 @@ class OllamaLLM(BaseLLM):
                     verbose=verbose,
                 )
         if final_chunk is None:
-
+            msg = "No data received from Ollama stream."
+            raise ValueError(msg)
+
+        if thinking_content:
+            if final_chunk.generation_info:
+                final_chunk.generation_info["thinking"] = thinking_content
+            else:
+                final_chunk.generation_info = {"thinking": thinking_content}

         return final_chunk

@@ -346,13 +392,22 @@ class OllamaLLM(BaseLLM):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                additional_kwargs = {}
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = GenerationChunk(
                     text=(stream_resp.get("response", "")),
-                    generation_info=
-
-
+                    generation_info={
+                        "finish_reason": self.stop,
+                        **additional_kwargs,
+                        **(
+                            dict(stream_resp) if stream_resp.get("done") is True else {}
+                        ),
+                    },
                 )
                 if run_manager:
                     run_manager.on_llm_new_token(
@@ -368,13 +423,22 @@ class OllamaLLM(BaseLLM):
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[GenerationChunk]:
+        reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                additional_kwargs = {}
+                if reasoning and (thinking_content := stream_resp.get("thinking")):
+                    additional_kwargs["reasoning_content"] = thinking_content
+
                 chunk = GenerationChunk(
                     text=(stream_resp.get("response", "")),
-                    generation_info=
-
-
+                    generation_info={
+                        "finish_reason": self.stop,
+                        **additional_kwargs,
+                        **(
+                            dict(stream_resp) if stream_resp.get("done") is True else {}
+                        ),
+                    },
                 )
                 if run_manager:
                     await run_manager.on_llm_new_token(
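For the completion-style `OllamaLLM`, the same `reasoning` flag maps onto Ollama's `think` option: the streaming paths attach `reasoning_content` to each chunk's `generation_info`, while the aggregating helpers collect the thinking under a `thinking` key. A hedged sketch of reading the aggregated result, assuming `deepseek-r1:1.5b` is pulled locally and that the non-streaming path still routes through the aggregation helpers shown in this diff:

```python
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="deepseek-r1:1.5b", reasoning=True)

# generate() exposes generation_info, which is where the aggregated
# thinking ends up (per _stream_with_aggregation above).
result = llm.generate(["How many r's are in the word strawberry?"])
generation = result.generations[0][0]

print(generation.text)  # the answer text
print((generation.generation_info or {}).get("thinking"))  # aggregated reasoning
```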
{langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/METADATA
CHANGED
@@ -1,14 +1,14 @@
 Metadata-Version: 2.1
 Name: langchain-ollama
-Version: 0.3.3
+Version: 0.3.4
 Summary: An integration package connecting Ollama and LangChain
 License: MIT
 Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
 Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
 Project-URL: repository, https://github.com/langchain-ai/langchain
 Requires-Python: >=3.9
-Requires-Dist: ollama<1.0.0,>=0.
-Requires-Dist: langchain-core<1.0.0,>=0.3.
+Requires-Dist: ollama<1.0.0,>=0.5.1
+Requires-Dist: langchain-core<1.0.0,>=0.3.68
 Description-Content-Type: text/markdown

 # langchain-ollama
@@ -21,37 +21,48 @@ This package contains the LangChain integration with Ollama
 pip install -U langchain-ollama
 ```

-
-You can download it [here](https://ollama.com/download).
+For the package to work, you will need to install and run the Ollama server locally ([download](https://ollama.com/download)).

-
+To run integration tests (`make integration_tests`), you will need the following models installed in your Ollama server:
+
+- `llama3.1`
+- `deepseek-r1:1.5b`
+
+Install these models by running:
+
+```bash
+ollama pull <name-of-model>
+```
+
+## [Chat Models](https://python.langchain.com/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html#chatollama)

 `ChatOllama` class exposes chat models from Ollama.

 ```python
 from langchain_ollama import ChatOllama

-llm = ChatOllama(model="llama3
+llm = ChatOllama(model="llama3.1")
 llm.invoke("Sing a ballad of LangChain.")
 ```

-## Embeddings
+## [Embeddings](https://python.langchain.com/api_reference/ollama/embeddings/langchain_ollama.embeddings.OllamaEmbeddings.html#ollamaembeddings)

 `OllamaEmbeddings` class exposes embeddings from Ollama.

 ```python
 from langchain_ollama import OllamaEmbeddings

-embeddings = OllamaEmbeddings(model="llama3")
+embeddings = OllamaEmbeddings(model="llama3.1")
 embeddings.embed_query("What is the meaning of life?")
 ```

-## LLMs
-
+## [LLMs](https://python.langchain.com/api_reference/ollama/llms/langchain_ollama.llms.OllamaLLM.html#ollamallm)
+
+`OllamaLLM` class exposes traditional LLMs from Ollama.

 ```python
 from langchain_ollama import OllamaLLM

-llm = OllamaLLM(model="llama3")
+llm = OllamaLLM(model="llama3.1")
 llm.invoke("The meaning of life is")
 ```
langchain_ollama-0.3.4.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+langchain_ollama-0.3.4.dist-info/METADATA,sha256=wM54qEosykpO89kExse0V4Y3K3ncspLP_mFNKsBxTNY,2072
+langchain_ollama-0.3.4.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+langchain_ollama-0.3.4.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+langchain_ollama-0.3.4.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+langchain_ollama/__init__.py,sha256=TI1gI0Wpg7mRXehGpxrJG2flF_t4Ev-aIJlLKV-CgL0,633
+langchain_ollama/_utils.py,sha256=dmFO4tSvDTeMALc89QnTBLNWPMZL0eNAq1EDwuMjRA8,1416
+langchain_ollama/chat_models.py,sha256=olz3KJeLG1vk47Xl38nN9bP4bcol5cBQnPnu5MyP8k8,55539
+langchain_ollama/embeddings.py,sha256=VprOFiBRuUPGEygoIfxvAZStUsqRj65ZNMpkvCAo_9Y,10239
+langchain_ollama/llms.py,sha256=PSJ-VQMocp1nm-pgtnKnozidt66RKJiEnhdzftoLNNc,16778
+langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ollama-0.3.4.dist-info/RECORD,,
langchain_ollama-0.3.3.dist-info/RECORD
REMOVED
@@ -1,10 +0,0 @@
-langchain_ollama-0.3.3.dist-info/METADATA,sha256=K2QhMD3eEMIMegVdXf6ZyQ7C5fbl2wQ1CvvqtUOmyug,1462
-langchain_ollama-0.3.3.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
-langchain_ollama-0.3.3.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-langchain_ollama-0.3.3.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
-langchain_ollama/__init__.py,sha256=1f8Cyf1_bS0CT16U8-Os1P1Oa3erIDtIBTH4KVmBLvY,633
-langchain_ollama/chat_models.py,sha256=Z2wzR5R568aNyH1LKN84kUdNZFOvvgY-csE626_sBVc,51723
-langchain_ollama/embeddings.py,sha256=udL26XHdUMybQogY9Gj3vlJXxxkVAVZ-9He2U8wlJ3k,9547
-langchain_ollama/llms.py,sha256=Rin6HVZvrH1epRsjhojSmOBFWAaU0cfOU1gV6I0bqJE,13933
-langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-langchain_ollama-0.3.3.dist-info/RECORD,,
{langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/entry_points.txt
File without changes
{langchain_ollama-0.3.3.dist-info → langchain_ollama-0.3.4.dist-info}/licenses/LICENSE
File without changes