langchain-ollama 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_ollama/__init__.py +13 -1
- langchain_ollama/_utils.py +6 -3
- langchain_ollama/chat_models.py +154 -100
- langchain_ollama/embeddings.py +44 -37
- langchain_ollama/llms.py +37 -31
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/METADATA +3 -3
- langchain_ollama-0.3.7.dist-info/RECORD +11 -0
- langchain_ollama-0.3.5.dist-info/RECORD +0 -11
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/WHEEL +0 -0
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/entry_points.txt +0 -0
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/licenses/LICENSE +0 -0
langchain_ollama/__init__.py
CHANGED
@@ -1,6 +1,16 @@
 """This is the langchain_ollama package.

-
+Provides infrastructure for interacting with the `Ollama <https://ollama.com/>`__
+service.
+
+.. note::
+    **Newly added in 0.3.4:** ``validate_model_on_init`` param on all models.
+    This parameter allows you to validate the model exists in Ollama locally on
+    initialization. If set to ``True``, it will raise an error if the model does not
+    exist locally. This is useful for ensuring that the model is available before
+    attempting to use it, especially in environments where models may not be
+    pre-downloaded.
+
 """

 from importlib import metadata
@@ -10,6 +20,8 @@ from langchain_ollama.embeddings import OllamaEmbeddings
 from langchain_ollama.llms import OllamaLLM

 try:
+    if __package__ is None:
+        raise metadata.PackageNotFoundError
     __version__ = metadata.version(__package__)
 except metadata.PackageNotFoundError:
     # Case where package metadata is not available.
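The ``validate_model_on_init`` parameter described in the note above is available on all three model classes; a minimal usage sketch (the model name is only an example and must already be pulled locally):

.. code-block:: python

    from langchain_ollama import ChatOllama

    # Fails fast at construction time if "llama3.1" has not been pulled into
    # the local Ollama instance, rather than erroring on the first invoke.
    llm = ChatOllama(model="llama3.1", validate_model_on_init=True)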
langchain_ollama/_utils.py
CHANGED
@@ -1,11 +1,11 @@
-"""Utility
+"""Utility function to validate Ollama models."""

 from httpx import ConnectError
 from ollama import Client, ResponseError


 def validate_model(client: Client, model_name: str) -> None:
-    """Validate that a model exists in the Ollama instance.
+    """Validate that a model exists in the local Ollama instance.

     Args:
         client: The Ollama client.
@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
         )
         raise ValueError(msg)
     except ConnectError as e:
-        msg =
+        msg = (
+            "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+            "running and accessible. https://ollama.com/download"
+        )
         raise ValueError(msg) from e
     except ResponseError as e:
         msg = (
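The reworded connection error surfaces as a ``ValueError`` when the helper is called directly; a small sketch (the host and model name are assumptions for illustration):

.. code-block:: python

    from ollama import Client

    from langchain_ollama._utils import validate_model

    client = Client(host="http://localhost:11434")
    try:
        validate_model(client, "llama3.1")
    except ValueError as err:
        # Raised when the model has not been pulled locally, or when Ollama
        # itself is not running; the new message points to ollama.com/download.
        print(err)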
langchain_ollama/chat_models.py
CHANGED
@@ -2,7 +2,9 @@

 from __future__ import annotations

+import ast
 import json
+import logging
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
@@ -57,6 +59,8 @@ from typing_extensions import Self, is_typeddict

 from ._utils import validate_model

+log = logging.getLogger(__name__)
+

 def _get_usage_metadata_from_generation_info(
     generation_info: Optional[Mapping[str, Any]],
@@ -77,33 +81,46 @@ def _get_usage_metadata_from_generation_info(

 def _parse_json_string(
     json_string: str,
+    *,
     raw_tool_call: dict[str, Any],
-    skip: bool,
+    skip: bool,
 ) -> Any:
     """Attempt to parse a JSON string for tool calling.

+    It first tries to use the standard ``json.loads``. If that fails, it falls
+    back to ``ast.literal_eval`` to safely parse Python literals, which is more
+    robust against models using single quotes or containing apostrophes.
+
     Args:
         json_string: JSON string to parse.
-        skip: Whether to ignore parsing errors and return the value anyways.
         raw_tool_call: Raw tool call to include in error message.
+        skip: Whether to ignore parsing errors and return the value anyways.

     Returns:
-        The parsed JSON string.
+        The parsed JSON string or Python literal.

     Raises:
-        OutputParserException: If the
+        OutputParserException: If the string is invalid and ``skip=False``.
+
     """
     try:
         return json.loads(json_string)
-    except json.JSONDecodeError
-
-
-
-
-
-
-
-
+    except json.JSONDecodeError:
+        try:
+            # Use ast.literal_eval to safely parse Python-style dicts
+            # (e.g. with single quotes)
+            return ast.literal_eval(json_string)
+        except (SyntaxError, ValueError) as e:
+            # If both fail, and we're not skipping, raise an informative error.
+            if skip:
+                return json_string
+            msg = (
+                f"Function {raw_tool_call['function']['name']} arguments:\n\n"
+                f"{raw_tool_call['function']['arguments']}"
+                "\n\nare not valid JSON or a Python literal. "
+                f"Received error: {e}"
+            )
+            raise OutputParserException(msg) from e
     except TypeError as e:
         if skip:
             return json_string
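The fallback added here can be illustrated standalone: ``json.loads`` handles well-formed JSON, and ``ast.literal_eval`` accepts the Python-style dicts some models emit. A simplified sketch, not the library helper itself:

.. code-block:: python

    import ast
    import json


    def parse_arguments(raw: str):
        """Parse tool-call arguments: JSON first, Python literal as a fallback."""
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            # literal_eval accepts single quotes and embedded apostrophes
            # without executing any code.
            return ast.literal_eval(raw)


    parse_arguments('{"city": "Paris"}')         # -> {'city': 'Paris'}
    parse_arguments("{'city': \"it's Paris\"}")  # -> {'city': "it's Paris"}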
@@ -122,14 +139,20 @@ def _parse_arguments_from_tool_call(

     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
     Should be removed/changed if fixed upstream.
+
     See https://github.com/ollama/ollama/issues/6155
+
     """
     if "function" not in raw_tool_call:
         return None
+    function_name = raw_tool_call["function"]["name"]
     arguments = raw_tool_call["function"]["arguments"]
     parsed_arguments: dict = {}
     if isinstance(arguments, dict):
         for key, value in arguments.items():
+            # Filter out metadata fields like 'functionName' that echo function name
+            if key == "functionName" and value == function_name:
+                continue
             if isinstance(value, str):
                 parsed_value = _parse_json_string(
                     value, skip=True, raw_tool_call=raw_tool_call
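With the new filter, an arguments dict that echoes the function name no longer leaks that key into the parsed tool call. Roughly, with an illustrative payload (not a captured Ollama response):

.. code-block:: python

    raw_tool_call = {
        "function": {
            "name": "get_weather",
            # Some models echo the function name back as a pseudo-argument.
            "arguments": {"functionName": "get_weather", "location": "Paris"},
        }
    }
    # After the filtering above, the parsed arguments keep only real
    # parameters: {"location": "Paris"}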
@@ -206,7 +229,7 @@ class ChatOllama(BaseChatModel):

         .. code-block:: bash

-            ollama pull
+            ollama pull gpt-oss:20b
             pip install -U langchain-ollama

     Key init args — completion params:
@@ -239,7 +262,8 @@ class ChatOllama(BaseChatModel):
             from langchain_ollama import ChatOllama

             llm = ChatOllama(
-                model = "
+                model = "gpt-oss:20b",
+                validate_model_on_init = True,
                 temperature = 0.8,
                 num_predict = 256,
                 # other params ...
@@ -261,10 +285,7 @@ class ChatOllama(BaseChatModel):
     Stream:
         .. code-block:: python

-
-                ("human", "Return the words Hello World!"),
-            ]
-            for chunk in llm.stream(messages):
+            for chunk in llm.stream("Return the words Hello World!"):
                 print(chunk.text(), end="")


@@ -291,10 +312,7 @@ class ChatOllama(BaseChatModel):
     Async:
         .. code-block:: python

-
-                ("human", "Hello how are you!"),
-            ]
-            await llm.ainvoke(messages)
+            await llm.ainvoke("Hello how are you!")

         .. code-block:: python

@@ -302,10 +320,7 @@ class ChatOllama(BaseChatModel):

         .. code-block:: python

-
-                ("human", "Say hello world!"),
-            ]
-            async for chunk in llm.astream(messages):
+            async for chunk in llm.astream("Say hello world!"):
                 print(chunk.content)

         .. code-block:: python
@@ -333,10 +348,7 @@ class ChatOllama(BaseChatModel):


             json_llm = ChatOllama(format="json")
-
-                ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
-            ]
-            llm.invoke(messages).content
+            llm.invoke("Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only.").content

         .. code-block:: python

@@ -383,17 +395,16 @@ class ChatOllama(BaseChatModel):

             llm = ChatOllama(
                 model = "deepseek-r1:8b",
+                validate_model_on_init = True,
                 reasoning= True,
             )

-
-            messages: List[Any] = [user_message]
-            llm.invoke(messages)
+            llm.invoke("how many r in the word strawberry?")

             # or, on an invocation basis:

-            llm.invoke(
-            # or llm.stream(
+            llm.invoke("how many r in the word strawberry?", reasoning=True)
+            # or llm.stream("how many r in the word strawberry?", reasoning=True)

             # If not provided, the invocation will default to the ChatOllama reasoning
             # param provided (None by default).
@@ -402,13 +413,12 @@ class ChatOllama(BaseChatModel):

             AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})

-
     """ # noqa: E501, pylint: disable=line-too-long

     model: str
     """Model name to use."""

-    reasoning: Optional[bool] = None
+    reasoning: Optional[Union[bool, str]] = None
     """Controls the reasoning/thinking mode for
     `supported models <https://ollama.com/search?c=thinking>`__.

@@ -421,33 +431,42 @@ class ChatOllama(BaseChatModel):
     - ``None`` (Default): The model will use its default reasoning behavior. Note
       however, if the model's default behavior *is* to perform reasoning, think tags
       ()``<think>`` and ``</think>``) will be present within the main response content
-      unless you set ``reasoning`` to ``True``.
+      unless you set ``reasoning`` to ``True``.
+    - ``str``: e.g. ``'low'``, ``'medium'``, ``'high'``. Enables reasoning with a custom
+      intensity level. Currently, this is only supported ``gpt-oss``. See the
+      `Ollama docs <https://github.com/ollama/ollama-python/blob/da79e987f0ac0a4986bf396f043b36ef840370bc/ollama/_types.py#L210>`__
+      for more information.
+
+    """

     validate_model_on_init: bool = False
-    """Whether to validate the model exists in Ollama locally on initialization.
+    """Whether to validate the model exists in Ollama locally on initialization.
+
+    .. versionadded:: 0.3.4
+    """

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
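Since ``reasoning`` now also accepts a string, the effort level can be set on the instance or per call; the string form is documented only for ``gpt-oss``. A sketch, assuming the model is available locally:

.. code-block:: python

    from langchain_ollama import ChatOllama

    llm = ChatOllama(model="gpt-oss:20b", reasoning="low")
    # Raise the effort for a single call:
    llm.invoke("how many r in the word strawberry?", reasoning="high")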
@@ -457,20 +476,20 @@ class ChatOllama(BaseChatModel):

     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -482,21 +501,21 @@ class ChatOllama(BaseChatModel):

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
-    """Specify the format of the output (options:
+    """Specify the format of the output (options: ``'json'``, JSON schema)."""

     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
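Because ``format`` accepts a ``JsonSchemaValue`` as well as ``'json'``, a schema dict can be passed directly to constrain the output. The schema and model name below are made-up examples:

.. code-block:: python

    from langchain_ollama import ChatOllama

    weather_schema = {
        "type": "object",
        "properties": {
            "location": {"type": "string"},
            "time_of_day": {"type": "string"},
        },
        "required": ["location", "time_of_day"],
    }
    llm = ChatOllama(model="llama3.1", format=weather_schema)
    llm.invoke("Return a weather query with keys location and time_of_day.")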
@@ -506,32 +525,35 @@ class ChatOllama(BaseChatModel):

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
     passing to the httpx AsyncClient.
+
     `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
     passing to the httpx Client.
+
     `Full list of params. <https://www.python-httpx.org/api/#client>`__
+
     """

     _client: Client = PrivateAttr()
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: AsyncClient = PrivateAttr()
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     def _chat_params(
         self,
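The split between ``client_kwargs`` and the per-client overrides can be used like this; the timeout and header values are arbitrary examples of standard httpx ``Client``/``AsyncClient`` parameters:

.. code-block:: python

    from langchain_ollama import ChatOllama

    llm = ChatOllama(
        model="llama3.1",
        client_kwargs={"timeout": 60.0},  # applied to both clients
        sync_client_kwargs={"headers": {"x-request-source": "batch"}},
        async_client_kwargs={"headers": {"x-request-source": "serving"}},
    )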
@@ -821,6 +843,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'."
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
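The skipped ``done_reason='load'`` chunks are reported only through the module logger, so they surface via ordinary logging configuration; a minimal sketch:

.. code-block:: python

    import logging

    # The warning above is emitted by the "langchain_ollama.chat_models" logger.
    logging.basicConfig(level=logging.WARNING)
    logging.getLogger("langchain_ollama.chat_models").setLevel(logging.WARNING)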
@@ -829,12 +873,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None

-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
@@ -881,6 +919,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -889,12 +949,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None

-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
@@ -993,8 +1047,7 @@ class ChatOllama(BaseChatModel):
         """Model wrapper that returns outputs formatted to match the given schema.

         Args:
-            schema:
-                The output schema. Can be passed in as:
+            schema: The output schema. Can be passed in as:

                 - a Pydantic class,
                 - a JSON schema
@@ -1010,35 +1063,35 @@ class ChatOllama(BaseChatModel):

             method: The method for steering model generation, one of:

-                -
+                - ``'json_schema'``:
                     Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
-                -
+                - ``'function_calling'``:
                     Uses Ollama's tool-calling API
-                -
-                    Specifies ``format=
+                - ``'json_mode'``:
+                    Specifies ``format='json'``. Note that if using JSON mode then you
                     must include instructions for formatting the output into the
                     desired schema into the model call.

             include_raw:
                 If False then only the parsed structured output is returned. If
                 an error occurs during model output parsing it will be raised. If True
-                then both the raw model response (a BaseMessage) and the parsed model
+                then both the raw model response (a ``BaseMessage``) and the parsed model
                 response will be returned. If an error occurs during output parsing it
                 will be caught and returned as well. The final output is always a dict
-                with keys
+                with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.

             kwargs: Additional keyword args aren't supported.

         Returns:
             A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:

-            -
-            -
-            -
+            - ``'raw'``: ``BaseMessage``
+            - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+            - ``'parsing_error'``: Optional[BaseException]

         .. versionchanged:: 0.2.2

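The reformatted ``method`` options map onto the usual call pattern; a short sketch with a Pydantic schema (the model name is illustrative):

.. code-block:: python

    from pydantic import BaseModel

    from langchain_ollama import ChatOllama


    class AnswerWithJustification(BaseModel):
        answer: str
        justification: str


    llm = ChatOllama(model="llama3.1", temperature=0)
    structured_llm = llm.with_structured_output(
        AnswerWithJustification, method="json_schema", include_raw=False
    )
    structured_llm.invoke(
        "What weighs more, a pound of bricks or a pound of feathers?"
    )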
@@ -1046,7 +1099,7 @@ class ChatOllama(BaseChatModel):

         .. versionchanged:: 0.3.0

-            Updated default ``method`` to ``
+            Updated default ``method`` to ``'json_schema'``.

         .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False

@@ -1081,7 +1134,7 @@ class ChatOllama(BaseChatModel):
                 # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                 # )

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_schema'``, ``include_raw=True``

             .. code-block:: python

@@ -1110,7 +1163,7 @@ class ChatOllama(BaseChatModel):
                 # 'parsing_error': None
                 # }

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='function_calling'``, ``include_raw=False``

             .. code-block:: python

@@ -1174,7 +1227,7 @@ class ChatOllama(BaseChatModel):
                 # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                 # }

-        .. dropdown:: Example: schema=OpenAI function schema, method=
+        .. dropdown:: Example: ``schema=OpenAI`` function schema, ``method='function_calling'``, ``include_raw=False``

             .. code-block:: python

@@ -1204,7 +1257,7 @@ class ChatOllama(BaseChatModel):
                 # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                 # }

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_mode'``, ``include_raw=True``

             .. code-block::

@@ -1232,6 +1285,7 @@ class ChatOllama(BaseChatModel):
                 # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
                 # 'parsing_error': None
                 # }
+
         """ # noqa: E501, D301
         _ = kwargs.pop("strict", None)
         if kwargs:
langchain_ollama/embeddings.py
CHANGED
@@ -21,12 +21,12 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.

     Set up a local Ollama instance:
-        Install the Ollama package and set up a
-
+        `Install the Ollama package <https://github.com/ollama/ollama>`__ and set up a
+        local Ollama instance.

         You will need to choose a model to serve.

-        You can view a list of available models via the model library
+        You can view a list of available models via `the model library <https://ollama.com/library>`__.

         To fetch a model from the Ollama model library use ``ollama pull <name-of-model>``.

@@ -39,8 +39,8 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         This will download the default tagged version of the model.
         Typically, the default points to the latest, smallest sized-parameter model.

-        * On Mac, the models will be downloaded to
-        * On Linux (or WSL), the models will be stored at
+        * On Mac, the models will be downloaded to ``~/.ollama/models``
+        * On Linux (or WSL), the models will be stored at ``/usr/share/ollama/.ollama/models``

         You can specify the exact version of the model of interest
         as such ``ollama pull vicuna:13b-v1.5-16k-q4_0``.
@@ -122,73 +122,80 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         .. code-block:: python

             [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
+
     """ # noqa: E501

     model: str
     """Model name to use."""

     validate_model_on_init: bool = False
-    """Whether to validate the model exists in ollama locally on initialization.
+    """Whether to validate the model exists in ollama locally on initialization.
+
+    .. versionadded:: 0.3.4
+
+    """

     base_url: Optional[str] = None
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the httpx
     AsyncClient.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+
     """

     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     keep_alive: Optional[int] = None
-    """
-    following the request (default: 5m)
+    """Controls how long the model will stay loaded into memory
+    following the request (default: ``5m``)
     """

     num_thread: Optional[int] = None
@@ -199,34 +206,34 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     model_config = ConfigDict(
         extra="forbid",
@@ -253,7 +260,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     @model_validator(mode="after")
     def _set_clients(self) -> Self:
-        """Set clients to use for
+        """Set clients to use for Ollama."""
         client_kwargs = self.client_kwargs or {}

         sync_client_kwargs = client_kwargs
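Tying the embeddings changes together, a brief usage sketch (the model name is an example and must be pulled first):

.. code-block:: python

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",
        validate_model_on_init=True,
    )
    vector = embeddings.embed_query("What is the meaning of life?")
    vectors = embeddings.embed_documents(["first document", "second document"])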
langchain_ollama/llms.py
CHANGED
@@ -33,6 +33,7 @@ class OllamaLLM(BaseLLM):

             model = OllamaLLM(model="llama3")
             print(model.invoke("Come up with 10 names for a song about parrots"))
+
     """

     model: str
@@ -53,30 +54,33 @@ class OllamaLLM(BaseLLM):
     be present directly within the main response content."""

     validate_model_on_init: bool = False
-    """Whether to validate the model exists in ollama locally on initialization.
+    """Whether to validate the model exists in ollama locally on initialization.
+
+    .. versionadded:: 0.3.4
+    """

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048)"""
+    next token. (Default: ``2048``)"""

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
@@ -86,20 +90,20 @@ class OllamaLLM(BaseLLM):

     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -111,21 +115,21 @@ class OllamaLLM(BaseLLM):

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     format: Literal["", "json"] = ""
-    """Specify the format of the output (options: json)"""
+    """Specify the format of the output (options: ``'json'``)"""

     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
@@ -135,33 +139,35 @@ class OllamaLLM(BaseLLM):

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the HTTPX
     AsyncClient.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+
     """

     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     def _generate_params(
         self,
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/METADATA
CHANGED
@@ -1,14 +1,14 @@
 Metadata-Version: 2.1
 Name: langchain-ollama
-Version: 0.3.5
+Version: 0.3.7
 Summary: An integration package connecting Ollama and LangChain
 License: MIT
 Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
 Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
 Project-URL: repository, https://github.com/langchain-ai/langchain
 Requires-Python: >=3.9
-Requires-Dist: ollama<1.0.0,>=0.5.
-Requires-Dist: langchain-core<1.0.0,>=0.3.
+Requires-Dist: ollama<1.0.0,>=0.5.3
+Requires-Dist: langchain-core<1.0.0,>=0.3.74
 Description-Content-Type: text/markdown

 # langchain-ollama
langchain_ollama-0.3.7.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+langchain_ollama-0.3.7.dist-info/METADATA,sha256=JdFmlg4IUkM7Sy99773NdtIJKygkdJ6pYauIo29IWtQ,2072
+langchain_ollama-0.3.7.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+langchain_ollama-0.3.7.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+langchain_ollama-0.3.7.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+langchain_ollama/__init__.py,sha256=yXoECBZPpZGcg6V80qJCrgB02jW4KcjICZ5soLel6gw,1162
+langchain_ollama/_utils.py,sha256=amg8-DK0XaWe_aUO-ADFDUzb9EMJ-vMeyz2e1qrqAUo,1443
+langchain_ollama/chat_models.py,sha256=xV5R4hlOAQOsRwb4WdfR4UPveI3-QChBaa3nGa4Ny3I,58314
+langchain_ollama/embeddings.py,sha256=PxYDwvHbq-J6lTusSBIXtTeBnYSjG3R-a2wXFa52LXM,10455
+langchain_ollama/llms.py,sha256=65NARxA7xKYmwugvw4SxfpDPzHKiGGa6jsX7BgR64oM,16936
+langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ollama-0.3.7.dist-info/RECORD,,
langchain_ollama-0.3.5.dist-info/RECORD
REMOVED
@@ -1,11 +0,0 @@
-langchain_ollama-0.3.5.dist-info/METADATA,sha256=15Fpg-jcUr1QRiPMxng3GPbvRHmAapo1s6qykTZvHk0,2072
-langchain_ollama-0.3.5.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
-langchain_ollama-0.3.5.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-langchain_ollama-0.3.5.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
-langchain_ollama/__init__.py,sha256=TI1gI0Wpg7mRXehGpxrJG2flF_t4Ev-aIJlLKV-CgL0,633
-langchain_ollama/_utils.py,sha256=dmFO4tSvDTeMALc89QnTBLNWPMZL0eNAq1EDwuMjRA8,1416
-langchain_ollama/chat_models.py,sha256=olz3KJeLG1vk47Xl38nN9bP4bcol5cBQnPnu5MyP8k8,55539
-langchain_ollama/embeddings.py,sha256=walU1vZq_YamLLPDSJLbMtOu6jFbiNnhJ5ni2sybCRs,10318
-langchain_ollama/llms.py,sha256=PSJ-VQMocp1nm-pgtnKnozidt66RKJiEnhdzftoLNNc,16778
-langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-langchain_ollama-0.3.5.dist-info/RECORD,,
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/WHEEL
File without changes
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/entry_points.txt
File without changes
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/licenses/LICENSE
File without changes