langchain-ollama 0.2.2rc1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as published to their public registry. It is provided for informational purposes only.

langchain_ollama/chat_models.py

@@ -1,16 +1,20 @@
  """Ollama chat models."""
 
+ import json
+ from operator import itemgetter
  from typing import (
      Any,
      AsyncIterator,
      Callable,
      Dict,
+     Final,
      Iterator,
      List,
      Literal,
      Mapping,
      Optional,
      Sequence,
+     Tuple,
      Type,
      Union,
      cast,
@@ -21,12 +25,14 @@ from langchain_core.callbacks import (
      CallbackManagerForLLMRun,
  )
  from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
+ from langchain_core.exceptions import OutputParserException
  from langchain_core.language_models import LanguageModelInput
  from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
  from langchain_core.messages import (
      AIMessage,
      AIMessageChunk,
      BaseMessage,
+     BaseMessageChunk,
      HumanMessage,
      SystemMessage,
      ToolCall,
@@ -34,13 +40,28 @@ from langchain_core.messages import (
  )
  from langchain_core.messages.ai import UsageMetadata
  from langchain_core.messages.tool import tool_call
+ from langchain_core.output_parsers import (
+     JsonOutputKeyToolsParser,
+     JsonOutputParser,
+     PydanticOutputParser,
+     PydanticToolsParser,
+ )
  from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
- from langchain_core.runnables import Runnable
+ from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
  from langchain_core.tools import BaseTool
- from langchain_core.utils.function_calling import convert_to_openai_tool
+ from langchain_core.utils.function_calling import (
+     convert_to_json_schema,
+     convert_to_openai_tool,
+ )
+ from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
  from ollama import AsyncClient, Client, Message, Options
- from pydantic import PrivateAttr, model_validator
- from typing_extensions import Self
+ from pydantic import BaseModel, PrivateAttr, model_validator
+ from pydantic.json_schema import JsonSchemaValue
+ from pydantic.v1 import BaseModel as BaseModelV1
+ from typing_extensions import Self, is_typeddict
+
+ DEFAULT_THINK_TOKEN_START: Final[str] = "<think>"
+ DEFAULT_THINK_TOKEN_END: Final[str] = "</think>"
 
 
  def _get_usage_metadata_from_generation_info(
@@ -60,6 +81,76 @@ def _get_usage_metadata_from_generation_info(
      return None
 
 
+ def _parse_json_string(
+     json_string: str, raw_tool_call: dict[str, Any], skip: bool
+ ) -> Any:
+     """Attempt to parse a JSON string for tool calling.
+
+     Args:
+         json_string: JSON string to parse.
+         skip: Whether to ignore parsing errors and return the value anyways.
+         raw_tool_call: Raw tool call to include in error message.
+
+     Returns:
+         The parsed JSON string.
+
+     Raises:
+         OutputParserException: If the JSON string is invalid and skip=False.
+     """
+     try:
+         return json.loads(json_string)
+     except json.JSONDecodeError as e:
+         if skip:
+             return json_string
+         msg = (
+             f"Function {raw_tool_call['function']['name']} arguments:\n\n"
+             f"{raw_tool_call['function']['arguments']}\n\nare not valid JSON. "
+             f"Received JSONDecodeError {e}"
+         )
+         raise OutputParserException(msg) from e
+     except TypeError as e:
+         if skip:
+             return json_string
+         msg = (
+             f"Function {raw_tool_call['function']['name']} arguments:\n\n"
+             f"{raw_tool_call['function']['arguments']}\n\nare not a string or a "
+             f"dictionary. Received TypeError {e}"
+         )
+         raise OutputParserException(msg) from e
+
+
+ def _parse_arguments_from_tool_call(
+     raw_tool_call: dict[str, Any],
+ ) -> Optional[dict[str, Any]]:
+     """Parse arguments by trying to parse any shallowly nested string-encoded JSON.
+
+     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
+     Should be removed/changed if fixed upstream.
+     See https://github.com/ollama/ollama/issues/6155
+     """
+     if "function" not in raw_tool_call:
+         return None
+     arguments = raw_tool_call["function"]["arguments"]
+     parsed_arguments: dict = {}
+     if isinstance(arguments, dict):
+         for key, value in arguments.items():
+             if isinstance(value, str):
+                 parsed_value = _parse_json_string(
+                     value, skip=True, raw_tool_call=raw_tool_call
+                 )
+                 if isinstance(parsed_value, (dict, list)):
+                     parsed_arguments[key] = parsed_value
+                 else:
+                     parsed_arguments[key] = value
+             else:
+                 parsed_arguments[key] = value
+     else:
+         parsed_arguments = _parse_json_string(
+             arguments, skip=False, raw_tool_call=raw_tool_call
+         )
+     return parsed_arguments
+
+
  def _get_tool_calls_from_response(
      response: Mapping[str, Any],
  ) -> List[ToolCall]:
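
The `_parse_arguments_from_tool_call` helper added above normalizes the inconsistent argument payloads Ollama can return (sometimes a dict, sometimes string-encoded JSON; see ollama/ollama#6155). A minimal sketch of the intended behavior — the helper is private, so importing it as below is illustrative only:

```python
from langchain_ollama.chat_models import _parse_arguments_from_tool_call

# One argument arrives as a string-encoded JSON object, another as a plain string.
raw_tool_call = {
    "function": {
        "name": "get_weather",
        "arguments": {
            "location": '{"city": "Pune", "country": "India"}',
            "unit": "celsius",
        },
    }
}

parsed = _parse_arguments_from_tool_call(raw_tool_call)
# Shallowly nested JSON strings are decoded; ordinary strings pass through:
# {"location": {"city": "Pune", "country": "India"}, "unit": "celsius"}
```
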
@@ -72,7 +163,7 @@ def _get_tool_calls_from_response(
                      tool_call(
                          id=str(uuid4()),
                          name=tc["function"]["name"],
-                         args=tc["function"]["arguments"],
+                         args=_parse_arguments_from_tool_call(tc) or {},
                      )
                  )
      return tool_calls
@@ -89,6 +180,10 @@ def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
      }
 
 
+ def _is_pydantic_class(obj: Any) -> bool:
+     return isinstance(obj, type) and is_basemodel_subclass(obj)
+
+
  class ChatOllama(BaseChatModel):
      r"""Ollama chat model integration.
 
@@ -144,7 +239,7 @@ class ChatOllama(BaseChatModel):
                  ("human", "Return the words Hello World!"),
              ]
              for chunk in llm.stream(messages):
-                 print(chunk)
+                 print(chunk.text(), end="")
 
 
          .. code-block:: python
@@ -222,8 +317,6 @@ class ChatOllama(BaseChatModel):
              '{"location": "Pune, India", "time_of_day": "morning"}'
 
      Tool Calling:
-         .. warning::
-             Ollama currently does not support streaming for tools
 
          .. code-block:: python
 
@@ -248,6 +341,13 @@ class ChatOllama(BaseChatModel):
      model: str
      """Model name to use."""
 
+     extract_reasoning: Optional[Union[bool, Tuple[str, str]]] = False
+     """Whether to extract the reasoning tokens in think blocks.
+     Extracts `chunk.content` to `chunk.additional_kwargs.reasoning_content`.
+     If a tuple is supplied, they are assumed to be the (start, end) tokens.
+     If `extract_reasoning=True`, the tokens will default to (<think>, </think>).
+     """
+
      mirostat: Optional[int] = None
      """Enable Mirostat sampling for controlling perplexity.
      (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
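
The new `extract_reasoning` field moves `<think>...</think>` blocks out of `content` and into `additional_kwargs["reasoning_content"]`. A minimal usage sketch, assuming a locally pulled model such as `deepseek-r1` that emits think blocks:

```python
from langchain_ollama import ChatOllama

# The model name is an assumption; any model that wraps its chain of thought
# in <think>...</think> tags should behave the same way.
llm = ChatOllama(model="deepseek-r1", extract_reasoning=True)

response = llm.invoke("What is 17 * 23?")
print(response.content)                                     # final answer only
print(response.additional_kwargs.get("reasoning_content"))  # extracted <think> text
```
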
@@ -317,8 +417,8 @@ class ChatOllama(BaseChatModel):
      to more diverse text, while a lower value (e.g., 0.5) will
      generate more focused and conservative text. (Default: 0.9)"""
 
-     format: Literal["", "json"] = ""
-     """Specify the format of the output (options: json)"""
+     format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
+     """Specify the format of the output (options: "json", JSON schema)."""
 
      keep_alive: Optional[Union[int, str]] = None
      """How long the model will stay loaded into memory."""
@@ -375,12 +475,9 @@ class ChatOllama(BaseChatModel):
              },
          )
 
-         tools = kwargs.get("tools")
-         default_stream = not bool(tools)
-
          params = {
              "messages": ollama_messages,
-             "stream": kwargs.pop("stream", default_stream),
+             "stream": kwargs.pop("stream", True),
              "model": kwargs.pop("model", self.model),
              "format": kwargs.pop("format", self.format),
              "options": Options(**options_dict),
@@ -388,7 +485,7 @@ class ChatOllama(BaseChatModel):
              **kwargs,
          }
 
-         if tools:
+         if tools := kwargs.get("tools"):
              params["tools"] = tools
 
          return params
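
Streaming now defaults to on even when tools are bound: the old `default_stream = not bool(tools)` guard is removed here, and the docstring warning about tools not streaming was dropped above. A hedged sketch of streaming with a bound tool, assuming `llama3.1` supports tool calling locally:

```python
from langchain_core.tools import tool
from langchain_ollama import ChatOllama


@tool
def get_weather(city: str) -> str:
    """Return the current weather for a city."""
    return f"It is sunny in {city}."


llm = ChatOllama(model="llama3.1").bind_tools([get_weather])

# Tool-call fragments now arrive as regular streamed chunks.
for chunk in llm.stream("What's the weather in Pune?"):
    print(chunk.tool_call_chunks or chunk.content)
```
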
@@ -484,6 +581,28 @@ class ChatOllama(BaseChatModel):
 
          return ollama_messages
 
+     def _extract_reasoning(
+         self, message_chunk: BaseMessageChunk, is_thinking: bool
+     ) -> Tuple[BaseMessageChunk, bool]:
+         """Mutate a message chunk to extract reasoning content."""
+         if not self.extract_reasoning:
+             return message_chunk, is_thinking
+         elif self.extract_reasoning is True:
+             start_token = DEFAULT_THINK_TOKEN_START
+             end_token = DEFAULT_THINK_TOKEN_END
+         else:
+             start_token, end_token = cast(tuple, self.extract_reasoning)
+         if start_token in message_chunk.content:
+             is_thinking = True
+         content = message_chunk.content
+         if is_thinking:
+             message_chunk.additional_kwargs["reasoning_content"] = content
+             message_chunk.content = ""
+         if end_token in content:
+             is_thinking = False
+
+         return message_chunk, is_thinking
+
      async def _acreate_chat_stream(
          self,
          messages: List[BaseMessage],
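
`_extract_reasoning` flips an `is_thinking` flag when the start token appears and clears it after the end token, rerouting everything in between into `additional_kwargs`. A sketch with custom markers — the model name and the `<reasoning>` tags below are assumptions, not library defaults:

```python
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="my-reasoning-model",  # hypothetical model that wraps its chain of thought
    extract_reasoning=("<reasoning>", "</reasoning>"),
)

for chunk in llm.stream("Why is the sky blue?"):
    reasoning = chunk.additional_kwargs.get("reasoning_content")
    if reasoning:
        print(f"[thinking] {reasoning}", end="")
    else:
        print(chunk.content, end="")
```
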
@@ -520,35 +639,17 @@ class ChatOllama(BaseChatModel):
          **kwargs: Any,
      ) -> ChatGenerationChunk:
          final_chunk = None
-         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
-             if not isinstance(stream_resp, str):
-                 chunk = ChatGenerationChunk(
-                     message=AIMessageChunk(
-                         content=(
-                             stream_resp["message"]["content"]
-                             if "message" in stream_resp
-                             and "content" in stream_resp["message"]
-                             else ""
-                         ),
-                         usage_metadata=_get_usage_metadata_from_generation_info(
-                             stream_resp
-                         ),
-                         tool_calls=_get_tool_calls_from_response(stream_resp),
-                     ),
-                     generation_info=(
-                         dict(stream_resp) if stream_resp.get("done") is True else None
-                     ),
+         for chunk in self._iterate_over_stream(messages, stop, **kwargs):
+             if final_chunk is None:
+                 final_chunk = chunk
+             else:
+                 final_chunk += chunk
+             if run_manager:
+                 run_manager.on_llm_new_token(
+                     chunk.text,
+                     chunk=chunk,
+                     verbose=verbose,
                  )
-                 if final_chunk is None:
-                     final_chunk = chunk
-                 else:
-                     final_chunk += chunk
-                 if run_manager:
-                     run_manager.on_llm_new_token(
-                         chunk.text,
-                         chunk=chunk,
-                         verbose=verbose,
-                     )
 
          if final_chunk is None:
              raise ValueError("No data received from Ollama stream.")
@@ -563,35 +664,17 @@ class ChatOllama(BaseChatModel):
          **kwargs: Any,
      ) -> ChatGenerationChunk:
          final_chunk = None
-         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
-             if not isinstance(stream_resp, str):
-                 chunk = ChatGenerationChunk(
-                     message=AIMessageChunk(
-                         content=(
-                             stream_resp["message"]["content"]
-                             if "message" in stream_resp
-                             and "content" in stream_resp["message"]
-                             else ""
-                         ),
-                         usage_metadata=_get_usage_metadata_from_generation_info(
-                             stream_resp
-                         ),
-                         tool_calls=_get_tool_calls_from_response(stream_resp),
-                     ),
-                     generation_info=(
-                         dict(stream_resp) if stream_resp.get("done") is True else None
-                     ),
+         async for chunk in self._aiterate_over_stream(messages, stop, **kwargs):
+             if final_chunk is None:
+                 final_chunk = chunk
+             else:
+                 final_chunk += chunk
+             if run_manager:
+                 await run_manager.on_llm_new_token(
+                     chunk.text,
+                     chunk=chunk,
+                     verbose=verbose,
                  )
-                 if final_chunk is None:
-                     final_chunk = chunk
-                 else:
-                     final_chunk += chunk
-                 if run_manager:
-                     await run_manager.on_llm_new_token(
-                         chunk.text,
-                         chunk=chunk,
-                         verbose=verbose,
-                     )
 
          if final_chunk is None:
              raise ValueError("No data received from Ollama stream.")
@@ -628,18 +711,19 @@ class ChatOllama(BaseChatModel):
                  content=final_chunk.text,
                  usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
                  tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
+                 additional_kwargs=final_chunk.message.additional_kwargs,
              ),
              generation_info=generation_info,
          )
          return ChatResult(generations=[chat_generation])
 
-     def _stream(
+     def _iterate_over_stream(
          self,
          messages: List[BaseMessage],
          stop: Optional[List[str]] = None,
-         run_manager: Optional[CallbackManagerForLLMRun] = None,
          **kwargs: Any,
      ) -> Iterator[ChatGenerationChunk]:
+         is_thinking = False
          for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
              if not isinstance(stream_resp, str):
                  chunk = ChatGenerationChunk(
@@ -659,20 +743,35 @@ class ChatOllama(BaseChatModel):
                          dict(stream_resp) if stream_resp.get("done") is True else None
                      ),
                  )
-                 if run_manager:
-                     run_manager.on_llm_new_token(
-                         chunk.text,
-                         verbose=self.verbose,
+                 if self.extract_reasoning:
+                     message, is_thinking = self._extract_reasoning(
+                         chunk.message, is_thinking
                      )
+                     chunk.message = message
                  yield chunk
 
-     async def _astream(
+     def _stream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> Iterator[ChatGenerationChunk]:
+         for chunk in self._iterate_over_stream(messages, stop, **kwargs):
+             if run_manager:
+                 run_manager.on_llm_new_token(
+                     chunk.text,
+                     verbose=self.verbose,
+                 )
+             yield chunk
+
+     async def _aiterate_over_stream(
          self,
          messages: List[BaseMessage],
          stop: Optional[List[str]] = None,
-         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
          **kwargs: Any,
      ) -> AsyncIterator[ChatGenerationChunk]:
+         is_thinking = False
          async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
              if not isinstance(stream_resp, str):
                  chunk = ChatGenerationChunk(
@@ -692,13 +791,28 @@ class ChatOllama(BaseChatModel):
                          dict(stream_resp) if stream_resp.get("done") is True else None
                      ),
                  )
-                 if run_manager:
-                     await run_manager.on_llm_new_token(
-                         chunk.text,
-                         verbose=self.verbose,
+                 if self.extract_reasoning:
+                     message, is_thinking = self._extract_reasoning(
+                         chunk.message, is_thinking
                      )
+                     chunk.message = message
                  yield chunk
 
+     async def _astream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> AsyncIterator[ChatGenerationChunk]:
+         async for chunk in self._aiterate_over_stream(messages, stop, **kwargs):
+             if run_manager:
+                 await run_manager.on_llm_new_token(
+                     chunk.text,
+                     verbose=self.verbose,
+                 )
+             yield chunk
+
      async def _agenerate(
          self,
          messages: List[BaseMessage],
@@ -715,6 +829,7 @@ class ChatOllama(BaseChatModel):
                  content=final_chunk.text,
                  usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
                  tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
+                 additional_kwargs=final_chunk.message.additional_kwargs,
              ),
              generation_info=generation_info,
          )
@@ -747,3 +862,352 @@ class ChatOllama(BaseChatModel):
          """  # noqa: E501
          formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
          return super().bind(tools=formatted_tools, **kwargs)
+
+     def with_structured_output(
+         self,
+         schema: Union[Dict, type],
+         *,
+         method: Literal["function_calling", "json_mode", "json_schema"] = "json_schema",
+         include_raw: bool = False,
+         **kwargs: Any,
+     ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
+         """Model wrapper that returns outputs formatted to match the given schema.
+
+         Args:
+             schema:
+                 The output schema. Can be passed in as:
+
+                 - a Pydantic class,
+                 - a JSON schema
+                 - a TypedDict class
+                 - an OpenAI function/tool schema.
+
+                 If ``schema`` is a Pydantic class then the model output will be a
+                 Pydantic instance of that class, and the model-generated fields will be
+                 validated by the Pydantic class. Otherwise the model output will be a
+                 dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
+                 for more on how to properly specify types and descriptions of
+                 schema fields when specifying a Pydantic or TypedDict class.
+
+             method: The method for steering model generation, one of:
+
+                 - "json_schema":
+                     Uses Ollama's structured output API: https://ollama.com/blog/structured-outputs
+                 - "function_calling":
+                     Uses Ollama's tool-calling API
+                 - "json_mode":
+                     Specifies ``format="json"``. Note that if using JSON mode then you
+                     must include instructions for formatting the output into the
+                     desired schema into the model call.
+
+             include_raw:
+                 If False then only the parsed structured output is returned. If
+                 an error occurs during model output parsing it will be raised. If True
+                 then both the raw model response (a BaseMessage) and the parsed model
+                 response will be returned. If an error occurs during output parsing it
+                 will be caught and returned as well. The final output is always a dict
+                 with keys "raw", "parsed", and "parsing_error".
+
+             kwargs: Additional keyword args aren't supported.
+
+         Returns:
+             A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.
+
+             | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+
+             | If ``include_raw`` is True, then Runnable outputs a dict with keys:
+
+                 - "raw": BaseMessage
+                 - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+                 - "parsing_error": Optional[BaseException]
+
+         .. versionchanged:: 0.2.2
+
+             Added support for structured output API via ``format`` parameter.
+
+         .. versionchanged:: 0.3.0
+
+             Updated default ``method`` to ``"json_schema"``.
+
+         .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False
+
+             .. code-block:: python
+
+                 from typing import Optional
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel, Field
+
+
+                 class AnswerWithJustification(BaseModel):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: Optional[str] = Field(
+                         default=..., description="A justification for the answer."
+                     )
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification
+                 )
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+
+                 # -> AnswerWithJustification(
+                 #     answer='They weigh the same',
+                 #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
+                 # )
+
+         .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True
+
+             .. code-block:: python
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel
+
+
+                 class AnswerWithJustification(BaseModel):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: str
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification, include_raw=True
+                 )
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+                 # -> {
+                 #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
+                 #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
+                 #     'parsing_error': None
+                 # }
+
+         .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False
+
+             .. code-block:: python
+
+                 from typing import Optional
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel, Field
+
+
+                 class AnswerWithJustification(BaseModel):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: Optional[str] = Field(
+                         default=..., description="A justification for the answer."
+                     )
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification, method="function_calling"
+                 )
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+
+                 # -> AnswerWithJustification(
+                 #     answer='They weigh the same',
+                 #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
+                 # )
+
+         .. dropdown:: Example: schema=TypedDict class, method="function_calling", include_raw=False
+
+             .. code-block:: python
+
+                 # IMPORTANT: If you are using Python <=3.8, you need to import Annotated
+                 # from typing_extensions, not from typing.
+                 from typing_extensions import Annotated, TypedDict
+
+                 from langchain_ollama import ChatOllama
+
+
+                 class AnswerWithJustification(TypedDict):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: Annotated[
+                         Optional[str], None, "A justification for the answer."
+                     ]
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(AnswerWithJustification)
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+                 # -> {
+                 #     'answer': 'They weigh the same',
+                 #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+                 # }
+
+         .. dropdown:: Example: schema=OpenAI function schema, method="function_calling", include_raw=False
+
+             .. code-block:: python
+
+                 from langchain_ollama import ChatOllama
+
+                 oai_schema = {
+                     'name': 'AnswerWithJustification',
+                     'description': 'An answer to the user question along with justification for the answer.',
+                     'parameters': {
+                         'type': 'object',
+                         'properties': {
+                             'answer': {'type': 'string'},
+                             'justification': {'description': 'A justification for the answer.', 'type': 'string'}
+                         },
+                         'required': ['answer']
+                     }
+                 }
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(oai_schema)
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+                 # -> {
+                 #     'answer': 'They weigh the same',
+                 #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+                 # }
+
+         .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True
+
+             .. code-block::
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel
+
+                 class AnswerWithJustification(BaseModel):
+                     answer: str
+                     justification: str
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification,
+                     method="json_mode",
+                     include_raw=True
+                 )
+
+                 structured_llm.invoke(
+                     "Answer the following question. "
+                     "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n"
+                     "What's heavier a pound of bricks or a pound of feathers?"
+                 )
+                 # -> {
+                 #     'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'),
+                 #     'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
+                 #     'parsing_error': None
+                 # }
+         """  # noqa: E501, D301
+         _ = kwargs.pop("strict", None)
+         if kwargs:
+             raise ValueError(f"Received unsupported arguments {kwargs}")
+         is_pydantic_schema = _is_pydantic_class(schema)
+         if method == "function_calling":
+             if schema is None:
+                 raise ValueError(
+                     "schema must be specified when method is not 'json_mode'. "
+                     "Received None."
+                 )
+             formatted_tool = convert_to_openai_tool(schema)
+             tool_name = formatted_tool["function"]["name"]
+             llm = self.bind_tools(
+                 [schema],
+                 tool_choice=tool_name,
+                 ls_structured_output_format={
+                     "kwargs": {"method": method},
+                     "schema": formatted_tool,
+                 },
+             )
+             if is_pydantic_schema:
+                 output_parser: Runnable = PydanticToolsParser(
+                     tools=[schema],  # type: ignore[list-item]
+                     first_tool_only=True,
+                 )
+             else:
+                 output_parser = JsonOutputKeyToolsParser(
+                     key_name=tool_name, first_tool_only=True
+                 )
+         elif method == "json_mode":
+             llm = self.bind(
+                 format="json",
+                 ls_structured_output_format={
+                     "kwargs": {"method": method},
+                     "schema": schema,
+                 },
+             )
+             output_parser = (
+                 PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]
+                 if is_pydantic_schema
+                 else JsonOutputParser()
+             )
+         elif method == "json_schema":
+             if schema is None:
+                 raise ValueError(
+                     "schema must be specified when method is not 'json_mode'. "
+                     "Received None."
+                 )
+             if is_pydantic_schema:
+                 schema = cast(TypeBaseModel, schema)
+                 if issubclass(schema, BaseModelV1):
+                     response_format = schema.schema()
+                 else:
+                     response_format = schema.model_json_schema()
+                 llm = self.bind(
+                     format=response_format,
+                     ls_structured_output_format={
+                         "kwargs": {"method": method},
+                         "schema": schema,
+                     },
+                 )
+                 output_parser = PydanticOutputParser(pydantic_object=schema)
+             else:
+                 if is_typeddict(schema):
+                     response_format = convert_to_json_schema(schema)
+                     if "required" not in response_format:
+                         response_format["required"] = list(
+                             response_format["properties"].keys()
+                         )
+                 else:
+                     # is JSON schema
+                     response_format = cast(dict, schema)
+                 llm = self.bind(
+                     format=response_format,
+                     ls_structured_output_format={
+                         "kwargs": {"method": method},
+                         "schema": response_format,
+                     },
+                 )
+                 output_parser = JsonOutputParser()
+         else:
+             raise ValueError(
+                 f"Unrecognized method argument. Expected one of 'function_calling', "
+                 f"'json_schema', or 'json_mode'. Received: '{method}'"
+             )
+
+         if include_raw:
+             parser_assign = RunnablePassthrough.assign(
+                 parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
+             )
+             parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
+             parser_with_fallback = parser_assign.with_fallbacks(
+                 [parser_none], exception_key="parsing_error"
+             )
+             return RunnableMap(raw=llm) | parser_with_fallback
+         else:
+             return llm | output_parser
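
With the default `method` now `"json_schema"`, a plain JSON schema dict is passed straight through to Ollama's structured output API and the result is parsed with `JsonOutputParser`. A minimal sketch, assuming `llama3.1` is pulled locally:

```python
from langchain_ollama import ChatOllama

person_schema = {
    "title": "Person",
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
    },
    "required": ["name", "age"],
}

llm = ChatOllama(model="llama3.1", temperature=0)
structured_llm = llm.with_structured_output(person_schema)  # method="json_schema" by default

result = structured_llm.invoke("Describe a fictional person named Asha who is 29.")
# result is a plain dict, e.g. {"name": "Asha", "age": 29}
```
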

langchain_ollama/embeddings.py

@@ -1,9 +1,6 @@
  """Ollama embeddings models."""
 
- from typing import (
-     List,
-     Optional,
- )
+ from typing import Any, Dict, List, Optional
 
  from langchain_core.embeddings import Embeddings
  from ollama import AsyncClient, Client
@@ -144,10 +141,94 @@ class OllamaEmbeddings(BaseModel, Embeddings):
      The async client to use for making requests.
      """
 
+     mirostat: Optional[int] = None
+     """Enable Mirostat sampling for controlling perplexity.
+     (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+     mirostat_eta: Optional[float] = None
+     """Influences how quickly the algorithm responds to feedback
+     from the generated text. A lower learning rate will result in
+     slower adjustments, while a higher learning rate will make
+     the algorithm more responsive. (Default: 0.1)"""
+
+     mirostat_tau: Optional[float] = None
+     """Controls the balance between coherence and diversity
+     of the output. A lower value will result in more focused and
+     coherent text. (Default: 5.0)"""
+
+     num_ctx: Optional[int] = None
+     """Sets the size of the context window used to generate the
+     next token. (Default: 2048) """
+
+     num_gpu: Optional[int] = None
+     """The number of GPUs to use. On macOS it defaults to 1 to
+     enable metal support, 0 to disable."""
+
+     keep_alive: Optional[int] = None
+     """controls how long the model will stay loaded into memory
+     following the request (default: 5m)
+     """
+
+     num_thread: Optional[int] = None
+     """Sets the number of threads to use during computation.
+     By default, Ollama will detect this for optimal performance.
+     It is recommended to set this value to the number of physical
+     CPU cores your system has (as opposed to the logical number of cores)."""
+
+     repeat_last_n: Optional[int] = None
+     """Sets how far back for the model to look back to prevent
+     repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+     repeat_penalty: Optional[float] = None
+     """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+     will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+     will be more lenient. (Default: 1.1)"""
+
+     temperature: Optional[float] = None
+     """The temperature of the model. Increasing the temperature will
+     make the model answer more creatively. (Default: 0.8)"""
+
+     stop: Optional[List[str]] = None
+     """Sets the stop tokens to use."""
+
+     tfs_z: Optional[float] = None
+     """Tail free sampling is used to reduce the impact of less probable
+     tokens from the output. A higher value (e.g., 2.0) will reduce the
+     impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+     top_k: Optional[int] = None
+     """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+     will give more diverse answers, while a lower value (e.g. 10)
+     will be more conservative. (Default: 40)"""
+
+     top_p: Optional[float] = None
+     """Works together with top-k. A higher value (e.g., 0.95) will lead
+     to more diverse text, while a lower value (e.g., 0.5) will
+     generate more focused and conservative text. (Default: 0.9)"""
+
      model_config = ConfigDict(
          extra="forbid",
      )
 
+     @property
+     def _default_params(self) -> Dict[str, Any]:
+         """Get the default parameters for calling Ollama."""
+         return {
+             "mirostat": self.mirostat,
+             "mirostat_eta": self.mirostat_eta,
+             "mirostat_tau": self.mirostat_tau,
+             "num_ctx": self.num_ctx,
+             "num_gpu": self.num_gpu,
+             "num_thread": self.num_thread,
+             "repeat_last_n": self.repeat_last_n,
+             "repeat_penalty": self.repeat_penalty,
+             "temperature": self.temperature,
+             "stop": self.stop,
+             "tfs_z": self.tfs_z,
+             "top_k": self.top_k,
+             "top_p": self.top_p,
+         }
+
      @model_validator(mode="after")
      def _set_clients(self) -> Self:
          """Set clients to use for ollama."""
@@ -158,7 +239,9 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
      def embed_documents(self, texts: List[str]) -> List[List[float]]:
          """Embed search docs."""
-         embedded_docs = self._client.embed(self.model, texts)["embeddings"]
+         embedded_docs = self._client.embed(
+             self.model, texts, options=self._default_params, keep_alive=self.keep_alive
+         )["embeddings"]
          return embedded_docs
 
      def embed_query(self, text: str) -> List[float]:
@@ -167,9 +250,11 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
      async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
          """Embed search docs."""
-         embedded_docs = (await self._async_client.embed(self.model, texts))[
-             "embeddings"
-         ]
+         embedded_docs = (
+             await self._async_client.embed(
+                 self.model, texts, keep_alive=self.keep_alive
+             )
+         )["embeddings"]
          return embedded_docs
 
      async def aembed_query(self, text: str) -> List[float]:
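
`OllamaEmbeddings` now exposes the same sampling/runtime options as the chat model and forwards them (plus `keep_alive`) to the embed call. A minimal sketch; `nomic-embed-text` is an assumed locally pulled embedding model:

```python
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
    num_ctx=4096,    # forwarded to Ollama via the new _default_params options
    keep_alive=-1,   # keep the model loaded between calls
)

vectors = embeddings.embed_documents(["hello world", "goodbye world"])
print(len(vectors), len(vectors[0]))
```
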

langchain_ollama-0.3.0.dist-info/METADATA

@@ -1,21 +1,14 @@
  Metadata-Version: 2.1
  Name: langchain-ollama
- Version: 0.2.2rc1
+ Version: 0.3.0
  Summary: An integration package connecting Ollama and LangChain
- Home-page: https://github.com/langchain-ai/langchain
  License: MIT
- Requires-Python: >=3.9,<4.0
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Requires-Dist: langchain-core (>=0.3.20,<0.4.0)
- Requires-Dist: ollama (>=0.3.0,<1)
- Project-URL: Repository, https://github.com/langchain-ai/langchain
- Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
  Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
+ Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
+ Project-URL: repository, https://github.com/langchain-ai/langchain
+ Requires-Python: <4.0,>=3.9
+ Requires-Dist: ollama<1,>=0.4.4
+ Requires-Dist: langchain-core<1.0.0,>=0.3.47
  Description-Content-Type: text/markdown
 
  # langchain-ollama
@@ -62,4 +55,3 @@ from langchain_ollama import OllamaLLM
  llm = OllamaLLM(model="llama3")
  llm.invoke("The meaning of life is")
  ```
-

langchain_ollama-0.3.0.dist-info/RECORD

@@ -0,0 +1,10 @@
+ langchain_ollama-0.3.0.dist-info/METADATA,sha256=VcLxoKw-32dqWPuJrjPGq2HwweTu_v3ZEtLNIRNUBRc,1463
+ langchain_ollama-0.3.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+ langchain_ollama-0.3.0.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+ langchain_ollama-0.3.0.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+ langchain_ollama/__init__.py,sha256=SxPRrWcPayJpbwhheTtlqCaPp9ffiAAgZMM5Wc1yYpM,634
+ langchain_ollama/chat_models.py,sha256=VMk5GnKiyPQ5TERQDhdSe2uiBOKtCP0GmYlcJs4CC14,49328
+ langchain_ollama/embeddings.py,sha256=d0jSB-T8Awv0razTUA_iD-ZvTma82Nw44YtiVu983u0,8633
+ langchain_ollama/llms.py,sha256=ojnYU0efhN10xhUINu1dCR2Erw79J_mYS6_l45J7Vls,12778
+ langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ langchain_ollama-0.3.0.dist-info/RECORD,,

langchain_ollama-0.3.0.dist-info/WHEEL

@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.8.1
+ Generator: pdm-backend (2.4.3)
  Root-Is-Purelib: true
  Tag: py3-none-any

langchain_ollama-0.3.0.dist-info/entry_points.txt

@@ -0,0 +1,4 @@
+ [console_scripts]
+
+ [gui_scripts]
+

langchain_ollama-0.2.2rc1.dist-info/RECORD

@@ -1,9 +0,0 @@
- langchain_ollama/__init__.py,sha256=SxPRrWcPayJpbwhheTtlqCaPp9ffiAAgZMM5Wc1yYpM,634
- langchain_ollama/chat_models.py,sha256=BS28WEnDBq0aUrlOyABbcMkvIk4C-oV_Zj6bnhQoJkM,29902
- langchain_ollama/embeddings.py,sha256=svqdPF44qX5qbFpZmLiXrzTC-AldmMlZRS5wBfY-EmA,5056
- langchain_ollama/llms.py,sha256=ojnYU0efhN10xhUINu1dCR2Erw79J_mYS6_l45J7Vls,12778
- langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- langchain_ollama-0.2.2rc1.dist-info/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
- langchain_ollama-0.2.2rc1.dist-info/METADATA,sha256=E9wttWytUkVCrJtbUjYA0nMxIt8tTkZOQZDFCU6Z_nc,1828
- langchain_ollama-0.2.2rc1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- langchain_ollama-0.2.2rc1.dist-info/RECORD,,