langchain-ollama 0.3.5__tar.gz → 0.3.7__tar.gz

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (26)
  1. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/PKG-INFO +3 -3
  2. langchain_ollama-0.3.7/langchain_ollama/__init__.py +36 -0
  3. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/langchain_ollama/_utils.py +6 -3
  4. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/langchain_ollama/chat_models.py +154 -100
  5. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/langchain_ollama/embeddings.py +44 -37
  6. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/langchain_ollama/llms.py +37 -31
  7. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/pyproject.toml +9 -13
  8. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/chat_models/test_chat_models.py +29 -25
  9. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/chat_models/test_chat_models_reasoning.py +58 -38
  10. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/chat_models/test_chat_models_standard.py +2 -2
  11. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/test_embeddings.py +3 -1
  12. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/test_llms.py +12 -9
  13. langchain_ollama-0.3.7/tests/unit_tests/test_chat_models.py +313 -0
  14. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/unit_tests/test_embeddings.py +1 -1
  15. langchain_ollama-0.3.5/langchain_ollama/__init__.py +0 -24
  16. langchain_ollama-0.3.5/tests/unit_tests/test_chat_models.py +0 -85
  17. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/LICENSE +0 -0
  18. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/README.md +0 -0
  19. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/langchain_ollama/py.typed +0 -0
  20. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/__init__.py +0 -0
  21. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/__init__.py +0 -0
  22. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/chat_models/cassettes/test_chat_models_standard/TestChatOllama.test_stream_time.yaml +0 -0
  23. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/integration_tests/test_compile.py +0 -0
  24. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/unit_tests/__init__.py +0 -0
  25. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/unit_tests/test_imports.py +0 -0
  26. {langchain_ollama-0.3.5 → langchain_ollama-0.3.7}/tests/unit_tests/test_llms.py +0 -0
@@ -1,14 +1,14 @@
  Metadata-Version: 2.1
  Name: langchain-ollama
- Version: 0.3.5
+ Version: 0.3.7
  Summary: An integration package connecting Ollama and LangChain
  License: MIT
  Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
  Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
  Project-URL: repository, https://github.com/langchain-ai/langchain
  Requires-Python: >=3.9
- Requires-Dist: ollama<1.0.0,>=0.5.1
- Requires-Dist: langchain-core<1.0.0,>=0.3.69
+ Requires-Dist: ollama<1.0.0,>=0.5.3
+ Requires-Dist: langchain-core<1.0.0,>=0.3.74
  Description-Content-Type: text/markdown

  # langchain-ollama
@@ -0,0 +1,36 @@
+ """This is the langchain_ollama package.
+
+ Provides infrastructure for interacting with the `Ollama <https://ollama.com/>`__
+ service.
+
+ .. note::
+     **Newly added in 0.3.4:** ``validate_model_on_init`` param on all models.
+     This parameter allows you to validate the model exists in Ollama locally on
+     initialization. If set to ``True``, it will raise an error if the model does not
+     exist locally. This is useful for ensuring that the model is available before
+     attempting to use it, especially in environments where models may not be
+     pre-downloaded.
+
+ """
+
+ from importlib import metadata
+
+ from langchain_ollama.chat_models import ChatOllama
+ from langchain_ollama.embeddings import OllamaEmbeddings
+ from langchain_ollama.llms import OllamaLLM
+
+ try:
+     if __package__ is None:
+         raise metadata.PackageNotFoundError
+     __version__ = metadata.version(__package__)
+ except metadata.PackageNotFoundError:
+     # Case where package metadata is not available.
+     __version__ = ""
+ del metadata  # optional, avoids polluting the results of dir(__package__)
+
+ __all__ = [
+     "ChatOllama",
+     "OllamaEmbeddings",
+     "OllamaLLM",
+     "__version__",
+ ]
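For orientation, a minimal usage sketch of the symbols this new ``__init__.py`` re-exports; the model name is a placeholder and assumes a local Ollama server with that model already pulled:

    from langchain_ollama import ChatOllama, __version__

    print(__version__)  # "" if package metadata is unavailable

    # Placeholder model; requires a running Ollama server and a pulled model.
    llm = ChatOllama(model="gpt-oss:20b", validate_model_on_init=True)
    print(llm.invoke("Say hello").content)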
@@ -1,11 +1,11 @@
- """Utility functions for validating Ollama models."""
+ """Utility function to validate Ollama models."""

  from httpx import ConnectError
  from ollama import Client, ResponseError


  def validate_model(client: Client, model_name: str) -> None:
- """Validate that a model exists in the Ollama instance.
+ """Validate that a model exists in the local Ollama instance.

  Args:
  client: The Ollama client.
@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
  )
  raise ValueError(msg)
  except ConnectError as e:
- msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download" # noqa: E501
+ msg = (
+ "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+ "running and accessible. https://ollama.com/download"
+ )
  raise ValueError(msg) from e
  except ResponseError as e:
  msg = (
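A rough sketch of how this (internal) helper surfaces the reworded connection error; the client uses the library default address and the model name is a placeholder:

    from ollama import Client

    from langchain_ollama._utils import validate_model

    try:
        validate_model(Client(), "gpt-oss:20b")
    except ValueError as err:
        # e.g. "Failed to connect to Ollama. Please check that Ollama is
        # downloaded, running and accessible. https://ollama.com/download"
        print(err)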
@@ -2,7 +2,9 @@

  from __future__ import annotations

+ import ast
  import json
+ import logging
  from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
  from operator import itemgetter
  from typing import (
@@ -57,6 +59,8 @@ from typing_extensions import Self, is_typeddict

  from ._utils import validate_model

+ log = logging.getLogger(__name__)
+

  def _get_usage_metadata_from_generation_info(
  generation_info: Optional[Mapping[str, Any]],
@@ -77,33 +81,46 @@ def _get_usage_metadata_from_generation_info(

  def _parse_json_string(
  json_string: str,
+ *,
  raw_tool_call: dict[str, Any],
- skip: bool, # noqa: FBT001
+ skip: bool,
  ) -> Any:
  """Attempt to parse a JSON string for tool calling.

+ It first tries to use the standard ``json.loads``. If that fails, it falls
+ back to ``ast.literal_eval`` to safely parse Python literals, which is more
+ robust against models using single quotes or containing apostrophes.
+
  Args:
  json_string: JSON string to parse.
- skip: Whether to ignore parsing errors and return the value anyways.
  raw_tool_call: Raw tool call to include in error message.
+ skip: Whether to ignore parsing errors and return the value anyways.

  Returns:
- The parsed JSON string.
+ The parsed JSON string or Python literal.

  Raises:
- OutputParserException: If the JSON string wrong invalid and skip=False.
+ OutputParserException: If the string is invalid and ``skip=False``.
+
  """
  try:
  return json.loads(json_string)
- except json.JSONDecodeError as e:
- if skip:
- return json_string
- msg = (
- f"Function {raw_tool_call['function']['name']} arguments:\n\n"
- f"{raw_tool_call['function']['arguments']}\n\nare not valid JSON. "
- f"Received JSONDecodeError {e}"
- )
- raise OutputParserException(msg) from e
+ except json.JSONDecodeError:
+ try:
+ # Use ast.literal_eval to safely parse Python-style dicts
+ # (e.g. with single quotes)
+ return ast.literal_eval(json_string)
+ except (SyntaxError, ValueError) as e:
+ # If both fail, and we're not skipping, raise an informative error.
+ if skip:
+ return json_string
+ msg = (
+ f"Function {raw_tool_call['function']['name']} arguments:\n\n"
+ f"{raw_tool_call['function']['arguments']}"
+ "\n\nare not valid JSON or a Python literal. "
+ f"Received error: {e}"
+ )
+ raise OutputParserException(msg) from e
  except TypeError as e:
  if skip:
  return json_string
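Illustrative only: a sketch of the new fallback behavior, calling the private helper with a single-quoted, Python-style payload that ``json.loads`` rejects but ``ast.literal_eval`` accepts (the tool call shown is made up):

    from langchain_ollama.chat_models import _parse_json_string

    raw_tool_call = {
        "function": {"name": "get_weather", "arguments": "{'city': 'Paris'}"}
    }
    # json.loads fails on the single quotes; ast.literal_eval parses it instead.
    parsed = _parse_json_string(
        "{'city': 'Paris'}", raw_tool_call=raw_tool_call, skip=False
    )
    print(parsed)  # {'city': 'Paris'}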
@@ -122,14 +139,20 @@ def _parse_arguments_from_tool_call(

  Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
  Should be removed/changed if fixed upstream.
+
  See https://github.com/ollama/ollama/issues/6155
+
  """
  if "function" not in raw_tool_call:
  return None
+ function_name = raw_tool_call["function"]["name"]
  arguments = raw_tool_call["function"]["arguments"]
  parsed_arguments: dict = {}
  if isinstance(arguments, dict):
  for key, value in arguments.items():
+ # Filter out metadata fields like 'functionName' that echo function name
+ if key == "functionName" and value == function_name:
+ continue
  if isinstance(value, str):
  parsed_value = _parse_json_string(
  value, skip=True, raw_tool_call=raw_tool_call
@@ -206,7 +229,7 @@ class ChatOllama(BaseChatModel):

  .. code-block:: bash

- ollama pull mistral:v0.3
+ ollama pull gpt-oss:20b
  pip install -U langchain-ollama

  Key init args — completion params:
@@ -239,7 +262,8 @@ class ChatOllama(BaseChatModel):
  from langchain_ollama import ChatOllama

  llm = ChatOllama(
- model = "llama3",
+ model = "gpt-oss:20b",
+ validate_model_on_init = True,
  temperature = 0.8,
  num_predict = 256,
  # other params ...
@@ -261,10 +285,7 @@ class ChatOllama(BaseChatModel):
  Stream:
  .. code-block:: python

- messages = [
- ("human", "Return the words Hello World!"),
- ]
- for chunk in llm.stream(messages):
+ for chunk in llm.stream("Return the words Hello World!"):
  print(chunk.text(), end="")

@@ -291,10 +312,7 @@ class ChatOllama(BaseChatModel):
  Async:
  .. code-block:: python

- messages = [
- ("human", "Hello how are you!"),
- ]
- await llm.ainvoke(messages)
+ await llm.ainvoke("Hello how are you!")

  .. code-block:: python

@@ -302,10 +320,7 @@ class ChatOllama(BaseChatModel):

  .. code-block:: python

- messages = [
- ("human", "Say hello world!"),
- ]
- async for chunk in llm.astream(messages):
+ async for chunk in llm.astream("Say hello world!"):
  print(chunk.content)

  .. code-block:: python
@@ -333,10 +348,7 @@ class ChatOllama(BaseChatModel):


  json_llm = ChatOllama(format="json")
- messages = [
- ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
- ]
- llm.invoke(messages).content
+ llm.invoke("Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only.").content

  .. code-block:: python

@@ -383,17 +395,16 @@ class ChatOllama(BaseChatModel):

  llm = ChatOllama(
  model = "deepseek-r1:8b",
+ validate_model_on_init = True,
  reasoning= True,
  )

- user_message = HumanMessage(content="how many r in the word strawberry?")
- messages: List[Any] = [user_message]
- llm.invoke(messages)
+ llm.invoke("how many r in the word strawberry?")

  # or, on an invocation basis:

- llm.invoke(messages, reasoning=True)
- # or llm.stream(messages, reasoning=True)
+ llm.invoke("how many r in the word strawberry?", reasoning=True)
+ # or llm.stream("how many r in the word strawberry?", reasoning=True)

  # If not provided, the invocation will default to the ChatOllama reasoning
  # param provided (None by default).
@@ -402,13 +413,12 @@ class ChatOllama(BaseChatModel):

  AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})

-
  """ # noqa: E501, pylint: disable=line-too-long

  model: str
  """Model name to use."""

- reasoning: Optional[bool] = None
+ reasoning: Optional[Union[bool, str]] = None
  """Controls the reasoning/thinking mode for
  `supported models <https://ollama.com/search?c=thinking>`__.

@@ -421,33 +431,42 @@ class ChatOllama(BaseChatModel):
  - ``None`` (Default): The model will use its default reasoning behavior. Note
  however, if the model's default behavior *is* to perform reasoning, think tags
  ()``<think>`` and ``</think>``) will be present within the main response content
- unless you set ``reasoning`` to ``True``."""
+ unless you set ``reasoning`` to ``True``.
+ - ``str``: e.g. ``'low'``, ``'medium'``, ``'high'``. Enables reasoning with a custom
+ intensity level. Currently, this is only supported ``gpt-oss``. See the
+ `Ollama docs <https://github.com/ollama/ollama-python/blob/da79e987f0ac0a4986bf396f043b36ef840370bc/ollama/_types.py#L210>`__
+ for more information.
+
+ """

  validate_model_on_init: bool = False
- """Whether to validate the model exists in Ollama locally on initialization."""
+ """Whether to validate the model exists in Ollama locally on initialization.
+
+ .. versionadded:: 0.3.4
+ """

  mirostat: Optional[int] = None
  """Enable Mirostat sampling for controlling perplexity.
- (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+ (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

  mirostat_eta: Optional[float] = None
  """Influences how quickly the algorithm responds to feedback
  from the generated text. A lower learning rate will result in
  slower adjustments, while a higher learning rate will make
- the algorithm more responsive. (Default: 0.1)"""
+ the algorithm more responsive. (Default: ``0.1``)"""

  mirostat_tau: Optional[float] = None
  """Controls the balance between coherence and diversity
  of the output. A lower value will result in more focused and
- coherent text. (Default: 5.0)"""
+ coherent text. (Default: ``5.0``)"""

  num_ctx: Optional[int] = None
  """Sets the size of the context window used to generate the
- next token. (Default: 2048) """
+ next token. (Default: ``2048``) """

  num_gpu: Optional[int] = None
- """The number of GPUs to use. On macOS it defaults to 1 to
- enable metal support, 0 to disable."""
+ """The number of GPUs to use. On macOS it defaults to ``1`` to
+ enable metal support, ``0`` to disable."""

  num_thread: Optional[int] = None
  """Sets the number of threads to use during computation.
@@ -457,20 +476,20 @@ class ChatOllama(BaseChatModel):

  num_predict: Optional[int] = None
  """Maximum number of tokens to predict when generating text.
- (Default: 128, -1 = infinite generation, -2 = fill context)"""
+ (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""

  repeat_last_n: Optional[int] = None
  """Sets how far back for the model to look back to prevent
- repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+ repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

  repeat_penalty: Optional[float] = None
- """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
- will penalize repetitions more strongly, while a lower value (e.g., 0.9)
- will be more lenient. (Default: 1.1)"""
+ """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+ will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+ will be more lenient. (Default: ``1.1``)"""

  temperature: Optional[float] = None
  """The temperature of the model. Increasing the temperature will
- make the model answer more creatively. (Default: 0.8)"""
+ make the model answer more creatively. (Default: ``0.8``)"""

  seed: Optional[int] = None
  """Sets the random number seed to use for generation. Setting this
@@ -482,21 +501,21 @@ class ChatOllama(BaseChatModel):

  tfs_z: Optional[float] = None
  """Tail free sampling is used to reduce the impact of less probable
- tokens from the output. A higher value (e.g., 2.0) will reduce the
- impact more, while a value of 1.0 disables this setting. (default: 1)"""
+ tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+ impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

  top_k: Optional[int] = None
- """Reduces the probability of generating nonsense. A higher value (e.g. 100)
- will give more diverse answers, while a lower value (e.g. 10)
- will be more conservative. (Default: 40)"""
+ """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+ will give more diverse answers, while a lower value (e.g. ``10``)
+ will be more conservative. (Default: ``40``)"""

  top_p: Optional[float] = None
- """Works together with top-k. A higher value (e.g., 0.95) will lead
- to more diverse text, while a lower value (e.g., 0.5) will
- generate more focused and conservative text. (Default: 0.9)"""
+ """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+ to more diverse text, while a lower value (e.g., ``0.5``) will
+ generate more focused and conservative text. (Default: ``0.9``)"""

  format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
- """Specify the format of the output (options: "json", JSON schema)."""
+ """Specify the format of the output (options: ``'json'``, JSON schema)."""

  keep_alive: Optional[Union[int, str]] = None
  """How long the model will stay loaded into memory."""
@@ -506,32 +525,35 @@ class ChatOllama(BaseChatModel):

  client_kwargs: Optional[dict] = {}
  """Additional kwargs to pass to the httpx clients.
+
  These arguments are passed to both synchronous and async clients.
- Use sync_client_kwargs and async_client_kwargs to pass different arguments
+
+ Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
  to synchronous and asynchronous clients.
+
  """

  async_client_kwargs: Optional[dict] = {}
- """Additional kwargs to merge with client_kwargs before
+ """Additional kwargs to merge with ``client_kwargs`` before
  passing to the httpx AsyncClient.
+
  `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
+
  """

  sync_client_kwargs: Optional[dict] = {}
- """Additional kwargs to merge with client_kwargs before
+ """Additional kwargs to merge with ``client_kwargs`` before
  passing to the httpx Client.
+
  `Full list of params. <https://www.python-httpx.org/api/#client>`__
+
  """

  _client: Client = PrivateAttr()
- """
- The client to use for making requests.
- """
+ """The client to use for making requests."""

  _async_client: AsyncClient = PrivateAttr()
- """
- The async client to use for making requests.
- """
+ """The async client to use for making requests."""

  def _chat_params(
  self,
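A sketch of how the three kwarg buckets documented above can be combined; the httpx options shown (``timeout``, ``verify``, ``follow_redirects``) are ordinary httpx client arguments used here as placeholders, not recommendations:

    from langchain_ollama import ChatOllama

    llm = ChatOllama(
        model="gpt-oss:20b",
        client_kwargs={"timeout": 30},                  # applied to both httpx clients
        sync_client_kwargs={"verify": True},            # merged into httpx.Client only
        async_client_kwargs={"follow_redirects": True}, # merged into httpx.AsyncClient only
    )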
@@ -821,6 +843,28 @@ class ChatOllama(BaseChatModel):
  reasoning = kwargs.get("reasoning", self.reasoning)
  for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
  if not isinstance(stream_resp, str):
+ content = (
+ stream_resp["message"]["content"]
+ if "message" in stream_resp and "content" in stream_resp["message"]
+ else ""
+ )
+
+ # Warn and skip responses with done_reason: 'load' and empty content
+ # These indicate the model was loaded but no actual generation occurred
+ is_load_response_with_empty_content = (
+ stream_resp.get("done") is True
+ and stream_resp.get("done_reason") == "load"
+ and not content.strip()
+ )
+
+ if is_load_response_with_empty_content:
+ log.warning(
+ "Ollama returned empty response with done_reason='load'."
+ "This typically indicates the model was loaded but no content "
+ "was generated. Skipping this response."
+ )
+ continue
+
  if stream_resp.get("done") is True:
  generation_info = dict(stream_resp)
  if "model" in generation_info:
@@ -829,12 +873,6 @@ class ChatOllama(BaseChatModel):
  else:
  generation_info = None

- content = (
- stream_resp["message"]["content"]
- if "message" in stream_resp and "content" in stream_resp["message"]
- else ""
- )
-
  additional_kwargs = {}
  if (
  reasoning
@@ -881,6 +919,28 @@ class ChatOllama(BaseChatModel):
  reasoning = kwargs.get("reasoning", self.reasoning)
  async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
  if not isinstance(stream_resp, str):
+ content = (
+ stream_resp["message"]["content"]
+ if "message" in stream_resp and "content" in stream_resp["message"]
+ else ""
+ )
+
+ # Warn and skip responses with done_reason: 'load' and empty content
+ # These indicate the model was loaded but no actual generation occurred
+ is_load_response_with_empty_content = (
+ stream_resp.get("done") is True
+ and stream_resp.get("done_reason") == "load"
+ and not content.strip()
+ )
+
+ if is_load_response_with_empty_content:
+ log.warning(
+ "Ollama returned empty response with done_reason='load'. "
+ "This typically indicates the model was loaded but no content "
+ "was generated. Skipping this response."
+ )
+ continue
+
  if stream_resp.get("done") is True:
  generation_info = dict(stream_resp)
  if "model" in generation_info:
@@ -889,12 +949,6 @@ class ChatOllama(BaseChatModel):
  else:
  generation_info = None

- content = (
- stream_resp["message"]["content"]
- if "message" in stream_resp and "content" in stream_resp["message"]
- else ""
- )
-
  additional_kwargs = {}
  if (
  reasoning
@@ -993,8 +1047,7 @@ class ChatOllama(BaseChatModel):
  """Model wrapper that returns outputs formatted to match the given schema.

  Args:
- schema:
- The output schema. Can be passed in as:
+ schema: The output schema. Can be passed in as:

  - a Pydantic class,
  - a JSON schema
@@ -1010,35 +1063,35 @@ class ChatOllama(BaseChatModel):

  method: The method for steering model generation, one of:

- - "json_schema":
+ - ``'json_schema'``:
  Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
- - "function_calling":
+ - ``'function_calling'``:
  Uses Ollama's tool-calling API
- - "json_mode":
- Specifies ``format="json"``. Note that if using JSON mode then you
+ - ``'json_mode'``:
+ Specifies ``format='json'``. Note that if using JSON mode then you
  must include instructions for formatting the output into the
  desired schema into the model call.

  include_raw:
  If False then only the parsed structured output is returned. If
  an error occurs during model output parsing it will be raised. If True
- then both the raw model response (a BaseMessage) and the parsed model
+ then both the raw model response (a ``BaseMessage``) and the parsed model
  response will be returned. If an error occurs during output parsing it
  will be caught and returned as well. The final output is always a dict
- with keys "raw", "parsed", and "parsing_error".
+ with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.

  kwargs: Additional keyword args aren't supported.

  Returns:
  A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

- | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+ If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

- | If ``include_raw`` is True, then Runnable outputs a dict with keys:
+ If ``include_raw`` is True, then Runnable outputs a dict with keys:

- - "raw": BaseMessage
- - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
- - "parsing_error": Optional[BaseException]
+ - ``'raw'``: ``BaseMessage``
+ - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+ - ``'parsing_error'``: Optional[BaseException]

  .. versionchanged:: 0.2.2

@@ -1046,7 +1099,7 @@ class ChatOllama(BaseChatModel):

  .. versionchanged:: 0.3.0

- Updated default ``method`` to ``"json_schema"``.
+ Updated default ``method`` to ``'json_schema'``.

  .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False

@@ -1081,7 +1134,7 @@ class ChatOllama(BaseChatModel):
  # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
  # )

- .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True
+ .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_schema'``, ``include_raw=True``

  .. code-block:: python

@@ -1110,7 +1163,7 @@ class ChatOllama(BaseChatModel):
  # 'parsing_error': None
  # }

- .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False
+ .. dropdown:: Example: ``schema=Pydantic`` class, ``method='function_calling'``, ``include_raw=False``

  .. code-block:: python

@@ -1174,7 +1227,7 @@ class ChatOllama(BaseChatModel):
  # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
  # }

- .. dropdown:: Example: schema=OpenAI function schema, method="function_calling", include_raw=False
+ .. dropdown:: Example: ``schema=OpenAI`` function schema, ``method='function_calling'``, ``include_raw=False``

  .. code-block:: python

@@ -1204,7 +1257,7 @@ class ChatOllama(BaseChatModel):
  # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
  # }

- .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True
+ .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_mode'``, ``include_raw=True``

  .. code-block::

@@ -1232,6 +1285,7 @@ class ChatOllama(BaseChatModel):
  # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
  # 'parsing_error': None
  # }
+
  """ # noqa: E501, D301
  _ = kwargs.pop("strict", None)
  if kwargs: