langchain-ollama 0.3.6__tar.gz → 0.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/PKG-INFO +3 -3
  2. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/__init__.py +3 -0
  3. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/_utils.py +6 -3
  4. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/chat_models.py +84 -79
  5. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/embeddings.py +40 -36
  6. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/llms.py +116 -35
  7. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/pyproject.toml +9 -13
  8. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/test_chat_models.py +29 -25
  9. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/test_chat_models_reasoning.py +58 -38
  10. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/test_chat_models_standard.py +2 -2
  11. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/test_embeddings.py +3 -1
  12. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/test_llms.py +12 -9
  13. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_chat_models.py +50 -7
  14. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_embeddings.py +1 -1
  15. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/LICENSE +0 -0
  16. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/README.md +0 -0
  17. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/py.typed +0 -0
  18. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/__init__.py +0 -0
  19. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/__init__.py +0 -0
  20. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/cassettes/test_chat_models_standard/TestChatOllama.test_stream_time.yaml +0 -0
  21. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/test_compile.py +0 -0
  22. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/__init__.py +0 -0
  23. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_imports.py +0 -0
  24. {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_llms.py +0 -0
@@ -1,14 +1,14 @@
  Metadata-Version: 2.1
  Name: langchain-ollama
- Version: 0.3.6
+ Version: 0.3.8
  Summary: An integration package connecting Ollama and LangChain
  License: MIT
  Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
  Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
  Project-URL: repository, https://github.com/langchain-ai/langchain
  Requires-Python: >=3.9
- Requires-Dist: ollama<1.0.0,>=0.5.1
- Requires-Dist: langchain-core<1.0.0,>=0.3.70
+ Requires-Dist: ollama<1.0.0,>=0.5.3
+ Requires-Dist: langchain-core<1.0.0,>=0.3.76
  Description-Content-Type: text/markdown

  # langchain-ollama
@@ -10,6 +10,7 @@ service.
  exist locally. This is useful for ensuring that the model is available before
  attempting to use it, especially in environments where models may not be
  pre-downloaded.
+
  """

  from importlib import metadata
@@ -19,6 +20,8 @@ from langchain_ollama.embeddings import OllamaEmbeddings
  from langchain_ollama.llms import OllamaLLM

  try:
+ if __package__ is None:
+ raise metadata.PackageNotFoundError
  __version__ = metadata.version(__package__)
  except metadata.PackageNotFoundError:
  # Case where package metadata is not available.
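
A note on the new guard above: when ``__package__`` resolves to ``None`` (for example, when the module is loaded outside an installed package), ``metadata.version`` cannot be called with it, so raising ``PackageNotFoundError`` up front routes that case into the existing fallback branch. A minimal sketch of the resulting pattern; the empty-string fallback is an assumption, since the diff does not show that assignment:

    from importlib import metadata

    try:
        if __package__ is None:
            raise metadata.PackageNotFoundError
        __version__ = metadata.version(__package__)
    except metadata.PackageNotFoundError:
        # Case where package metadata is not available.
        __version__ = ""  # assumed fallback value, not shown in this diff
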
@@ -1,11 +1,11 @@
- """Utility functions for validating Ollama models."""
+ """Utility function to validate Ollama models."""

  from httpx import ConnectError
  from ollama import Client, ResponseError


  def validate_model(client: Client, model_name: str) -> None:
- """Validate that a model exists in the Ollama instance.
+ """Validate that a model exists in the local Ollama instance.

  Args:
  client: The Ollama client.
@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
  )
  raise ValueError(msg)
  except ConnectError as e:
- msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download" # noqa: E501
+ msg = (
+ "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+ "running and accessible. https://ollama.com/download"
+ )
  raise ValueError(msg) from e
  except ResponseError as e:
  msg = (
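
For orientation, ``validate_model`` is the helper behind the ``validate_model_on_init=True`` option on the classes in this package, and it raises ``ValueError`` both when the model is not pulled locally and when the server cannot be reached. A rough usage sketch (host and model name are placeholders):

    from ollama import Client

    from langchain_ollama._utils import validate_model

    client = Client(host="http://localhost:11434")  # placeholder host
    try:
        validate_model(client, "gpt-oss:20b")  # placeholder model name
    except ValueError as err:
        # Raised when the model is missing locally or Ollama is unreachable.
        print(err)
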
@@ -87,8 +87,8 @@ def _parse_json_string(
  ) -> Any:
  """Attempt to parse a JSON string for tool calling.

- It first tries to use the standard json.loads. If that fails, it falls
- back to ast.literal_eval to safely parse Python literals, which is more
+ It first tries to use the standard ``json.loads``. If that fails, it falls
+ back to ``ast.literal_eval`` to safely parse Python literals, which is more
  robust against models using single quotes or containing apostrophes.

  Args:
@@ -100,7 +100,8 @@ def _parse_json_string(
  The parsed JSON string or Python literal.

  Raises:
- OutputParserException: If the string is invalid and skip=False.
+ OutputParserException: If the string is invalid and ``skip=False``.
+
  """
  try:
  return json.loads(json_string)
@@ -138,14 +139,20 @@ def _parse_arguments_from_tool_call(

  Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
  Should be removed/changed if fixed upstream.
+
  See https://github.com/ollama/ollama/issues/6155
+
  """
  if "function" not in raw_tool_call:
  return None
+ function_name = raw_tool_call["function"]["name"]
  arguments = raw_tool_call["function"]["arguments"]
  parsed_arguments: dict = {}
  if isinstance(arguments, dict):
  for key, value in arguments.items():
+ # Filter out metadata fields like 'functionName' that echo function name
+ if key == "functionName" and value == function_name:
+ continue
  if isinstance(value, str):
  parsed_value = _parse_json_string(
  value, skip=True, raw_tool_call=raw_tool_call
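
The new ``functionName`` check above drops a metadata field that some models echo inside the arguments dict, so it no longer leaks into the parsed tool arguments. An illustrative, invented payload of the shape this guards against:

    # Hypothetical raw tool call showing the inconsistent structure.
    raw_tool_call = {
        "function": {
            "name": "get_weather",
            # 'functionName' merely repeats the function name; the new check
            # skips it, leaving only the real arguments (here: 'location').
            "arguments": {"functionName": "get_weather", "location": "Paris"},
        }
    }
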
@@ -222,7 +229,7 @@ class ChatOllama(BaseChatModel):

  .. code-block:: bash

- ollama pull mistral:v0.3
+ ollama pull gpt-oss:20b
  pip install -U langchain-ollama

  Key init args — completion params:
@@ -255,7 +262,8 @@ class ChatOllama(BaseChatModel):
  from langchain_ollama import ChatOllama

  llm = ChatOllama(
- model = "llama3",
+ model = "gpt-oss:20b",
+ validate_model_on_init = True,
  temperature = 0.8,
  num_predict = 256,
  # other params ...
@@ -277,10 +285,7 @@ class ChatOllama(BaseChatModel):
  Stream:
  .. code-block:: python

- messages = [
- ("human", "Return the words Hello World!"),
- ]
- for chunk in llm.stream(messages):
+ for chunk in llm.stream("Return the words Hello World!"):
  print(chunk.text(), end="")


@@ -307,10 +312,7 @@ class ChatOllama(BaseChatModel):
  Async:
  .. code-block:: python

- messages = [
- ("human", "Hello how are you!"),
- ]
- await llm.ainvoke(messages)
+ await llm.ainvoke("Hello how are you!")

  .. code-block:: python

@@ -318,10 +320,7 @@ class ChatOllama(BaseChatModel):

  .. code-block:: python

- messages = [
- ("human", "Say hello world!"),
- ]
- async for chunk in llm.astream(messages):
+ async for chunk in llm.astream("Say hello world!"):
  print(chunk.content)

  .. code-block:: python
@@ -349,10 +348,7 @@ class ChatOllama(BaseChatModel):


  json_llm = ChatOllama(format="json")
- messages = [
- ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
- ]
- llm.invoke(messages).content
+ llm.invoke("Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only.").content

  .. code-block:: python

@@ -399,17 +395,16 @@ class ChatOllama(BaseChatModel):

  llm = ChatOllama(
  model = "deepseek-r1:8b",
+ validate_model_on_init = True,
  reasoning= True,
  )

- user_message = HumanMessage(content="how many r in the word strawberry?")
- messages: List[Any] = [user_message]
- llm.invoke(messages)
+ llm.invoke("how many r in the word strawberry?")

  # or, on an invocation basis:

- llm.invoke(messages, reasoning=True)
- # or llm.stream(messages, reasoning=True)
+ llm.invoke("how many r in the word strawberry?", reasoning=True)
+ # or llm.stream("how many r in the word strawberry?", reasoning=True)

  # If not provided, the invocation will default to the ChatOllama reasoning
  # param provided (None by default).
@@ -418,13 +413,12 @@ class ChatOllama(BaseChatModel):

  AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})

-
  """ # noqa: E501, pylint: disable=line-too-long

  model: str
  """Model name to use."""

- reasoning: Optional[bool] = None
+ reasoning: Optional[Union[bool, str]] = None
  """Controls the reasoning/thinking mode for
  `supported models <https://ollama.com/search?c=thinking>`__.

@@ -437,7 +431,13 @@ class ChatOllama(BaseChatModel):
  - ``None`` (Default): The model will use its default reasoning behavior. Note
  however, if the model's default behavior *is* to perform reasoning, think tags
  ()``<think>`` and ``</think>``) will be present within the main response content
- unless you set ``reasoning`` to ``True``."""
+ unless you set ``reasoning`` to ``True``.
+ - ``str``: e.g. ``'low'``, ``'medium'``, ``'high'``. Enables reasoning with a custom
+ intensity level. Currently, this is only supported ``gpt-oss``. See the
+ `Ollama docs <https://github.com/ollama/ollama-python/blob/da79e987f0ac0a4986bf396f043b36ef840370bc/ollama/_types.py#L210>`__
+ for more information.
+
+ """

  validate_model_on_init: bool = False
  """Whether to validate the model exists in Ollama locally on initialization.
@@ -447,26 +447,26 @@ class ChatOllama(BaseChatModel):

  mirostat: Optional[int] = None
  """Enable Mirostat sampling for controlling perplexity.
- (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+ (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

  mirostat_eta: Optional[float] = None
  """Influences how quickly the algorithm responds to feedback
  from the generated text. A lower learning rate will result in
  slower adjustments, while a higher learning rate will make
- the algorithm more responsive. (Default: 0.1)"""
+ the algorithm more responsive. (Default: ``0.1``)"""

  mirostat_tau: Optional[float] = None
  """Controls the balance between coherence and diversity
  of the output. A lower value will result in more focused and
- coherent text. (Default: 5.0)"""
+ coherent text. (Default: ``5.0``)"""

  num_ctx: Optional[int] = None
  """Sets the size of the context window used to generate the
- next token. (Default: 2048) """
+ next token. (Default: ``2048``) """

  num_gpu: Optional[int] = None
- """The number of GPUs to use. On macOS it defaults to 1 to
- enable metal support, 0 to disable."""
+ """The number of GPUs to use. On macOS it defaults to ``1`` to
+ enable metal support, ``0`` to disable."""

  num_thread: Optional[int] = None
  """Sets the number of threads to use during computation.
@@ -476,20 +476,20 @@ class ChatOllama(BaseChatModel):

  num_predict: Optional[int] = None
  """Maximum number of tokens to predict when generating text.
- (Default: 128, -1 = infinite generation, -2 = fill context)"""
+ (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""

  repeat_last_n: Optional[int] = None
  """Sets how far back for the model to look back to prevent
- repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+ repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

  repeat_penalty: Optional[float] = None
- """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
- will penalize repetitions more strongly, while a lower value (e.g., 0.9)
- will be more lenient. (Default: 1.1)"""
+ """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+ will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+ will be more lenient. (Default: ``1.1``)"""

  temperature: Optional[float] = None
  """The temperature of the model. Increasing the temperature will
- make the model answer more creatively. (Default: 0.8)"""
+ make the model answer more creatively. (Default: ``0.8``)"""

  seed: Optional[int] = None
  """Sets the random number seed to use for generation. Setting this
@@ -501,21 +501,21 @@ class ChatOllama(BaseChatModel):

  tfs_z: Optional[float] = None
  """Tail free sampling is used to reduce the impact of less probable
- tokens from the output. A higher value (e.g., 2.0) will reduce the
- impact more, while a value of 1.0 disables this setting. (default: 1)"""
+ tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+ impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

  top_k: Optional[int] = None
- """Reduces the probability of generating nonsense. A higher value (e.g. 100)
- will give more diverse answers, while a lower value (e.g. 10)
- will be more conservative. (Default: 40)"""
+ """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+ will give more diverse answers, while a lower value (e.g. ``10``)
+ will be more conservative. (Default: ``40``)"""

  top_p: Optional[float] = None
- """Works together with top-k. A higher value (e.g., 0.95) will lead
- to more diverse text, while a lower value (e.g., 0.5) will
- generate more focused and conservative text. (Default: 0.9)"""
+ """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+ to more diverse text, while a lower value (e.g., ``0.5``) will
+ generate more focused and conservative text. (Default: ``0.9``)"""

  format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
- """Specify the format of the output (options: "json", JSON schema)."""
+ """Specify the format of the output (options: ``'json'``, JSON schema)."""

  keep_alive: Optional[Union[int, str]] = None
  """How long the model will stay loaded into memory."""
@@ -525,32 +525,35 @@ class ChatOllama(BaseChatModel):

  client_kwargs: Optional[dict] = {}
  """Additional kwargs to pass to the httpx clients.
+
  These arguments are passed to both synchronous and async clients.
- Use sync_client_kwargs and async_client_kwargs to pass different arguments
+
+ Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
  to synchronous and asynchronous clients.
+
  """

  async_client_kwargs: Optional[dict] = {}
- """Additional kwargs to merge with client_kwargs before
+ """Additional kwargs to merge with ``client_kwargs`` before
  passing to the httpx AsyncClient.
+
  `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
+
  """

  sync_client_kwargs: Optional[dict] = {}
- """Additional kwargs to merge with client_kwargs before
+ """Additional kwargs to merge with ``client_kwargs`` before
  passing to the httpx Client.
+
  `Full list of params. <https://www.python-httpx.org/api/#client>`__
+
  """

  _client: Client = PrivateAttr()
- """
- The client to use for making requests.
- """
+ """The client to use for making requests."""

  _async_client: AsyncClient = PrivateAttr()
- """
- The async client to use for making requests.
- """
+ """The async client to use for making requests."""

  def _chat_params(
  self,
@@ -658,8 +661,10 @@ class ChatOllama(BaseChatModel):
  if isinstance(message.content, str):
  content = message.content
  else:
- for content_part in cast(list[dict], message.content):
- if content_part.get("type") == "text":
+ for content_part in message.content:
+ if isinstance(content_part, str):
+ content += f"\n{content_part}"
+ elif content_part.get("type") == "text":
  content += f"\n{content_part['text']}"
  elif content_part.get("type") == "tool_use":
  continue
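
With the loosened loop above, items of a list-valued message content may now be plain strings rather than typed dicts; both forms are appended to the text forwarded to Ollama. A small sketch (the model name is a placeholder):

    from langchain_core.messages import HumanMessage
    from langchain_ollama import ChatOllama

    llm = ChatOllama(model="gpt-oss:20b")  # placeholder model
    msg = HumanMessage(
        content=[
            "A bare string part",                           # handled by the new isinstance branch
            {"type": "text", "text": "A typed text part"},  # handled as before
        ]
    )
    llm.invoke([msg])
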
@@ -1044,8 +1049,7 @@ class ChatOllama(BaseChatModel):
  """Model wrapper that returns outputs formatted to match the given schema.

  Args:
- schema:
- The output schema. Can be passed in as:
+ schema: The output schema. Can be passed in as:

  - a Pydantic class,
  - a JSON schema
@@ -1061,35 +1065,35 @@ class ChatOllama(BaseChatModel):

  method: The method for steering model generation, one of:

- - "json_schema":
+ - ``'json_schema'``:
  Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
- - "function_calling":
+ - ``'function_calling'``:
  Uses Ollama's tool-calling API
- - "json_mode":
- Specifies ``format="json"``. Note that if using JSON mode then you
+ - ``'json_mode'``:
+ Specifies ``format='json'``. Note that if using JSON mode then you
  must include instructions for formatting the output into the
  desired schema into the model call.

  include_raw:
  If False then only the parsed structured output is returned. If
  an error occurs during model output parsing it will be raised. If True
- then both the raw model response (a BaseMessage) and the parsed model
+ then both the raw model response (a ``BaseMessage``) and the parsed model
  response will be returned. If an error occurs during output parsing it
  will be caught and returned as well. The final output is always a dict
- with keys "raw", "parsed", and "parsing_error".
+ with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.

  kwargs: Additional keyword args aren't supported.

  Returns:
  A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

- | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+ If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

- | If ``include_raw`` is True, then Runnable outputs a dict with keys:
+ If ``include_raw`` is True, then Runnable outputs a dict with keys:

- - "raw": BaseMessage
- - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
- - "parsing_error": Optional[BaseException]
+ - ``'raw'``: ``BaseMessage``
+ - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+ - ``'parsing_error'``: Optional[BaseException]

  .. versionchanged:: 0.2.2

@@ -1097,7 +1101,7 @@ class ChatOllama(BaseChatModel):

  .. versionchanged:: 0.3.0

- Updated default ``method`` to ``"json_schema"``.
+ Updated default ``method`` to ``'json_schema'``.

  .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False

@@ -1132,7 +1136,7 @@ class ChatOllama(BaseChatModel):
  # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
  # )

- .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True
+ .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_schema'``, ``include_raw=True``

  .. code-block:: python

@@ -1161,7 +1165,7 @@ class ChatOllama(BaseChatModel):
  # 'parsing_error': None
  # }

- .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False
+ .. dropdown:: Example: ``schema=Pydantic`` class, ``method='function_calling'``, ``include_raw=False``

  .. code-block:: python

@@ -1225,7 +1229,7 @@ class ChatOllama(BaseChatModel):
  # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
  # }

- .. dropdown:: Example: schema=OpenAI function schema, method="function_calling", include_raw=False
+ .. dropdown:: Example: ``schema=OpenAI`` function schema, ``method='function_calling'``, ``include_raw=False``

  .. code-block:: python

@@ -1255,7 +1259,7 @@ class ChatOllama(BaseChatModel):
  # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
  # }

- .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True
+ .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_mode'``, ``include_raw=True``

  .. code-block::

@@ -1283,6 +1287,7 @@ class ChatOllama(BaseChatModel):
  # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
  # 'parsing_error': None
  # }
+
  """ # noqa: E501, D301
  _ = kwargs.pop("strict", None)
  if kwargs:
@@ -21,12 +21,12 @@ class OllamaEmbeddings(BaseModel, Embeddings):
  """Ollama embedding model integration.

  Set up a local Ollama instance:
- Install the Ollama package and set up a local Ollama instance
- using the instructions here: https://github.com/ollama/ollama .
+ `Install the Ollama package <https://github.com/ollama/ollama>`__ and set up a
+ local Ollama instance.

  You will need to choose a model to serve.

- You can view a list of available models via the model library (https://ollama.com/library).
+ You can view a list of available models via `the model library <https://ollama.com/library>`__.

  To fetch a model from the Ollama model library use ``ollama pull <name-of-model>``.

@@ -39,8 +39,8 @@ class OllamaEmbeddings(BaseModel, Embeddings):
  This will download the default tagged version of the model.
  Typically, the default points to the latest, smallest sized-parameter model.

- * On Mac, the models will be downloaded to ~/.ollama/models
- * On Linux (or WSL), the models will be stored at /usr/share/ollama/.ollama/models
+ * On Mac, the models will be downloaded to ``~/.ollama/models``
+ * On Linux (or WSL), the models will be stored at ``/usr/share/ollama/.ollama/models``

  You can specify the exact version of the model of interest
  as such ``ollama pull vicuna:13b-v1.5-16k-q4_0``.
@@ -122,6 +122,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
  .. code-block:: python

  [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
+
  """ # noqa: E501

  model: str
@@ -131,6 +132,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
  """Whether to validate the model exists in ollama locally on initialization.

  .. versionadded:: 0.3.4
+
  """

  base_url: Optional[str] = None
@@ -138,60 +140,62 @@ class OllamaEmbeddings(BaseModel, Embeddings):

  client_kwargs: Optional[dict] = {}
  """Additional kwargs to pass to the httpx clients.
+
  These arguments are passed to both synchronous and async clients.
- Use sync_client_kwargs and async_client_kwargs to pass different arguments
+
+ Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
  to synchronous and asynchronous clients.
+
  """

  async_client_kwargs: Optional[dict] = {}
- """Additional kwargs to merge with client_kwargs before passing to the httpx
+ """Additional kwargs to merge with ``client_kwargs`` before passing to the httpx
  AsyncClient.

  For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+
  """

  sync_client_kwargs: Optional[dict] = {}
- """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+ """Additional kwargs to merge with ``client_kwargs`` before
+ passing to the HTTPX Client.

  For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+
  """

  _client: Optional[Client] = PrivateAttr(default=None)
- """
- The client to use for making requests.
- """
+ """The client to use for making requests."""

  _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
- """
- The async client to use for making requests.
- """
+ """The async client to use for making requests."""

  mirostat: Optional[int] = None
  """Enable Mirostat sampling for controlling perplexity.
- (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+ (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

  mirostat_eta: Optional[float] = None
  """Influences how quickly the algorithm responds to feedback
  from the generated text. A lower learning rate will result in
  slower adjustments, while a higher learning rate will make
- the algorithm more responsive. (Default: 0.1)"""
+ the algorithm more responsive. (Default: ``0.1``)"""

  mirostat_tau: Optional[float] = None
  """Controls the balance between coherence and diversity
  of the output. A lower value will result in more focused and
- coherent text. (Default: 5.0)"""
+ coherent text. (Default: ``5.0``)"""

  num_ctx: Optional[int] = None
  """Sets the size of the context window used to generate the
- next token. (Default: 2048) """
+ next token. (Default: ``2048``) """

  num_gpu: Optional[int] = None
- """The number of GPUs to use. On macOS it defaults to 1 to
- enable metal support, 0 to disable."""
+ """The number of GPUs to use. On macOS it defaults to ``1`` to
+ enable metal support, ``0`` to disable."""

  keep_alive: Optional[int] = None
- """controls how long the model will stay loaded into memory
- following the request (default: 5m)
+ """Controls how long the model will stay loaded into memory
+ following the request (default: ``5m``)
  """

  num_thread: Optional[int] = None
@@ -202,34 +206,34 @@ class OllamaEmbeddings(BaseModel, Embeddings):

  repeat_last_n: Optional[int] = None
  """Sets how far back for the model to look back to prevent
- repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+ repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

  repeat_penalty: Optional[float] = None
- """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
- will penalize repetitions more strongly, while a lower value (e.g., 0.9)
- will be more lenient. (Default: 1.1)"""
+ """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+ will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+ will be more lenient. (Default: ``1.1``)"""

  temperature: Optional[float] = None
  """The temperature of the model. Increasing the temperature will
- make the model answer more creatively. (Default: 0.8)"""
+ make the model answer more creatively. (Default: ``0.8``)"""

  stop: Optional[list[str]] = None
  """Sets the stop tokens to use."""

  tfs_z: Optional[float] = None
  """Tail free sampling is used to reduce the impact of less probable
- tokens from the output. A higher value (e.g., 2.0) will reduce the
- impact more, while a value of 1.0 disables this setting. (default: 1)"""
+ tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+ impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

  top_k: Optional[int] = None
- """Reduces the probability of generating nonsense. A higher value (e.g. 100)
- will give more diverse answers, while a lower value (e.g. 10)
- will be more conservative. (Default: 40)"""
+ """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+ will give more diverse answers, while a lower value (e.g. ``10``)
+ will be more conservative. (Default: ``40``)"""

  top_p: Optional[float] = None
- """Works together with top-k. A higher value (e.g., 0.95) will lead
- to more diverse text, while a lower value (e.g., 0.5) will
- generate more focused and conservative text. (Default: 0.9)"""
+ """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+ to more diverse text, while a lower value (e.g., ``0.5``) will
+ generate more focused and conservative text. (Default: ``0.9``)"""

  model_config = ConfigDict(
  extra="forbid",
@@ -256,7 +260,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):

  @model_validator(mode="after")
  def _set_clients(self) -> Self:
- """Set clients to use for ollama."""
+ """Set clients to use for Ollama."""
  client_kwargs = self.client_kwargs or {}

  sync_client_kwargs = client_kwargs
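
Tying together the ``client_kwargs`` docstring changes in this file: ``client_kwargs`` is applied to both HTTP clients, while ``sync_client_kwargs`` and ``async_client_kwargs`` are merged on top of it for the sync and async httpx clients respectively. A usage sketch (the header and timeout values are invented):

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",                          # any locally pulled embedding model
        client_kwargs={"headers": {"x-example": "demo"}},  # shared by both clients
        sync_client_kwargs={"timeout": 30},                # merged into the httpx Client
        async_client_kwargs={"timeout": 60},               # merged into the httpx AsyncClient
    )
    vector = embeddings.embed_query("hello world")
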