langchain-ollama 0.3.6__tar.gz → 0.3.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/PKG-INFO +3 -3
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/__init__.py +3 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/_utils.py +6 -3
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/chat_models.py +84 -79
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/embeddings.py +40 -36
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/llms.py +116 -35
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/pyproject.toml +9 -13
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/test_chat_models.py +29 -25
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/test_chat_models_reasoning.py +58 -38
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/test_chat_models_standard.py +2 -2
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/test_embeddings.py +3 -1
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/test_llms.py +12 -9
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_chat_models.py +50 -7
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_embeddings.py +1 -1
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/LICENSE +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/README.md +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/langchain_ollama/py.typed +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/__init__.py +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/__init__.py +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/chat_models/cassettes/test_chat_models_standard/TestChatOllama.test_stream_time.yaml +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/integration_tests/test_compile.py +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/__init__.py +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_imports.py +0 -0
- {langchain_ollama-0.3.6 → langchain_ollama-0.3.8}/tests/unit_tests/test_llms.py +0 -0
PKG-INFO
@@ -1,14 +1,14 @@
 Metadata-Version: 2.1
 Name: langchain-ollama
-Version: 0.3.6
+Version: 0.3.8
 Summary: An integration package connecting Ollama and LangChain
 License: MIT
 Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
 Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
 Project-URL: repository, https://github.com/langchain-ai/langchain
 Requires-Python: >=3.9
-Requires-Dist: ollama<1.0.0,>=0.5.
-Requires-Dist: langchain-core<1.0.0,>=0.3.
+Requires-Dist: ollama<1.0.0,>=0.5.3
+Requires-Dist: langchain-core<1.0.0,>=0.3.76
 Description-Content-Type: text/markdown

 # langchain-ollama
langchain_ollama/__init__.py
@@ -10,6 +10,7 @@ service.
 exist locally. This is useful for ensuring that the model is available before
 attempting to use it, especially in environments where models may not be
 pre-downloaded.
+
 """

 from importlib import metadata
@@ -19,6 +20,8 @@ from langchain_ollama.embeddings import OllamaEmbeddings
 from langchain_ollama.llms import OllamaLLM

 try:
+    if __package__ is None:
+        raise metadata.PackageNotFoundError
     __version__ = metadata.version(__package__)
 except metadata.PackageNotFoundError:
     # Case where package metadata is not available.
langchain_ollama/_utils.py
@@ -1,11 +1,11 @@
-"""Utility
+"""Utility function to validate Ollama models."""

 from httpx import ConnectError
 from ollama import Client, ResponseError


 def validate_model(client: Client, model_name: str) -> None:
-    """Validate that a model exists in the Ollama instance.
+    """Validate that a model exists in the local Ollama instance.

     Args:
         client: The Ollama client.
@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
         )
         raise ValueError(msg)
     except ConnectError as e:
-        msg =
+        msg = (
+            "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+            "running and accessible. https://ollama.com/download"
+        )
         raise ValueError(msg) from e
     except ResponseError as e:
         msg = (
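`validate_model` is the helper behind the `validate_model_on_init` flag on the chat, LLM and embeddings classes. A minimal sketch of calling it directly, assuming a local Ollama server on the default port (it lives in a private module, so importing it this way is illustrative only):

    from ollama import Client
    from langchain_ollama._utils import validate_model

    client = Client(host="http://localhost:11434")  # default local Ollama endpoint
    # Raises ValueError with the new message above if Ollama is unreachable,
    # or a model-specific error if the model has not been pulled yet.
    validate_model(client, "gpt-oss:20b")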
langchain_ollama/chat_models.py
@@ -87,8 +87,8 @@ def _parse_json_string(
 ) -> Any:
     """Attempt to parse a JSON string for tool calling.

-    It first tries to use the standard json.loads
-    back to ast.literal_eval to safely parse Python literals, which is more
+    It first tries to use the standard ``json.loads``. If that fails, it falls
+    back to ``ast.literal_eval`` to safely parse Python literals, which is more
     robust against models using single quotes or containing apostrophes.

     Args:
@@ -100,7 +100,8 @@ def _parse_json_string(
         The parsed JSON string or Python literal.

     Raises:
-        OutputParserException: If the string is invalid and skip=False
+        OutputParserException: If the string is invalid and ``skip=False``.
+
     """
     try:
         return json.loads(json_string)
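The fallback described in that docstring can be sketched as follows (a simplified stand-in for the real helper, which additionally honors the `skip` flag and wraps failures in `OutputParserException`):

    import ast
    import json

    def parse_tool_args(raw: str):
        # Strict JSON first; fall back to Python-literal parsing, which tolerates
        # single-quoted keys and values such as {'city': 'Paris'}.
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return ast.literal_eval(raw)

    parse_tool_args('{"city": "Paris"}')   # -> {'city': 'Paris'}
    parse_tool_args("{'city': 'Paris'}")   # -> {'city': 'Paris'}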
@@ -138,14 +139,20 @@ def _parse_arguments_from_tool_call(

     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
     Should be removed/changed if fixed upstream.
+
     See https://github.com/ollama/ollama/issues/6155
+
     """
     if "function" not in raw_tool_call:
         return None
+    function_name = raw_tool_call["function"]["name"]
     arguments = raw_tool_call["function"]["arguments"]
     parsed_arguments: dict = {}
     if isinstance(arguments, dict):
         for key, value in arguments.items():
+            # Filter out metadata fields like 'functionName' that echo function name
+            if key == "functionName" and value == function_name:
+                continue
             if isinstance(value, str):
                 parsed_value = _parse_json_string(
                     value, skip=True, raw_tool_call=raw_tool_call
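To make the new filter concrete, here is a hedged illustration of the kind of raw tool call it targets (the payload values are made up; the shape follows the `raw_tool_call["function"]` access in the code above):

    raw_tool_call = {
        "function": {
            "name": "get_weather",
            "arguments": {
                # Some models echo the function name back as a pseudo-argument;
                # the new check skips it because it matches the function's own name.
                "functionName": "get_weather",
                "city": "Paris",
            },
        }
    }
    # After parsing, only {"city": "Paris"} is forwarded as the tool arguments.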
@@ -222,7 +229,7 @@ class ChatOllama(BaseChatModel):

     .. code-block:: bash

-        ollama pull
+        ollama pull gpt-oss:20b
         pip install -U langchain-ollama

     Key init args — completion params:
@@ -255,7 +262,8 @@ class ChatOllama(BaseChatModel):
         from langchain_ollama import ChatOllama

         llm = ChatOllama(
-            model = "
+            model = "gpt-oss:20b",
+            validate_model_on_init = True,
             temperature = 0.8,
             num_predict = 256,
             # other params ...
@@ -277,10 +285,7 @@ class ChatOllama(BaseChatModel):
     Stream:
         .. code-block:: python

-            messages = [
-                ("human", "Return the words Hello World!"),
-            ]
-            for chunk in llm.stream(messages):
+            for chunk in llm.stream("Return the words Hello World!"):
                 print(chunk.text(), end="")


@@ -307,10 +312,7 @@ class ChatOllama(BaseChatModel):
     Async:
         .. code-block:: python

-            messages = [
-                ("human", "Hello how are you!"),
-            ]
-            await llm.ainvoke(messages)
+            await llm.ainvoke("Hello how are you!")

         .. code-block:: python

@@ -318,10 +320,7 @@ class ChatOllama(BaseChatModel):


         .. code-block:: python

-            messages = [
-                ("human", "Say hello world!"),
-            ]
-            async for chunk in llm.astream(messages):
+            async for chunk in llm.astream("Say hello world!"):
                 print(chunk.content)

         .. code-block:: python
@@ -349,10 +348,7 @@ class ChatOllama(BaseChatModel):


            json_llm = ChatOllama(format="json")
-            messages = [
-                ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
-            ]
-            llm.invoke(messages).content
+            llm.invoke("Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only.").content

         .. code-block:: python

@@ -399,17 +395,16 @@ class ChatOllama(BaseChatModel):

         llm = ChatOllama(
             model = "deepseek-r1:8b",
+            validate_model_on_init = True,
             reasoning= True,
         )

-        user_message = HumanMessage(content="how many r in the word strawberry?")
-        messages: List[Any] = [user_message]
-        llm.invoke(messages)
+        llm.invoke("how many r in the word strawberry?")

         # or, on an invocation basis:

-        llm.invoke(
-        # or llm.stream(
+        llm.invoke("how many r in the word strawberry?", reasoning=True)
+        # or llm.stream("how many r in the word strawberry?", reasoning=True)

         # If not provided, the invocation will default to the ChatOllama reasoning
         # param provided (None by default).
@@ -418,13 +413,12 @@ class ChatOllama(BaseChatModel):

         AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})

-
     """ # noqa: E501, pylint: disable=line-too-long

     model: str
     """Model name to use."""

-    reasoning: Optional[bool] = None
+    reasoning: Optional[Union[bool, str]] = None
     """Controls the reasoning/thinking mode for
     `supported models <https://ollama.com/search?c=thinking>`__.

@@ -437,7 +431,13 @@ class ChatOllama(BaseChatModel):
     - ``None`` (Default): The model will use its default reasoning behavior. Note
       however, if the model's default behavior *is* to perform reasoning, think tags
       ()``<think>`` and ``</think>``) will be present within the main response content
-      unless you set ``reasoning`` to ``True``.
+      unless you set ``reasoning`` to ``True``.
+    - ``str``: e.g. ``'low'``, ``'medium'``, ``'high'``. Enables reasoning with a custom
+      intensity level. Currently, this is only supported ``gpt-oss``. See the
+      `Ollama docs <https://github.com/ollama/ollama-python/blob/da79e987f0ac0a4986bf396f043b36ef840370bc/ollama/_types.py#L210>`__
+      for more information.
+
+    """

     validate_model_on_init: bool = False
     """Whether to validate the model exists in Ollama locally on initialization.
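Since ``reasoning`` now accepts a string effort level in addition to a bool, a minimal sketch of both styles, assuming a locally pulled ``gpt-oss:20b`` (the only family the docstring currently lists as supporting effort strings):

    from langchain_ollama import ChatOllama

    # Effort level fixed at construction time
    llm = ChatOllama(model="gpt-oss:20b", reasoning="low")
    llm.invoke("how many r in the word strawberry?")

    # Or per call, mirroring the reasoning=True example in the docstring;
    # passing "high" here assumes per-call kwargs accept the same Union[bool, str].
    llm.invoke("how many r in the word strawberry?", reasoning="high")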
@@ -447,26 +447,26 @@ class ChatOllama(BaseChatModel):

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
@@ -476,20 +476,20 @@ class ChatOllama(BaseChatModel):

     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -501,21 +501,21 @@ class ChatOllama(BaseChatModel):

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
-    """Specify the format of the output (options:
+    """Specify the format of the output (options: ``'json'``, JSON schema)."""

     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
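These fields map directly onto Ollama's generation options; a short hedged sketch combining a few of them (the values are arbitrary and the model name is an assumption):

    from langchain_ollama import ChatOllama

    llm = ChatOllama(
        model="gpt-oss:20b",
        num_ctx=4096,        # context window (default 2048)
        num_predict=256,     # cap on generated tokens (default 128)
        repeat_penalty=1.1,  # repetition penalty (default 1.1)
        top_k=40,
        top_p=0.9,
        format="json",       # or a JSON schema dict, per the `format` field above
    )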
@@ -525,32 +525,35 @@ class ChatOllama(BaseChatModel):

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
     passing to the httpx AsyncClient.
+
     `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
     passing to the httpx Client.
+
     `Full list of params. <https://www.python-httpx.org/api/#client>`__
+
     """

     _client: Client = PrivateAttr()
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: AsyncClient = PrivateAttr()
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     def _chat_params(
         self,
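Taken together, the three kwargs dicts compose as the docstrings describe; a minimal sketch with made-up values (any httpx ``Client``/``AsyncClient`` constructor argument is accepted):

    from langchain_ollama import ChatOllama

    llm = ChatOllama(
        model="gpt-oss:20b",
        client_kwargs={"timeout": 60},                          # shared by both clients
        sync_client_kwargs={"headers": {"x-caller": "sync"}},   # merged into httpx.Client
        async_client_kwargs={"headers": {"x-caller": "async"}}, # merged into httpx.AsyncClient
    )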
@@ -658,8 +661,10 @@ class ChatOllama(BaseChatModel):
             if isinstance(message.content, str):
                 content = message.content
             else:
-                for content_part in
-                    if content_part
+                for content_part in message.content:
+                    if isinstance(content_part, str):
+                        content += f"\n{content_part}"
+                    elif content_part.get("type") == "text":
                         content += f"\n{content_part['text']}"
                     elif content_part.get("type") == "tool_use":
                         continue
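The two added branches mean a message's content list may now mix bare strings with typed parts; a hedged illustration of an input that exercises both paths (the message itself is invented):

    from langchain_core.messages import HumanMessage

    msg = HumanMessage(
        content=[
            "Summarize the attached notes.",            # plain string -> new isinstance branch
            {"type": "text", "text": "Keep it short."}, # typed dict   -> existing "text" branch
        ]
    )
    # Both parts end up appended to the prompt text sent to Ollama.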
@@ -1044,8 +1049,7 @@ class ChatOllama(BaseChatModel):
         """Model wrapper that returns outputs formatted to match the given schema.

         Args:
-            schema:
-                The output schema. Can be passed in as:
+            schema: The output schema. Can be passed in as:

                 - a Pydantic class,
                 - a JSON schema
@@ -1061,35 +1065,35 @@ class ChatOllama(BaseChatModel):

             method: The method for steering model generation, one of:

-                -
+                - ``'json_schema'``:
                     Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
-                -
+                - ``'function_calling'``:
                     Uses Ollama's tool-calling API
-                -
-                    Specifies ``format=
+                - ``'json_mode'``:
+                    Specifies ``format='json'``. Note that if using JSON mode then you
                     must include instructions for formatting the output into the
                     desired schema into the model call.

             include_raw:
                 If False then only the parsed structured output is returned. If
                 an error occurs during model output parsing it will be raised. If True
-                then both the raw model response (a BaseMessage) and the parsed model
+                then both the raw model response (a ``BaseMessage``) and the parsed model
                 response will be returned. If an error occurs during output parsing it
                 will be caught and returned as well. The final output is always a dict
-                with keys
+                with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.

             kwargs: Additional keyword args aren't supported.

         Returns:
             A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:

-                -
-                -
-                -
+            - ``'raw'``: ``BaseMessage``
+            - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+            - ``'parsing_error'``: Optional[BaseException]

         .. versionchanged:: 0.2.2

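For a quick orientation to the default path documented above, a minimal sketch using the same ``AnswerWithJustification`` schema that appears in the dropdown examples (the model name is an assumption; any Ollama model that supports structured outputs works):

    from pydantic import BaseModel
    from langchain_ollama import ChatOllama

    class AnswerWithJustification(BaseModel):
        answer: str
        justification: str

    llm = ChatOllama(model="llama3.1", temperature=0)
    # method defaults to 'json_schema' as of 0.3.0
    structured_llm = llm.with_structured_output(AnswerWithJustification)
    result = structured_llm.invoke("What weighs more, a pound of bricks or a pound of feathers?")
    # include_raw defaults to False, so `result` is an AnswerWithJustification instance.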
@@ -1097,7 +1101,7 @@ class ChatOllama(BaseChatModel):

         .. versionchanged:: 0.3.0

-            Updated default ``method`` to ``
+            Updated default ``method`` to ``'json_schema'``.

         .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False

@@ -1132,7 +1136,7 @@ class ChatOllama(BaseChatModel):
                 # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                 # )

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_schema'``, ``include_raw=True``

             .. code-block:: python

@@ -1161,7 +1165,7 @@ class ChatOllama(BaseChatModel):
                 # 'parsing_error': None
                 # }

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='function_calling'``, ``include_raw=False``

             .. code-block:: python

@@ -1225,7 +1229,7 @@ class ChatOllama(BaseChatModel):
                 # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                 # }

-        .. dropdown:: Example: schema=OpenAI function schema, method=
+        .. dropdown:: Example: ``schema=OpenAI`` function schema, ``method='function_calling'``, ``include_raw=False``

             .. code-block:: python

@@ -1255,7 +1259,7 @@ class ChatOllama(BaseChatModel):
                 # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                 # }

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_mode'``, ``include_raw=True``

             .. code-block::

@@ -1283,6 +1287,7 @@ class ChatOllama(BaseChatModel):
                 # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
                 # 'parsing_error': None
                 # }
+
         """ # noqa: E501, D301
         _ = kwargs.pop("strict", None)
         if kwargs:
langchain_ollama/embeddings.py
@@ -21,12 +21,12 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.

     Set up a local Ollama instance:
-        Install the Ollama package and set up a
-
+        `Install the Ollama package <https://github.com/ollama/ollama>`__ and set up a
+        local Ollama instance.

         You will need to choose a model to serve.

-        You can view a list of available models via the model library
+        You can view a list of available models via `the model library <https://ollama.com/library>`__.

         To fetch a model from the Ollama model library use ``ollama pull <name-of-model>``.

@@ -39,8 +39,8 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         This will download the default tagged version of the model.
         Typically, the default points to the latest, smallest sized-parameter model.

-        * On Mac, the models will be downloaded to
-        * On Linux (or WSL), the models will be stored at
+        * On Mac, the models will be downloaded to ``~/.ollama/models``
+        * On Linux (or WSL), the models will be stored at ``/usr/share/ollama/.ollama/models``

         You can specify the exact version of the model of interest
         as such ``ollama pull vicuna:13b-v1.5-16k-q4_0``.
@@ -122,6 +122,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         .. code-block:: python

             [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
+
     """ # noqa: E501

     model: str
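For orientation, output like the vector shown in that docstring comes from calls of this shape (the model name is illustrative; use whichever embedding model you have pulled):

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(model="nomic-embed-text", validate_model_on_init=True)
    vector = embeddings.embed_query("What is the meaning of life?")
    vector[:3]  # three floats, similar to the sample output above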
@@ -131,6 +132,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Whether to validate the model exists in ollama locally on initialization.

     .. versionadded:: 0.3.4
+
     """

     base_url: Optional[str] = None
@@ -138,60 +140,62 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the httpx
     AsyncClient.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+
     """

     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     keep_alive: Optional[int] = None
-    """
-    following the request (default: 5m)
+    """Controls how long the model will stay loaded into memory
+    following the request (default: ``5m``)
     """

     num_thread: Optional[int] = None
@@ -202,34 +206,34 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     model_config = ConfigDict(
         extra="forbid",
@@ -256,7 +260,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     @model_validator(mode="after")
     def _set_clients(self) -> Self:
-        """Set clients to use for
+        """Set clients to use for Ollama."""
         client_kwargs = self.client_kwargs or {}

         sync_client_kwargs = client_kwargs