langchain-ollama 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_ollama/__init__.py +13 -1
- langchain_ollama/_utils.py +6 -3
- langchain_ollama/chat_models.py +154 -100
- langchain_ollama/embeddings.py +44 -37
- langchain_ollama/llms.py +37 -31
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/METADATA +3 -3
- langchain_ollama-0.3.7.dist-info/RECORD +11 -0
- langchain_ollama-0.3.5.dist-info/RECORD +0 -11
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/WHEEL +0 -0
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/entry_points.txt +0 -0
- {langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/licenses/LICENSE +0 -0
langchain_ollama/__init__.py
CHANGED
@@ -1,6 +1,16 @@
 """This is the langchain_ollama package.

-
+Provides infrastructure for interacting with the `Ollama <https://ollama.com/>`__
+service.
+
+.. note::
+    **Newly added in 0.3.4:** ``validate_model_on_init`` param on all models.
+    This parameter allows you to validate the model exists in Ollama locally on
+    initialization. If set to ``True``, it will raise an error if the model does not
+    exist locally. This is useful for ensuring that the model is available before
+    attempting to use it, especially in environments where models may not be
+    pre-downloaded.
+
 """

 from importlib import metadata
@@ -10,6 +20,8 @@ from langchain_ollama.embeddings import OllamaEmbeddings
 from langchain_ollama.llms import OllamaLLM

 try:
+    if __package__ is None:
+        raise metadata.PackageNotFoundError
     __version__ = metadata.version(__package__)
 except metadata.PackageNotFoundError:
     # Case where package metadata is not available.
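The ``validate_model_on_init`` parameter described in the note above is available on all three model classes; a minimal usage sketch (the model name is only an example and must already be pulled locally):

.. code-block:: python

    from langchain_ollama import ChatOllama

    # Fails fast at construction time if "llama3.1" has not been pulled into
    # the local Ollama instance, rather than erroring on the first invoke.
    llm = ChatOllama(model="llama3.1", validate_model_on_init=True)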
langchain_ollama/_utils.py
CHANGED
@@ -1,11 +1,11 @@
-"""Utility
+"""Utility function to validate Ollama models."""

 from httpx import ConnectError
 from ollama import Client, ResponseError


 def validate_model(client: Client, model_name: str) -> None:
-    """Validate that a model exists in the Ollama instance.
+    """Validate that a model exists in the local Ollama instance.

     Args:
         client: The Ollama client.
@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
         )
         raise ValueError(msg)
     except ConnectError as e:
-        msg =
+        msg = (
+            "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+            "running and accessible. https://ollama.com/download"
+        )
         raise ValueError(msg) from e
     except ResponseError as e:
         msg = (
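The reworded connection error surfaces as a ``ValueError`` when the helper is called directly; a small sketch (the host and model name are assumptions for illustration):

.. code-block:: python

    from ollama import Client

    from langchain_ollama._utils import validate_model

    client = Client(host="http://localhost:11434")
    try:
        validate_model(client, "llama3.1")
    except ValueError as err:
        # Raised when the model has not been pulled locally, or when Ollama
        # itself is not running; the new message points to ollama.com/download.
        print(err)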
langchain_ollama/chat_models.py
CHANGED
@@ -2,7 +2,9 @@

 from __future__ import annotations

+import ast
 import json
+import logging
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
@@ -57,6 +59,8 @@ from typing_extensions import Self, is_typeddict

 from ._utils import validate_model

+log = logging.getLogger(__name__)
+

 def _get_usage_metadata_from_generation_info(
     generation_info: Optional[Mapping[str, Any]],
@@ -77,33 +81,46 @@ def _get_usage_metadata_from_generation_info(

 def _parse_json_string(
     json_string: str,
+    *,
     raw_tool_call: dict[str, Any],
-    skip: bool,
+    skip: bool,
 ) -> Any:
     """Attempt to parse a JSON string for tool calling.

+    It first tries to use the standard ``json.loads``. If that fails, it falls
+    back to ``ast.literal_eval`` to safely parse Python literals, which is more
+    robust against models using single quotes or containing apostrophes.
+
     Args:
         json_string: JSON string to parse.
-        skip: Whether to ignore parsing errors and return the value anyways.
         raw_tool_call: Raw tool call to include in error message.
+        skip: Whether to ignore parsing errors and return the value anyways.

     Returns:
-        The parsed JSON string.
+        The parsed JSON string or Python literal.

     Raises:
-        OutputParserException: If the
+        OutputParserException: If the string is invalid and ``skip=False``.
+
     """
     try:
         return json.loads(json_string)
-    except json.JSONDecodeError
-
-
-
-
-
-
-
-
+    except json.JSONDecodeError:
+        try:
+            # Use ast.literal_eval to safely parse Python-style dicts
+            # (e.g. with single quotes)
+            return ast.literal_eval(json_string)
+        except (SyntaxError, ValueError) as e:
+            # If both fail, and we're not skipping, raise an informative error.
+            if skip:
+                return json_string
+            msg = (
+                f"Function {raw_tool_call['function']['name']} arguments:\n\n"
+                f"{raw_tool_call['function']['arguments']}"
+                "\n\nare not valid JSON or a Python literal. "
+                f"Received error: {e}"
+            )
+            raise OutputParserException(msg) from e
     except TypeError as e:
         if skip:
             return json_string
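The fallback added here can be illustrated standalone: ``json.loads`` handles well-formed JSON, and ``ast.literal_eval`` accepts the Python-style dicts some models emit. A simplified sketch, not the library helper itself:

.. code-block:: python

    import ast
    import json


    def parse_arguments(raw: str):
        """Parse tool-call arguments: JSON first, Python literal as a fallback."""
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            # literal_eval accepts single quotes and embedded apostrophes
            # without executing any code.
            return ast.literal_eval(raw)


    parse_arguments('{"city": "Paris"}')         # -> {'city': 'Paris'}
    parse_arguments("{'city': \"it's Paris\"}")  # -> {'city': "it's Paris"}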
@@ -122,14 +139,20 @@ def _parse_arguments_from_tool_call(

     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
     Should be removed/changed if fixed upstream.
+
     See https://github.com/ollama/ollama/issues/6155
+
     """
     if "function" not in raw_tool_call:
         return None
+    function_name = raw_tool_call["function"]["name"]
     arguments = raw_tool_call["function"]["arguments"]
     parsed_arguments: dict = {}
     if isinstance(arguments, dict):
         for key, value in arguments.items():
+            # Filter out metadata fields like 'functionName' that echo function name
+            if key == "functionName" and value == function_name:
+                continue
             if isinstance(value, str):
                 parsed_value = _parse_json_string(
                     value, skip=True, raw_tool_call=raw_tool_call
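With the new filter, an arguments dict that echoes the function name no longer leaks that key into the parsed tool call. Roughly, with an illustrative payload (not a captured Ollama response):

.. code-block:: python

    raw_tool_call = {
        "function": {
            "name": "get_weather",
            # Some models echo the function name back as a pseudo-argument.
            "arguments": {"functionName": "get_weather", "location": "Paris"},
        }
    }
    # After the filtering above, the parsed arguments keep only real
    # parameters: {"location": "Paris"}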
@@ -206,7 +229,7 @@ class ChatOllama(BaseChatModel):

         .. code-block:: bash

-            ollama pull
+            ollama pull gpt-oss:20b
             pip install -U langchain-ollama

     Key init args — completion params:
@@ -239,7 +262,8 @@ class ChatOllama(BaseChatModel):
             from langchain_ollama import ChatOllama

             llm = ChatOllama(
-                model = "
+                model = "gpt-oss:20b",
+                validate_model_on_init = True,
                 temperature = 0.8,
                 num_predict = 256,
                 # other params ...
@@ -261,10 +285,7 @@ class ChatOllama(BaseChatModel):
     Stream:
         .. code-block:: python

-
-                ("human", "Return the words Hello World!"),
-            ]
-            for chunk in llm.stream(messages):
+            for chunk in llm.stream("Return the words Hello World!"):
                 print(chunk.text(), end="")


@@ -291,10 +312,7 @@ class ChatOllama(BaseChatModel):
     Async:
         .. code-block:: python

-
-                ("human", "Hello how are you!"),
-            ]
-            await llm.ainvoke(messages)
+            await llm.ainvoke("Hello how are you!")

         .. code-block:: python

@@ -302,10 +320,7 @@ class ChatOllama(BaseChatModel):

         .. code-block:: python

-
-                ("human", "Say hello world!"),
-            ]
-            async for chunk in llm.astream(messages):
+            async for chunk in llm.astream("Say hello world!"):
                 print(chunk.content)

         .. code-block:: python
@@ -333,10 +348,7 @@ class ChatOllama(BaseChatModel):


             json_llm = ChatOllama(format="json")
-
-                ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
-            ]
-            llm.invoke(messages).content
+            llm.invoke("Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only.").content

         .. code-block:: python

@@ -383,17 +395,16 @@ class ChatOllama(BaseChatModel):

             llm = ChatOllama(
                 model = "deepseek-r1:8b",
+                validate_model_on_init = True,
                 reasoning= True,
             )

-
-            messages: List[Any] = [user_message]
-            llm.invoke(messages)
+            llm.invoke("how many r in the word strawberry?")

             # or, on an invocation basis:

-            llm.invoke(
-            # or llm.stream(
+            llm.invoke("how many r in the word strawberry?", reasoning=True)
+            # or llm.stream("how many r in the word strawberry?", reasoning=True)

             # If not provided, the invocation will default to the ChatOllama reasoning
             # param provided (None by default).
@@ -402,13 +413,12 @@ class ChatOllama(BaseChatModel):

             AIMessage(content='The word "strawberry" contains **three \'r\' letters**. Here\'s a breakdown for clarity:\n\n- The spelling of "strawberry" has two parts ... be 3.\n\nTo be thorough, let\'s confirm with an online source or common knowledge.\n\nI can recall that "strawberry" has: s-t-r-a-w-b-e-r-r-y — yes, three r\'s.\n\nPerhaps it\'s misspelled by some, but standard is correct.\n\nSo I think the response should be 3.\n'}, response_metadata={'model': 'deepseek-r1:8b', 'created_at': '2025-07-08T19:33:55.891269Z', 'done': True, 'done_reason': 'stop', 'total_duration': 98232561292, 'load_duration': 28036792, 'prompt_eval_count': 10, 'prompt_eval_duration': 40171834, 'eval_count': 3615, 'eval_duration': 98163832416, 'model_name': 'deepseek-r1:8b'}, id='run--18f8269f-6a35-4a7c-826d-b89d52c753b3-0', usage_metadata={'input_tokens': 10, 'output_tokens': 3615, 'total_tokens': 3625})

-
     """ # noqa: E501, pylint: disable=line-too-long

     model: str
     """Model name to use."""

-    reasoning: Optional[bool] = None
+    reasoning: Optional[Union[bool, str]] = None
     """Controls the reasoning/thinking mode for
     `supported models <https://ollama.com/search?c=thinking>`__.

@@ -421,33 +431,42 @@ class ChatOllama(BaseChatModel):
     - ``None`` (Default): The model will use its default reasoning behavior. Note
       however, if the model's default behavior *is* to perform reasoning, think tags
       ()``<think>`` and ``</think>``) will be present within the main response content
-      unless you set ``reasoning`` to ``True``.
+      unless you set ``reasoning`` to ``True``.
+    - ``str``: e.g. ``'low'``, ``'medium'``, ``'high'``. Enables reasoning with a custom
+      intensity level. Currently, this is only supported ``gpt-oss``. See the
+      `Ollama docs <https://github.com/ollama/ollama-python/blob/da79e987f0ac0a4986bf396f043b36ef840370bc/ollama/_types.py#L210>`__
+      for more information.
+
+    """

     validate_model_on_init: bool = False
-    """Whether to validate the model exists in Ollama locally on initialization.
+    """Whether to validate the model exists in Ollama locally on initialization.
+
+    .. versionadded:: 0.3.4
+    """

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
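Since ``reasoning`` now also accepts a string, the effort level can be set on the instance or per call; the string form is documented only for ``gpt-oss``. A sketch, assuming the model is available locally:

.. code-block:: python

    from langchain_ollama import ChatOllama

    llm = ChatOllama(model="gpt-oss:20b", reasoning="low")
    # Raise the effort for a single call:
    llm.invoke("how many r in the word strawberry?", reasoning="high")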
@@ -457,20 +476,20 @@ class ChatOllama(BaseChatModel):

     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -482,21 +501,21 @@ class ChatOllama(BaseChatModel):

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
-    """Specify the format of the output (options:
+    """Specify the format of the output (options: ``'json'``, JSON schema)."""

     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
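Because ``format`` accepts a ``JsonSchemaValue`` as well as ``'json'``, a schema dict can be passed directly to constrain the output. The schema and model name below are made-up examples:

.. code-block:: python

    from langchain_ollama import ChatOllama

    weather_schema = {
        "type": "object",
        "properties": {
            "location": {"type": "string"},
            "time_of_day": {"type": "string"},
        },
        "required": ["location", "time_of_day"],
    }
    llm = ChatOllama(model="llama3.1", format=weather_schema)
    llm.invoke("Return a weather query with keys location and time_of_day.")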
@@ -506,32 +525,35 @@ class ChatOllama(BaseChatModel):

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
     passing to the httpx AsyncClient.
+
     `Full list of params. <https://www.python-httpx.org/api/#asyncclient>`__
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
     passing to the httpx Client.
+
     `Full list of params. <https://www.python-httpx.org/api/#client>`__
+
     """

     _client: Client = PrivateAttr()
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: AsyncClient = PrivateAttr()
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     def _chat_params(
         self,
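The split between ``client_kwargs`` and the per-client overrides can be used like this; the timeout and header values are arbitrary examples of standard httpx ``Client``/``AsyncClient`` parameters:

.. code-block:: python

    from langchain_ollama import ChatOllama

    llm = ChatOllama(
        model="llama3.1",
        client_kwargs={"timeout": 60.0},  # applied to both clients
        sync_client_kwargs={"headers": {"x-request-source": "batch"}},
        async_client_kwargs={"headers": {"x-request-source": "serving"}},
    )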
@@ -821,6 +843,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'."
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
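The skipped ``done_reason='load'`` chunks are reported only through the module logger, so they surface via ordinary logging configuration; a minimal sketch:

.. code-block:: python

    import logging

    # The warning above is emitted by the "langchain_ollama.chat_models" logger.
    logging.basicConfig(level=logging.WARNING)
    logging.getLogger("langchain_ollama.chat_models").setLevel(logging.WARNING)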
@@ -829,12 +873,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None

-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
@@ -881,6 +919,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -889,12 +949,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None

-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
@@ -993,8 +1047,7 @@ class ChatOllama(BaseChatModel):
         """Model wrapper that returns outputs formatted to match the given schema.

         Args:
-            schema:
-                The output schema. Can be passed in as:
+            schema: The output schema. Can be passed in as:

                 - a Pydantic class,
                 - a JSON schema
@@ -1010,35 +1063,35 @@ class ChatOllama(BaseChatModel):

             method: The method for steering model generation, one of:

-                -
+                - ``'json_schema'``:
                     Uses Ollama's `structured output API <https://ollama.com/blog/structured-outputs>`__
-                -
+                - ``'function_calling'``:
                     Uses Ollama's tool-calling API
-                -
-                    Specifies ``format=
+                - ``'json_mode'``:
+                    Specifies ``format='json'``. Note that if using JSON mode then you
                     must include instructions for formatting the output into the
                     desired schema into the model call.

             include_raw:
                 If False then only the parsed structured output is returned. If
                 an error occurs during model output parsing it will be raised. If True
-                then both the raw model response (a BaseMessage) and the parsed model
+                then both the raw model response (a ``BaseMessage``) and the parsed model
                 response will be returned. If an error occurs during output parsing it
                 will be caught and returned as well. The final output is always a dict
-                with keys
+                with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.

             kwargs: Additional keyword args aren't supported.

         Returns:
             A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:

-            -
-            -
-            -
+            - ``'raw'``: ``BaseMessage``
+            - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+            - ``'parsing_error'``: Optional[BaseException]

         .. versionchanged:: 0.2.2

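The reformatted ``method`` options map onto the usual call pattern; a short sketch with a Pydantic schema (the model name is illustrative):

.. code-block:: python

    from pydantic import BaseModel

    from langchain_ollama import ChatOllama


    class AnswerWithJustification(BaseModel):
        answer: str
        justification: str


    llm = ChatOllama(model="llama3.1", temperature=0)
    structured_llm = llm.with_structured_output(
        AnswerWithJustification, method="json_schema", include_raw=False
    )
    structured_llm.invoke(
        "What weighs more, a pound of bricks or a pound of feathers?"
    )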
@@ -1046,7 +1099,7 @@ class ChatOllama(BaseChatModel):

         .. versionchanged:: 0.3.0

-            Updated default ``method`` to ``
+            Updated default ``method`` to ``'json_schema'``.

         .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False

@@ -1081,7 +1134,7 @@ class ChatOllama(BaseChatModel):
                 # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
                 # )

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_schema'``, ``include_raw=True``

             .. code-block:: python

@@ -1110,7 +1163,7 @@ class ChatOllama(BaseChatModel):
                 # 'parsing_error': None
                 # }

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='function_calling'``, ``include_raw=False``

             .. code-block:: python

@@ -1174,7 +1227,7 @@ class ChatOllama(BaseChatModel):
                 # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                 # }

-        .. dropdown:: Example: schema=OpenAI function schema, method=
+        .. dropdown:: Example: ``schema=OpenAI`` function schema, ``method='function_calling'``, ``include_raw=False``

             .. code-block:: python

@@ -1204,7 +1257,7 @@ class ChatOllama(BaseChatModel):
                 # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
                 # }

-        .. dropdown:: Example: schema=Pydantic class, method=
+        .. dropdown:: Example: ``schema=Pydantic`` class, ``method='json_mode'``, ``include_raw=True``

             .. code-block::

@@ -1232,6 +1285,7 @@ class ChatOllama(BaseChatModel):
                 # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
                 # 'parsing_error': None
                 # }
+
         """ # noqa: E501, D301
         _ = kwargs.pop("strict", None)
         if kwargs:
langchain_ollama/embeddings.py
CHANGED
@@ -21,12 +21,12 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.

     Set up a local Ollama instance:
-        Install the Ollama package and set up a
-
+        `Install the Ollama package <https://github.com/ollama/ollama>`__ and set up a
+        local Ollama instance.

         You will need to choose a model to serve.

-        You can view a list of available models via the model library
+        You can view a list of available models via `the model library <https://ollama.com/library>`__.

         To fetch a model from the Ollama model library use ``ollama pull <name-of-model>``.

@@ -39,8 +39,8 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         This will download the default tagged version of the model.
         Typically, the default points to the latest, smallest sized-parameter model.

-        * On Mac, the models will be downloaded to
-        * On Linux (or WSL), the models will be stored at
+        * On Mac, the models will be downloaded to ``~/.ollama/models``
+        * On Linux (or WSL), the models will be stored at ``/usr/share/ollama/.ollama/models``

         You can specify the exact version of the model of interest
         as such ``ollama pull vicuna:13b-v1.5-16k-q4_0``.
@@ -122,73 +122,80 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         .. code-block:: python

             [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188]
+
     """ # noqa: E501

     model: str
     """Model name to use."""

     validate_model_on_init: bool = False
-    """Whether to validate the model exists in ollama locally on initialization.
+    """Whether to validate the model exists in ollama locally on initialization.
+
+    .. versionadded:: 0.3.4
+
+    """

     base_url: Optional[str] = None
     """Base url the model is hosted under."""

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the httpx
     AsyncClient.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+
     """

     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     keep_alive: Optional[int] = None
-    """
-    following the request (default: 5m)
+    """Controls how long the model will stay loaded into memory
+    following the request (default: ``5m``)
     """

     num_thread: Optional[int] = None
@@ -199,34 +206,34 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     model_config = ConfigDict(
         extra="forbid",
@@ -253,7 +260,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):

     @model_validator(mode="after")
     def _set_clients(self) -> Self:
-        """Set clients to use for
+        """Set clients to use for Ollama."""
         client_kwargs = self.client_kwargs or {}

         sync_client_kwargs = client_kwargs
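Tying the embeddings changes together, a brief usage sketch (the model name is an example and must be pulled first):

.. code-block:: python

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",
        validate_model_on_init=True,
    )
    vector = embeddings.embed_query("What is the meaning of life?")
    vectors = embeddings.embed_documents(["first document", "second document"])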
langchain_ollama/llms.py
CHANGED
@@ -33,6 +33,7 @@ class OllamaLLM(BaseLLM):

             model = OllamaLLM(model="llama3")
             print(model.invoke("Come up with 10 names for a song about parrots"))
+
     """

     model: str
@@ -53,30 +54,33 @@ class OllamaLLM(BaseLLM):
     be present directly within the main response content."""

     validate_model_on_init: bool = False
-    """Whether to validate the model exists in ollama locally on initialization.
+    """Whether to validate the model exists in ollama locally on initialization.
+
+    .. versionadded:: 0.3.4
+    """

     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""

     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""

     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""

     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048)"""
+    next token. (Default: ``2048``)"""

     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""

     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
@@ -86,20 +90,20 @@ class OllamaLLM(BaseLLM):

     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""

     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""

     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""

     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""

     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -111,21 +115,21 @@ class OllamaLLM(BaseLLM):

     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: ``1``)"""

     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""

     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""

     format: Literal["", "json"] = ""
-    """Specify the format of the output (options: json)"""
+    """Specify the format of the output (options: ``'json'``)"""

     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
@@ -135,33 +139,35 @@ class OllamaLLM(BaseLLM):

     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """

     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the HTTPX
     AsyncClient.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
+
     """

     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.

     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
+
     """

     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""

     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""

     def _generate_params(
         self,
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/METADATA
CHANGED
@@ -1,14 +1,14 @@
 Metadata-Version: 2.1
 Name: langchain-ollama
-Version: 0.3.5
+Version: 0.3.7
 Summary: An integration package connecting Ollama and LangChain
 License: MIT
 Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
 Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
 Project-URL: repository, https://github.com/langchain-ai/langchain
 Requires-Python: >=3.9
-Requires-Dist: ollama<1.0.0,>=0.5.
-Requires-Dist: langchain-core<1.0.0,>=0.3.
+Requires-Dist: ollama<1.0.0,>=0.5.3
+Requires-Dist: langchain-core<1.0.0,>=0.3.74
 Description-Content-Type: text/markdown

 # langchain-ollama
langchain_ollama-0.3.7.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+langchain_ollama-0.3.7.dist-info/METADATA,sha256=JdFmlg4IUkM7Sy99773NdtIJKygkdJ6pYauIo29IWtQ,2072
+langchain_ollama-0.3.7.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+langchain_ollama-0.3.7.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+langchain_ollama-0.3.7.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+langchain_ollama/__init__.py,sha256=yXoECBZPpZGcg6V80qJCrgB02jW4KcjICZ5soLel6gw,1162
+langchain_ollama/_utils.py,sha256=amg8-DK0XaWe_aUO-ADFDUzb9EMJ-vMeyz2e1qrqAUo,1443
+langchain_ollama/chat_models.py,sha256=xV5R4hlOAQOsRwb4WdfR4UPveI3-QChBaa3nGa4Ny3I,58314
+langchain_ollama/embeddings.py,sha256=PxYDwvHbq-J6lTusSBIXtTeBnYSjG3R-a2wXFa52LXM,10455
+langchain_ollama/llms.py,sha256=65NARxA7xKYmwugvw4SxfpDPzHKiGGa6jsX7BgR64oM,16936
+langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_ollama-0.3.7.dist-info/RECORD,,
langchain_ollama-0.3.5.dist-info/RECORD
REMOVED
@@ -1,11 +0,0 @@
-langchain_ollama-0.3.5.dist-info/METADATA,sha256=15Fpg-jcUr1QRiPMxng3GPbvRHmAapo1s6qykTZvHk0,2072
-langchain_ollama-0.3.5.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
-langchain_ollama-0.3.5.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-langchain_ollama-0.3.5.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
-langchain_ollama/__init__.py,sha256=TI1gI0Wpg7mRXehGpxrJG2flF_t4Ev-aIJlLKV-CgL0,633
-langchain_ollama/_utils.py,sha256=dmFO4tSvDTeMALc89QnTBLNWPMZL0eNAq1EDwuMjRA8,1416
-langchain_ollama/chat_models.py,sha256=olz3KJeLG1vk47Xl38nN9bP4bcol5cBQnPnu5MyP8k8,55539
-langchain_ollama/embeddings.py,sha256=walU1vZq_YamLLPDSJLbMtOu6jFbiNnhJ5ni2sybCRs,10318
-langchain_ollama/llms.py,sha256=PSJ-VQMocp1nm-pgtnKnozidt66RKJiEnhdzftoLNNc,16778
-langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-langchain_ollama-0.3.5.dist-info/RECORD,,
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/WHEEL
File without changes
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/entry_points.txt
File without changes
{langchain_ollama-0.3.5.dist-info → langchain_ollama-0.3.7.dist-info}/licenses/LICENSE
File without changes