langchain-ollama 0.1.0rc0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_ollama-0.1.0rc0/LICENSE +21 -0
- langchain_ollama-0.1.0rc0/PKG-INFO +64 -0
- langchain_ollama-0.1.0rc0/README.md +44 -0
- langchain_ollama-0.1.0rc0/langchain_ollama/__init__.py +19 -0
- langchain_ollama-0.1.0rc0/langchain_ollama/chat_models.py +693 -0
- langchain_ollama-0.1.0rc0/langchain_ollama/embeddings.py +51 -0
- langchain_ollama-0.1.0rc0/langchain_ollama/llms.py +343 -0
- langchain_ollama-0.1.0rc0/langchain_ollama/py.typed +0 -0
- langchain_ollama-0.1.0rc0/pyproject.toml +90 -0
langchain_ollama-0.1.0rc0/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 LangChain, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
langchain_ollama-0.1.0rc0/PKG-INFO
@@ -0,0 +1,64 @@
Metadata-Version: 2.1
Name: langchain-ollama
Version: 0.1.0rc0
Summary: An integration package connecting Ollama and LangChain
Home-page: https://github.com/langchain-ai/langchain
License: MIT
Requires-Python: >=3.8.1,<4.0
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Dist: langchain-core (>=0.2.20,<0.3.0)
Requires-Dist: ollama (>=0.2.1,<1)
Project-URL: Repository, https://github.com/langchain-ai/langchain
Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
Description-Content-Type: text/markdown

# langchain-ollama

This package contains the LangChain integration with Ollama

## Installation

```bash
pip install -U langchain-ollama
```

You will also need to run the Ollama server locally.
You can download it [here](https://ollama.com/download).

## Chat Models

`ChatOllama` class exposes chat models from Ollama.

```python
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3")
llm.invoke("Sing a ballad of LangChain.")
```

## Embeddings

`OllamaEmbeddings` class exposes embeddings from Ollama.

```python
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")
embeddings.embed_query("What is the meaning of life?")
```

## LLMs
`OllamaLLM` class exposes LLMs from Ollama.

```python
from langchain_ollama import OllamaLLM

llm = OllamaLLM()
llm.invoke("The meaning of life is")
```
langchain_ollama-0.1.0rc0/README.md
@@ -0,0 +1,44 @@
# langchain-ollama

This package contains the LangChain integration with Ollama

## Installation

```bash
pip install -U langchain-ollama
```

You will also need to run the Ollama server locally.
You can download it [here](https://ollama.com/download).

## Chat Models

`ChatOllama` class exposes chat models from Ollama.

```python
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3")
llm.invoke("Sing a ballad of LangChain.")
```

## Embeddings

`OllamaEmbeddings` class exposes embeddings from Ollama.

```python
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")
embeddings.embed_query("What is the meaning of life?")
```

## LLMs
`OllamaLLM` class exposes LLMs from Ollama.

```python
from langchain_ollama import OllamaLLM

llm = OllamaLLM()
llm.invoke("The meaning of life is")
```
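The README examples above call each class in isolation. As an editor's sketch (not part of the packaged README), the snippet below shows the usual way this integration slots into a langchain-core chain; it assumes a local Ollama server with the `llama3` model pulled, and uses `ChatPromptTemplate` and `StrOutputParser` from `langchain-core`, which is already a dependency of this package.

```python
# Illustrative composition sketch, assuming a local Ollama server with "llama3" pulled.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from langchain_ollama import ChatOllama

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a concise assistant."),
        ("human", "{question}"),
    ]
)
llm = ChatOllama(model="llama3", temperature=0.8)

# Prompt -> model -> string output, composed with the LCEL pipe operator.
chain = prompt | llm | StrOutputParser()
print(chain.invoke({"question": "What is Ollama in one sentence?"}))
```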
langchain_ollama-0.1.0rc0/langchain_ollama/__init__.py
@@ -0,0 +1,19 @@
from importlib import metadata

from langchain_ollama.chat_models import ChatOllama
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain_ollama.llms import OllamaLLM

try:
    __version__ = metadata.version(__package__)
except metadata.PackageNotFoundError:
    # Case where package metadata is not available.
    __version__ = ""
del metadata  # optional, avoids polluting the results of dir(__package__)

__all__ = [
    "ChatOllama",
    "OllamaLLM",
    "OllamaEmbeddings",
    "__version__",
]
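The `__init__.py` resolves `__version__` from the installed distribution's metadata and then deletes the `metadata` name from the module namespace. A minimal sketch of what that means for consumers (the printed value assumes the package was pip-installed; the empty-string fallback only appears when `importlib.metadata` cannot find the distribution, e.g. an uninstalled source checkout):

```python
# Minimal sketch of the version lookup behaviour, assuming a pip-installed package.
import langchain_ollama

print(langchain_ollama.__version__)           # e.g. "0.1.0rc0" when installed
print(hasattr(langchain_ollama, "metadata"))  # False: the module deletes the name after use
```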
langchain_ollama-0.1.0rc0/langchain_ollama/chat_models.py
@@ -0,0 +1,693 @@
"""Ollama chat models."""

from typing import (
    Any,
    AsyncIterator,
    Callable,
    Dict,
    Iterator,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Type,
    Union,
    cast,
)
from uuid import uuid4

import ollama
from langchain_core.callbacks import (
    CallbackManagerForLLMRun,
)
from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage,
    ToolCall,
    ToolMessage,
)
from langchain_core.messages.ai import UsageMetadata
from langchain_core.messages.tool import tool_call
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import convert_to_openai_tool
from ollama import AsyncClient, Message, Options


def _get_usage_metadata_from_generation_info(
    generation_info: Optional[Mapping[str, Any]],
) -> Optional[UsageMetadata]:
    """Get usage metadata from ollama generation info mapping."""
    if generation_info is None:
        return None
    input_tokens: Optional[int] = generation_info.get("prompt_eval_count")
    output_tokens: Optional[int] = generation_info.get("eval_count")
    if input_tokens is not None and output_tokens is not None:
        return UsageMetadata(
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=input_tokens + output_tokens,
        )
    return None


def _get_tool_calls_from_response(
    response: Mapping[str, Any],
) -> List[ToolCall]:
    """Get tool calls from ollama response."""
    tool_calls = []
    if "message" in response:
        if "tool_calls" in response["message"]:
            for tc in response["message"]["tool_calls"]:
                tool_calls.append(
                    tool_call(
                        id=str(uuid4()),
                        name=tc["function"]["name"],
                        args=tc["function"]["arguments"],
                    )
                )
    return tool_calls


def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
    return {
        "type": "function",
        "id": tool_call["id"],
        "function": {
            "name": tool_call["name"],
            "arguments": tool_call["args"],
        },
    }


class ChatOllama(BaseChatModel):
    """Ollama chat model integration.

    Setup:
        Install ``langchain-ollama`` and download any models you want to use from ollama.

        .. code-block:: bash

            ollama pull mistral:v0.3
            pip install -U langchain-ollama

    Key init args — completion params:
        model: str
            Name of Ollama model to use.
        temperature: float
            Sampling temperature. Ranges from 0.0 to 1.0.
        num_predict: Optional[int]
            Max number of tokens to generate.

    See full list of supported init args and their descriptions in the params section.

    Instantiate:
        .. code-block:: python

            from langchain_ollama import ChatOllama

            llm = ChatOllama(
                model = "llama3",
                temperature = 0.8,
                num_predict = 256,
                # other params ...
            )

    Invoke:
        .. code-block:: python

            messages = [
                ("system", "You are a helpful translator. Translate the user sentence to French."),
                ("human", "I love programming."),
            ]
            llm.invoke(messages)

        .. code-block:: python

            AIMessage(content='J'adore le programmation. (Note: "programming" can also refer to the act of writing code, so if you meant that, I could translate it as "J'adore programmer". But since you didn\'t specify, I assumed you were talking about the activity itself, which is what "le programmation" usually refers to.)', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:37:50.182604Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 3576619666, 'load_duration': 788524916, 'prompt_eval_count': 32, 'prompt_eval_duration': 128125000, 'eval_count': 71, 'eval_duration': 2656556000}, id='run-ba48f958-6402-41a5-b461-5e250a4ebd36-0')

    Stream:
        .. code-block:: python

            messages = [
                ("human", "Return the words Hello World!"),
            ]
            for chunk in llm.stream(messages):
                print(chunk)


        .. code-block:: python

            content='Hello' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
            content=' World' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
            content='!' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
            content='' response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:39:42.274449Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 411875125, 'load_duration': 1898166, 'prompt_eval_count': 14, 'prompt_eval_duration': 297320000, 'eval_count': 4, 'eval_duration': 111099000} id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'


        .. code-block:: python

            stream = llm.stream(messages)
            full = next(stream)
            for chunk in stream:
                full += chunk
            full

        .. code-block:: python

            AIMessageChunk(content='Je adore le programmation.(Note: "programmation" is the formal way to say "programming" in French, but informally, people might use the phrase "le développement logiciel" or simply "le code")', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:38:54.933154Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1977300042, 'load_duration': 1345709, 'prompt_eval_duration': 159343000, 'eval_count': 47, 'eval_duration': 1815123000}, id='run-3c81a3ed-3e79-4dd3-a796-04064d804890')

    Async:
        .. code-block:: python

            messages = [
                ("human", "Hello how are you!"),
            ]
            await llm.ainvoke(messages)

        .. code-block:: python

            AIMessage(content="Hi there! I'm just an AI, so I don't have feelings or emotions like humans do. But I'm functioning properly and ready to help with any questions or tasks you may have! How can I assist you today?", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:52:08.165478Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2138492875, 'load_duration': 1364000, 'prompt_eval_count': 10, 'prompt_eval_duration': 297081000, 'eval_count': 47, 'eval_duration': 1838524000}, id='run-29c510ae-49a4-4cdd-8f23-b972bfab1c49-0')

        .. code-block:: python

            messages = [
                ("human", "Say hello world!"),
            ]
            async for chunk in llm.astream(messages):
                print(chunk.content)

        .. code-block:: python

            HEL
            LO
            WORLD
            !

        .. code-block:: python

            messages = [
                ("human", "Say hello world!"),
                ("human","Say goodbye world!")
            ]
            await llm.abatch(messages)

        .. code-block:: python

            [AIMessage(content='HELLO, WORLD!', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.315396Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1696745458, 'load_duration': 1505000, 'prompt_eval_count': 8, 'prompt_eval_duration': 111627000, 'eval_count': 6, 'eval_duration': 185181000}, id='run-da6c7562-e25a-4a44-987a-2c83cd8c2686-0'),
            AIMessage(content="It's been a blast chatting with you! Say goodbye to the world for me, and don't forget to come back and visit us again soon!", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.018076Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1399391083, 'load_duration': 1187417, 'prompt_eval_count': 20, 'prompt_eval_duration': 230349000, 'eval_count': 31, 'eval_duration': 1166047000}, id='run-96cad530-6f3e-4cf9-86b4-e0f8abba4cdb-0')]

    JSON mode:
        .. code-block:: python


            json_llm = ChatOllama(format="json")
            messages = [
                ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
            ]
            llm.invoke(messages).content

        .. code-block:: python

            '{"location": "Pune, India", "time_of_day": "morning"}'
    """  # noqa: E501

    model: str = "llama2"
    """Model name to use."""

    mirostat: Optional[int] = None
    """Enable Mirostat sampling for controlling perplexity.
    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""

    mirostat_eta: Optional[float] = None
    """Influences how quickly the algorithm responds to feedback
    from the generated text. A lower learning rate will result in
    slower adjustments, while a higher learning rate will make
    the algorithm more responsive. (Default: 0.1)"""

    mirostat_tau: Optional[float] = None
    """Controls the balance between coherence and diversity
    of the output. A lower value will result in more focused and
    coherent text. (Default: 5.0)"""

    num_ctx: Optional[int] = None
    """Sets the size of the context window used to generate the
    next token. (Default: 2048) """

    num_gpu: Optional[int] = None
    """The number of GPUs to use. On macOS it defaults to 1 to
    enable metal support, 0 to disable."""

    num_thread: Optional[int] = None
    """Sets the number of threads to use during computation.
    By default, Ollama will detect this for optimal performance.
    It is recommended to set this value to the number of physical
    CPU cores your system has (as opposed to the logical number of cores)."""

    num_predict: Optional[int] = None
    """Maximum number of tokens to predict when generating text.
    (Default: 128, -1 = infinite generation, -2 = fill context)"""

    repeat_last_n: Optional[int] = None
    """Sets how far back for the model to look back to prevent
    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""

    repeat_penalty: Optional[float] = None
    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
    will be more lenient. (Default: 1.1)"""

    temperature: Optional[float] = None
    """The temperature of the model. Increasing the temperature will
    make the model answer more creatively. (Default: 0.8)"""

    stop: Optional[List[str]] = None
    """Sets the stop tokens to use."""

    tfs_z: Optional[float] = None
    """Tail free sampling is used to reduce the impact of less probable
    tokens from the output. A higher value (e.g., 2.0) will reduce the
    impact more, while a value of 1.0 disables this setting. (default: 1)"""

    top_k: Optional[int] = None
    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
    will give more diverse answers, while a lower value (e.g. 10)
    will be more conservative. (Default: 40)"""

    top_p: Optional[float] = None
    """Works together with top-k. A higher value (e.g., 0.95) will lead
    to more diverse text, while a lower value (e.g., 0.5) will
    generate more focused and conservative text. (Default: 0.9)"""

    format: Literal["", "json"] = ""
    """Specify the format of the output (options: json)"""

    keep_alive: Optional[Union[int, str]] = None
    """How long the model will stay loaded into memory."""

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling Ollama."""
        return {
            "model": self.model,
            "format": self.format,
            "options": {
                "mirostat": self.mirostat,
                "mirostat_eta": self.mirostat_eta,
                "mirostat_tau": self.mirostat_tau,
                "num_ctx": self.num_ctx,
                "num_gpu": self.num_gpu,
                "num_thread": self.num_thread,
                "num_predict": self.num_predict,
                "repeat_last_n": self.repeat_last_n,
                "repeat_penalty": self.repeat_penalty,
                "temperature": self.temperature,
                "stop": self.stop,
                "tfs_z": self.tfs_z,
                "top_k": self.top_k,
                "top_p": self.top_p,
            },
            "keep_alive": self.keep_alive,
        }

    def _convert_messages_to_ollama_messages(
        self, messages: List[BaseMessage]
    ) -> Sequence[Message]:
        ollama_messages: List = []
        for message in messages:
            role = ""
            tool_call_id: Optional[str] = None
            tool_calls: Optional[List[Dict[str, Any]]] = None
            if isinstance(message, HumanMessage):
                role = "user"
            elif isinstance(message, AIMessage):
                role = "assistant"
                tool_calls = (
                    [
                        _lc_tool_call_to_openai_tool_call(tool_call)
                        for tool_call in message.tool_calls
                    ]
                    if message.tool_calls
                    else None
                )
            elif isinstance(message, SystemMessage):
                role = "system"
            elif isinstance(message, ToolMessage):
                role = "tool"
                tool_call_id = message.tool_call_id
            else:
                raise ValueError("Received unsupported message type for Ollama.")

            content = ""
            images = []
            if isinstance(message.content, str):
                content = message.content
            else:
                for content_part in cast(List[Dict], message.content):
                    if content_part.get("type") == "text":
                        content += f"\n{content_part['text']}"
                    elif content_part.get("type") == "tool_use":
                        continue
                    elif content_part.get("type") == "image_url":
                        image_url = None
                        temp_image_url = content_part.get("image_url")
                        if isinstance(temp_image_url, str):
                            image_url = content_part["image_url"]
                        elif (
                            isinstance(temp_image_url, dict) and "url" in temp_image_url
                        ):
                            image_url = temp_image_url
                        else:
                            raise ValueError(
                                "Only string image_url or dict with string 'url' "
                                "inside content parts are supported."
                            )

                        image_url_components = image_url.split(",")
                        # Support data:image/jpeg;base64,<image> format
                        # and base64 strings
                        if len(image_url_components) > 1:
                            images.append(image_url_components[1])
                        else:
                            images.append(image_url_components[0])

                    else:
                        raise ValueError(
                            "Unsupported message content type. "
                            "Must either have type 'text' or type 'image_url' "
                            "with a string 'image_url' field."
                        )
            msg = {
                "role": role,
                "content": content,
                "images": images,
            }
            if tool_call_id:
                msg["tool_call_id"] = tool_call_id
            if tool_calls:
                msg["tool_calls"] = tool_calls
            ollama_messages.append(msg)

        return ollama_messages

    async def _acreate_chat_stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
        ollama_messages = self._convert_messages_to_ollama_messages(messages)

        stop = stop if stop is not None else self.stop

        params = self._default_params

        for key in self._default_params:
            if key in kwargs:
                params[key] = kwargs[key]

        params["options"]["stop"] = stop
        async for part in await AsyncClient().chat(
            model=params["model"],
            messages=ollama_messages,
            stream=True,
            options=Options(**params["options"]),
            keep_alive=params["keep_alive"],
            format=params["format"],
        ):  # type:ignore
            yield part

    def _create_chat_stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Iterator[Union[Mapping[str, Any], str]]:
        ollama_messages = self._convert_messages_to_ollama_messages(messages)

        stop = stop if stop is not None else self.stop

        params = self._default_params

        for key in self._default_params:
            if key in kwargs:
                params[key] = kwargs[key]

        params["options"]["stop"] = stop
        if "tools" in kwargs:
            # tools not supported by sdk yet.
            req = {
                "model": params["model"],
                "messages": ollama_messages,
                "stream": False,
                "format": params["format"],
                "options": Options(**params["options"]),
                "keep_alive": params["keep_alive"],
                "tools": kwargs["tools"],
            }
            it = ollama._client._request_stream(
                "POST",
                "/api/chat",
                json=req,
                stream=False,
            )
            yield cast(Mapping[str, Any], it)
        else:
            yield from ollama.chat(
                model=params["model"],
                messages=ollama_messages,
                stream=True,
                options=Options(**params["options"]),
                keep_alive=params["keep_alive"],
                format=params["format"],
            )

    def _chat_stream_with_aggregation(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> ChatGenerationChunk:
        final_chunk = None
        for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = ChatGenerationChunk(
                    message=AIMessageChunk(
                        content=(
                            stream_resp["message"]["content"]
                            if "message" in stream_resp
                            and "content" in stream_resp["message"]
                            else ""
                        ),
                        usage_metadata=_get_usage_metadata_from_generation_info(
                            stream_resp
                        ),
                        tool_calls=_get_tool_calls_from_response(stream_resp),
                    ),
                    generation_info=(
                        dict(stream_resp) if stream_resp.get("done") is True else None
                    ),
                )
                if final_chunk is None:
                    final_chunk = chunk
                else:
                    final_chunk += chunk
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        chunk=chunk,
                        verbose=verbose,
                    )
        if final_chunk is None:
            raise ValueError("No data received from Ollama stream.")

        return final_chunk

    async def _achat_stream_with_aggregation(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> ChatGenerationChunk:
        final_chunk = None
        async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = ChatGenerationChunk(
                    message=AIMessageChunk(
                        content=(
                            stream_resp["message"]["content"]
                            if "message" in stream_resp
                            and "content" in stream_resp["message"]
                            else ""
                        ),
                        usage_metadata=_get_usage_metadata_from_generation_info(
                            stream_resp
                        ),
                        tool_calls=_get_tool_calls_from_response(stream_resp),
                    ),
                    generation_info=(
                        dict(stream_resp) if stream_resp.get("done") is True else None
                    ),
                )
                if final_chunk is None:
                    final_chunk = chunk
                else:
                    final_chunk += chunk
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        chunk=chunk,
                        verbose=verbose,
                    )
        if final_chunk is None:
            raise ValueError("No data received from Ollama stream.")

        return final_chunk

    def _get_ls_params(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> LangSmithParams:
        """Get standard params for tracing."""
        params = self._get_invocation_params(stop=stop, **kwargs)
        ls_params = LangSmithParams(
            ls_provider="ollama",
            ls_model_name=self.model,
            ls_model_type="chat",
            ls_temperature=params.get("temperature", self.temperature),
        )
        if ls_stop := stop or params.get("stop", None) or self.stop:
            ls_params["ls_stop"] = ls_stop
        return ls_params

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        final_chunk = self._chat_stream_with_aggregation(
            messages, stop, run_manager, verbose=self.verbose, **kwargs
        )
        generation_info = final_chunk.generation_info
        chat_generation = ChatGeneration(
            message=AIMessage(
                content=final_chunk.text,
                usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
                tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
            ),
            generation_info=generation_info,
        )
        return ChatResult(generations=[chat_generation])

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = ChatGenerationChunk(
                    message=AIMessageChunk(
                        content=(
                            stream_resp["message"]["content"]
                            if "message" in stream_resp
                            and "content" in stream_resp["message"]
                            else ""
                        ),
                        usage_metadata=_get_usage_metadata_from_generation_info(
                            stream_resp
                        ),
                        tool_calls=_get_tool_calls_from_response(stream_resp),
                    ),
                    generation_info=(
                        dict(stream_resp) if stream_resp.get("done") is True else None
                    ),
                )
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    async def _astream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = ChatGenerationChunk(
                    message=AIMessageChunk(
                        content=(
                            stream_resp["message"]["content"]
                            if "message" in stream_resp
                            and "content" in stream_resp["message"]
                            else ""
                        ),
                        usage_metadata=_get_usage_metadata_from_generation_info(
                            stream_resp
                        ),
                        tool_calls=_get_tool_calls_from_response(stream_resp),
                    ),
                    generation_info=(
                        dict(stream_resp) if stream_resp.get("done") is True else None
                    ),
                )
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        final_chunk = await self._achat_stream_with_aggregation(
            messages, stop, run_manager, verbose=self.verbose, **kwargs
        )
        generation_info = final_chunk.generation_info
        chat_generation = ChatGeneration(
            message=AIMessage(
                content=final_chunk.text,
                usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
                tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
            ),
            generation_info=generation_info,
        )
        return ChatResult(generations=[chat_generation])

    @property
    def _llm_type(self) -> str:
        """Return type of chat model."""
        return "chat-ollama"

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
        return super().bind(tools=formatted_tools, **kwargs)
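`bind_tools` above converts any tool-like object with `convert_to_openai_tool` and passes the result through `bind(tools=...)`; `_create_chat_stream` then forwards those tool definitions on a raw `/api/chat` request, and `_get_tool_calls_from_response` turns the reply into `tool_calls` on the resulting message. As an editor's sketch of how that surface is used (the `get_weather` function is hypothetical, and a locally pulled model that supports tool calling is assumed):

```python
# Illustrative sketch only; assumes a tool-capable model is available locally.
from langchain_ollama import ChatOllama


def get_weather(city: str) -> str:
    """Return the current weather for a city."""  # docstring becomes the tool description
    return f"It is sunny in {city}."


llm = ChatOllama(model="llama3").bind_tools([get_weather])
msg = llm.invoke("What is the weather in Paris?")

# Tool calls parsed by _get_tool_calls_from_response land on the message:
for call in msg.tool_calls:
    print(call["name"], call["args"])
```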
langchain_ollama-0.1.0rc0/langchain_ollama/embeddings.py
@@ -0,0 +1,51 @@
from typing import List

import ollama
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Extra
from ollama import AsyncClient


class OllamaEmbeddings(BaseModel, Embeddings):
    """OllamaEmbeddings embedding model.

    Example:
        .. code-block:: python

            from langchain_ollama import OllamaEmbeddings

            model = OllamaEmbeddings(model="llama3")
            embedder.embed_query("what is the place that jonathan worked at?")
    """

    model: str = "llama2"
    """Model name to use."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs."""
        embedded_docs = []
        for doc in texts:
            embedded_docs.append(list(ollama.embeddings(self.model, doc)["embedding"]))
        return embedded_docs

    def embed_query(self, text: str) -> List[float]:
        """Embed query text."""
        return self.embed_documents([text])[0]

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed search docs."""
        embedded_docs = []
        for doc in texts:
            embedded_docs.append(
                list((await AsyncClient().embeddings(self.model, doc))["embedding"])
            )
        return embedded_docs

    async def aembed_query(self, text: str) -> List[float]:
        """Embed query text."""
        return (await self.aembed_documents([text]))[0]
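Note that the packaged docstring example above assigns the instance to `model` but calls `embedder`; the methods themselves are straightforward, with `embed_documents` issuing one `ollama.embeddings` call per input text and `embed_query` delegating to it. An editor's usage sketch (the model name is illustrative and must already be pulled locally):

```python
# Usage sketch, assuming an embedding-capable model such as "llama3" is pulled locally.
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")

# One ollama.embeddings call per text.
vectors = embeddings.embed_documents(["hello world", "goodbye world"])
print(len(vectors), len(vectors[0]))  # number of texts, embedding dimension

query_vector = embeddings.embed_query("hello")  # same path, single text
```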
langchain_ollama-0.1.0rc0/langchain_ollama/llms.py
@@ -0,0 +1,343 @@
"""Ollama large language models."""

from typing import (
    Any,
    AsyncIterator,
    Dict,
    Iterator,
    List,
    Literal,
    Mapping,
    Optional,
    Union,
)

import ollama
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models import BaseLLM
from langchain_core.outputs import GenerationChunk, LLMResult
from ollama import AsyncClient, Options


class OllamaLLM(BaseLLM):
    """OllamaLLM large language models.

    Example:
        .. code-block:: python

            from langchain_ollama import OllamaLLM

            model = OllamaLLM(model="llama3")
            model.invoke("Come up with 10 names for a song about parrots")
    """

    model: str = "llama2"
    """Model name to use."""

    mirostat: Optional[int] = None
    """Enable Mirostat sampling for controlling perplexity.
    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""

    mirostat_eta: Optional[float] = None
    """Influences how quickly the algorithm responds to feedback
    from the generated text. A lower learning rate will result in
    slower adjustments, while a higher learning rate will make
    the algorithm more responsive. (Default: 0.1)"""

    mirostat_tau: Optional[float] = None
    """Controls the balance between coherence and diversity
    of the output. A lower value will result in more focused and
    coherent text. (Default: 5.0)"""

    num_ctx: Optional[int] = None
    """Sets the size of the context window used to generate the
    next token. (Default: 2048) """

    num_gpu: Optional[int] = None
    """The number of GPUs to use. On macOS it defaults to 1 to
    enable metal support, 0 to disable."""

    num_thread: Optional[int] = None
    """Sets the number of threads to use during computation.
    By default, Ollama will detect this for optimal performance.
    It is recommended to set this value to the number of physical
    CPU cores your system has (as opposed to the logical number of cores)."""

    num_predict: Optional[int] = None
    """Maximum number of tokens to predict when generating text.
    (Default: 128, -1 = infinite generation, -2 = fill context)"""

    repeat_last_n: Optional[int] = None
    """Sets how far back for the model to look back to prevent
    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""

    repeat_penalty: Optional[float] = None
    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
    will be more lenient. (Default: 1.1)"""

    temperature: Optional[float] = None
    """The temperature of the model. Increasing the temperature will
    make the model answer more creatively. (Default: 0.8)"""

    stop: Optional[List[str]] = None
    """Sets the stop tokens to use."""

    tfs_z: Optional[float] = None
    """Tail free sampling is used to reduce the impact of less probable
    tokens from the output. A higher value (e.g., 2.0) will reduce the
    impact more, while a value of 1.0 disables this setting. (default: 1)"""

    top_k: Optional[int] = None
    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
    will give more diverse answers, while a lower value (e.g. 10)
    will be more conservative. (Default: 40)"""

    top_p: Optional[float] = None
    """Works together with top-k. A higher value (e.g., 0.95) will lead
    to more diverse text, while a lower value (e.g., 0.5) will
    generate more focused and conservative text. (Default: 0.9)"""

    format: Literal["", "json"] = ""
    """Specify the format of the output (options: json)"""

    keep_alive: Optional[Union[int, str]] = None
    """How long the model will stay loaded into memory."""

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling Ollama."""
        return {
            "model": self.model,
            "format": self.format,
            "options": {
                "mirostat": self.mirostat,
                "mirostat_eta": self.mirostat_eta,
                "mirostat_tau": self.mirostat_tau,
                "num_ctx": self.num_ctx,
                "num_gpu": self.num_gpu,
                "num_thread": self.num_thread,
                "num_predict": self.num_predict,
                "repeat_last_n": self.repeat_last_n,
                "repeat_penalty": self.repeat_penalty,
                "temperature": self.temperature,
                "stop": self.stop,
                "tfs_z": self.tfs_z,
                "top_k": self.top_k,
                "top_p": self.top_p,
            },
            "keep_alive": self.keep_alive,
        }

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "ollama-llm"

    async def _acreate_generate_stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
        if self.stop is not None and stop is not None:
            raise ValueError("`stop` found in both the input and default params.")
        elif self.stop is not None:
            stop = self.stop

        params = self._default_params

        for key in self._default_params:
            if key in kwargs:
                params[key] = kwargs[key]

        params["options"]["stop"] = stop
        async for part in await AsyncClient().generate(
            model=params["model"],
            prompt=prompt,
            stream=True,
            options=Options(**params["options"]),
            keep_alive=params["keep_alive"],
            format=params["format"],
        ):  # type: ignore
            yield part

    def _create_generate_stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Iterator[Union[Mapping[str, Any], str]]:
        if self.stop is not None and stop is not None:
            raise ValueError("`stop` found in both the input and default params.")
        elif self.stop is not None:
            stop = self.stop

        params = self._default_params

        for key in self._default_params:
            if key in kwargs:
                params[key] = kwargs[key]

        params["options"]["stop"] = stop
        yield from ollama.generate(
            model=params["model"],
            prompt=prompt,
            stream=True,
            options=Options(**params["options"]),
            keep_alive=params["keep_alive"],
            format=params["format"],
        )

    async def _astream_with_aggregation(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> GenerationChunk:
        final_chunk = None
        async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = GenerationChunk(
                    text=stream_resp["response"] if "response" in stream_resp else "",
                    generation_info=dict(stream_resp)
                    if stream_resp.get("done") is True
                    else None,
                )
                if final_chunk is None:
                    final_chunk = chunk
                else:
                    final_chunk += chunk
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        chunk=chunk,
                        verbose=verbose,
                    )
        if final_chunk is None:
            raise ValueError("No data received from Ollama stream.")

        return final_chunk

    def _stream_with_aggregation(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> GenerationChunk:
        final_chunk = None
        for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = GenerationChunk(
                    text=stream_resp["response"] if "response" in stream_resp else "",
                    generation_info=dict(stream_resp)
                    if stream_resp.get("done") is True
                    else None,
                )
                if final_chunk is None:
                    final_chunk = chunk
                else:
                    final_chunk += chunk
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        chunk=chunk,
                        verbose=verbose,
                    )
        if final_chunk is None:
            raise ValueError("No data received from Ollama stream.")

        return final_chunk

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        generations = []
        for prompt in prompts:
            final_chunk = self._stream_with_aggregation(
                prompt,
                stop=stop,
                run_manager=run_manager,
                verbose=self.verbose,
                **kwargs,
            )
            generations.append([final_chunk])
        return LLMResult(generations=generations)  # type: ignore[arg-type]

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        generations = []
        for prompt in prompts:
            final_chunk = await self._astream_with_aggregation(
                prompt,
                stop=stop,
                run_manager=run_manager,
                verbose=self.verbose,
                **kwargs,
            )
            generations.append([final_chunk])
        return LLMResult(generations=generations)  # type: ignore[arg-type]

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = GenerationChunk(
                    text=stream_resp["message"]["content"]
                    if "message" in stream_resp
                    else "",
                    generation_info=dict(stream_resp)
                    if stream_resp.get("done") is True
                    else None,
                )
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
            if not isinstance(stream_resp, str):
                chunk = GenerationChunk(
                    text=stream_resp["message"]["content"]
                    if "message" in stream_resp
                    else "",
                    generation_info=dict(stream_resp)
                    if stream_resp.get("done") is True
                    else None,
                )
                if run_manager:
                    await run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk
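`OllamaLLM` drives Ollama's generate endpoint: `invoke` aggregates the stream through `_stream_with_aggregation`, while `stream` goes through `_stream`. Worth noting when reading the release as shipped: `_stream` and `_astream` read `stream_resp["message"]["content"]`, whereas the aggregation helpers read `stream_resp["response"]`, so token-by-token streaming may yield empty text in this rc. An editor's sketch of the blocking path (assumes the chosen model is already pulled locally):

```python
# Sketch of the completion-style interface, assuming "llama2" (the default) is pulled locally.
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="llama2", num_predict=64)

# Blocking call: the generate stream is aggregated internally before returning a string.
print(llm.invoke("The meaning of life is"))
```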
langchain_ollama-0.1.0rc0/langchain_ollama/py.typed
File without changes (empty marker file)
langchain_ollama-0.1.0rc0/pyproject.toml
@@ -0,0 +1,90 @@
[tool.poetry]
name = "langchain-ollama"
version = "0.1.0rc0"
description = "An integration package connecting Ollama and LangChain"
authors = []
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"
license = "MIT"

[tool.poetry.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
ollama = ">=0.2.1,<1"
langchain-core = "^0.2.20"

[tool.poetry.group.test]
optional = true

[tool.poetry.group.test.dependencies]
pytest = "^7.4.3"
pytest-asyncio = "^0.23.2"
syrupy = "^4.0.2"
pytest-socket = "^0.7.0"
langchain-core = { path = "../../core", develop = true }
langchain-standard-tests = { path = "../../standard-tests", develop = true }

[tool.poetry.group.codespell]
optional = true

[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.6"

[tool.poetry.group.test_integration]
optional = true

[tool.poetry.group.test_integration.dependencies]

[tool.poetry.group.lint]
optional = true

[tool.poetry.group.lint.dependencies]
ruff = "^0.1.8"

[tool.poetry.group.typing.dependencies]
mypy = "^1.7.1"
langchain-core = { path = "../../core", develop = true }

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
langchain-core = { path = "../../core", develop = true }

[tool.ruff.lint]
select = [
    "E",     # pycodestyle
    "F",     # pyflakes
    "I",     # isort
    "T201",  # print
]

[tool.mypy]
disallow_untyped_defs = "True"

[tool.coverage.run]
omit = ["tests/*"]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
# --strict-markers will raise errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# https://docs.pytest.org/en/7.1.x/reference/reference.html
# --strict-config       any warnings encountered while parsing the `pytest`
#                       section of the configuration file raise errors.
#
# https://github.com/tophat/syrupy
# --snapshot-warn-unused    Prints a warning on unused snapshots rather than fail the test suite.
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
# Registering custom markers.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
    "compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
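The pytest configuration enables `--strict-markers` and registers a single custom `compile` marker "used to compile integration tests without running them", so any test referencing an unregistered mark fails collection. A hedged sketch of the kind of placeholder test this configuration implies (the file name and body are illustrative, not part of the package):

```python
# Hypothetical placeholder test illustrating the registered "compile" marker.
import pytest


@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running them."""
    assert True
```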