langchain_ollama-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 LangChain, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,64 @@
+ Metadata-Version: 2.1
+ Name: langchain-ollama
+ Version: 0.1.0
+ Summary: An integration package connecting Ollama and LangChain
+ Home-page: https://github.com/langchain-ai/langchain
+ License: MIT
+ Requires-Python: >=3.8.1,<4.0
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Dist: langchain-core (>=0.2.20,<0.3.0)
+ Requires-Dist: ollama (>=0.3.0,<1)
+ Project-URL: Repository, https://github.com/langchain-ai/langchain
+ Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
+ Description-Content-Type: text/markdown
+
+ # langchain-ollama
+
+ This package contains the LangChain integration with Ollama.
+
+ ## Installation
+
+ ```bash
+ pip install -U langchain-ollama
+ ```
+
+ You will also need to run the Ollama server locally.
+ You can download it [here](https://ollama.com/download).
+
+ ## Chat Models
+
+ The `ChatOllama` class exposes chat models from Ollama.
+
+ ```python
+ from langchain_ollama import ChatOllama
+
+ llm = ChatOllama(model="llama3-groq-tool-use")
+ llm.invoke("Sing a ballad of LangChain.")
+ ```
+
+ ## Embeddings
+
+ The `OllamaEmbeddings` class exposes embeddings from Ollama.
+
+ ```python
+ from langchain_ollama import OllamaEmbeddings
+
+ embeddings = OllamaEmbeddings(model="llama3")
+ embeddings.embed_query("What is the meaning of life?")
+ ```
+
+ ## LLMs
+ The `OllamaLLM` class exposes LLMs from Ollama.
+
+ ```python
+ from langchain_ollama import OllamaLLM
+
+ llm = OllamaLLM(model="llama3")
+ llm.invoke("The meaning of life is")
+ ```
+
@@ -0,0 +1,44 @@
+ # langchain-ollama
+
+ This package contains the LangChain integration with Ollama.
+
+ ## Installation
+
+ ```bash
+ pip install -U langchain-ollama
+ ```
+
+ You will also need to run the Ollama server locally.
+ You can download it [here](https://ollama.com/download).
+
+ ## Chat Models
+
+ The `ChatOllama` class exposes chat models from Ollama.
+
+ ```python
+ from langchain_ollama import ChatOllama
+
+ llm = ChatOllama(model="llama3-groq-tool-use")
+ llm.invoke("Sing a ballad of LangChain.")
+ ```
+
+ ## Embeddings
+
+ The `OllamaEmbeddings` class exposes embeddings from Ollama.
+
+ ```python
+ from langchain_ollama import OllamaEmbeddings
+
+ embeddings = OllamaEmbeddings(model="llama3")
+ embeddings.embed_query("What is the meaning of life?")
+ ```
+
+ ## LLMs
+ The `OllamaLLM` class exposes LLMs from Ollama.
+
+ ```python
+ from langchain_ollama import OllamaLLM
+
+ llm = OllamaLLM(model="llama3")
+ llm.invoke("The meaning of life is")
+ ```
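
The README stops at single-class snippets. As a supplement (not part of the packaged files), here is a minimal sketch wiring the three classes into the rest of LangChain. It assumes a local Ollama server with `llama3` already pulled; `ChatPromptTemplate` and `StrOutputParser` from `langchain-core` are the only names used that do not appear above.

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from langchain_ollama import ChatOllama, OllamaEmbeddings, OllamaLLM

# Chat model composed into a small LCEL chain.
prompt = ChatPromptTemplate.from_messages(
    [("system", "You answer in one sentence."), ("human", "{question}")]
)
chain = prompt | ChatOllama(model="llama3") | StrOutputParser()
print(chain.invoke({"question": "What is LangChain?"}))

# Embeddings return plain lists of floats.
vector = OllamaEmbeddings(model="llama3").embed_query("hello world")
print(len(vector))

# The completion-style interface streams raw text chunks.
for chunk in OllamaLLM(model="llama3").stream("The meaning of life is"):
    print(chunk, end="", flush=True)
```
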
@@ -0,0 +1,19 @@
+ from importlib import metadata
+
+ from langchain_ollama.chat_models import ChatOllama
+ from langchain_ollama.embeddings import OllamaEmbeddings
+ from langchain_ollama.llms import OllamaLLM
+
+ try:
+     __version__ = metadata.version(__package__)
+ except metadata.PackageNotFoundError:
+     # Case where package metadata is not available.
+     __version__ = ""
+ del metadata  # optional, avoids polluting the results of dir(__package__)
+
+ __all__ = [
+     "ChatOllama",
+     "OllamaLLM",
+     "OllamaEmbeddings",
+     "__version__",
+ ]
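
The `try`/`except` above resolves `__version__` from the installed distribution's metadata and silently falls back to an empty string when that metadata is missing. A small, hypothetical check of that behavior (nothing here ships with the package):

```python
# Assumes the package is importable; the printed version is "0.1.0" when the
# wheel/sdist is installed and "" when only the bare sources are on the path.
import langchain_ollama

print(langchain_ollama.__all__)      # ['ChatOllama', 'OllamaLLM', 'OllamaEmbeddings', '__version__']
print(langchain_ollama.__version__)  # '0.1.0' or '' (metadata unavailable)
```
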
@@ -0,0 +1,719 @@
+ """Ollama chat models."""
+
+ from typing import (
+     Any,
+     AsyncIterator,
+     Callable,
+     Dict,
+     Iterator,
+     List,
+     Literal,
+     Mapping,
+     Optional,
+     Sequence,
+     Type,
+     Union,
+     cast,
+ )
+ from uuid import uuid4
+
+ import ollama
+ from langchain_core.callbacks import (
+     CallbackManagerForLLMRun,
+ )
+ from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
+ from langchain_core.language_models import LanguageModelInput
+ from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
+ from langchain_core.messages import (
+     AIMessage,
+     AIMessageChunk,
+     BaseMessage,
+     HumanMessage,
+     SystemMessage,
+     ToolCall,
+     ToolMessage,
+ )
+ from langchain_core.messages.ai import UsageMetadata
+ from langchain_core.messages.tool import tool_call
+ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
+ from langchain_core.pydantic_v1 import BaseModel
+ from langchain_core.runnables import Runnable
+ from langchain_core.tools import BaseTool
+ from langchain_core.utils.function_calling import convert_to_openai_tool
+ from ollama import AsyncClient, Message, Options
+
+
+ def _get_usage_metadata_from_generation_info(
+     generation_info: Optional[Mapping[str, Any]],
+ ) -> Optional[UsageMetadata]:
+     """Get usage metadata from ollama generation info mapping."""
+     if generation_info is None:
+         return None
+     input_tokens: Optional[int] = generation_info.get("prompt_eval_count")
+     output_tokens: Optional[int] = generation_info.get("eval_count")
+     if input_tokens is not None and output_tokens is not None:
+         return UsageMetadata(
+             input_tokens=input_tokens,
+             output_tokens=output_tokens,
+             total_tokens=input_tokens + output_tokens,
+         )
+     return None
+
+
+ def _get_tool_calls_from_response(
+     response: Mapping[str, Any],
+ ) -> List[ToolCall]:
+     """Get tool calls from ollama response."""
+     tool_calls = []
+     if "message" in response:
+         if "tool_calls" in response["message"]:
+             for tc in response["message"]["tool_calls"]:
+                 tool_calls.append(
+                     tool_call(
+                         id=str(uuid4()),
+                         name=tc["function"]["name"],
+                         args=tc["function"]["arguments"],
+                     )
+                 )
+     return tool_calls
+
+
+ def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
+     return {
+         "type": "function",
+         "id": tool_call["id"],
+         "function": {
+             "name": tool_call["name"],
+             "arguments": tool_call["args"],
+         },
+     }
+
+
+ class ChatOllama(BaseChatModel):
+     """Ollama chat model integration.
+
+     Setup:
+         Install ``langchain-ollama`` and download any models you want to use from ollama.
+
+         .. code-block:: bash
+
+             ollama pull mistral:v0.3
+             pip install -U langchain-ollama
+
+     Key init args — completion params:
+         model: str
+             Name of Ollama model to use.
+         temperature: float
+             Sampling temperature. Ranges from 0.0 to 1.0.
+         num_predict: Optional[int]
+             Max number of tokens to generate.
+
+     See full list of supported init args and their descriptions in the params section.
+
+     Instantiate:
+         .. code-block:: python
+
+             from langchain_ollama import ChatOllama
+
+             llm = ChatOllama(
+                 model = "llama3",
+                 temperature = 0.8,
+                 num_predict = 256,
+                 # other params ...
+             )
+
+     Invoke:
+         .. code-block:: python
+
+             messages = [
+                 ("system", "You are a helpful translator. Translate the user sentence to French."),
+                 ("human", "I love programming."),
+             ]
+             llm.invoke(messages)
+
+         .. code-block:: python
+
+             AIMessage(content='J'adore le programmation. (Note: "programming" can also refer to the act of writing code, so if you meant that, I could translate it as "J'adore programmer". But since you didn\'t specify, I assumed you were talking about the activity itself, which is what "le programmation" usually refers to.)', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:37:50.182604Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 3576619666, 'load_duration': 788524916, 'prompt_eval_count': 32, 'prompt_eval_duration': 128125000, 'eval_count': 71, 'eval_duration': 2656556000}, id='run-ba48f958-6402-41a5-b461-5e250a4ebd36-0')
+
+     Stream:
+         .. code-block:: python
+
+             messages = [
+                 ("human", "Return the words Hello World!"),
+             ]
+             for chunk in llm.stream(messages):
+                 print(chunk)
+
+
+         .. code-block:: python
+
+             content='Hello' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
+             content=' World' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
+             content='!' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
+             content='' response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:39:42.274449Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 411875125, 'load_duration': 1898166, 'prompt_eval_count': 14, 'prompt_eval_duration': 297320000, 'eval_count': 4, 'eval_duration': 111099000} id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
+
+
+         .. code-block:: python
+
+             stream = llm.stream(messages)
+             full = next(stream)
+             for chunk in stream:
+                 full += chunk
+             full
+
+         .. code-block:: python
+
+             AIMessageChunk(content='Je adore le programmation.(Note: "programmation" is the formal way to say "programming" in French, but informally, people might use the phrase "le développement logiciel" or simply "le code")', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:38:54.933154Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1977300042, 'load_duration': 1345709, 'prompt_eval_duration': 159343000, 'eval_count': 47, 'eval_duration': 1815123000}, id='run-3c81a3ed-3e79-4dd3-a796-04064d804890')
+
+     Async:
+         .. code-block:: python
+
+             messages = [
+                 ("human", "Hello how are you!"),
+             ]
+             await llm.ainvoke(messages)
+
+         .. code-block:: python
+
+             AIMessage(content="Hi there! I'm just an AI, so I don't have feelings or emotions like humans do. But I'm functioning properly and ready to help with any questions or tasks you may have! How can I assist you today?", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:52:08.165478Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2138492875, 'load_duration': 1364000, 'prompt_eval_count': 10, 'prompt_eval_duration': 297081000, 'eval_count': 47, 'eval_duration': 1838524000}, id='run-29c510ae-49a4-4cdd-8f23-b972bfab1c49-0')
+
+         .. code-block:: python
+
+             messages = [
+                 ("human", "Say hello world!"),
+             ]
+             async for chunk in llm.astream(messages):
+                 print(chunk.content)
+
+         .. code-block:: python
+
+             HEL
+             LO
+             WORLD
+             !
+
+         .. code-block:: python
+
+             messages = [
+                 ("human", "Say hello world!"),
+                 ("human", "Say goodbye world!")
+             ]
+             await llm.abatch(messages)
+
+         .. code-block:: python
+
+             [AIMessage(content='HELLO, WORLD!', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.315396Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1696745458, 'load_duration': 1505000, 'prompt_eval_count': 8, 'prompt_eval_duration': 111627000, 'eval_count': 6, 'eval_duration': 185181000}, id='run-da6c7562-e25a-4a44-987a-2c83cd8c2686-0'),
+             AIMessage(content="It's been a blast chatting with you! Say goodbye to the world for me, and don't forget to come back and visit us again soon!", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.018076Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1399391083, 'load_duration': 1187417, 'prompt_eval_count': 20, 'prompt_eval_duration': 230349000, 'eval_count': 31, 'eval_duration': 1166047000}, id='run-96cad530-6f3e-4cf9-86b4-e0f8abba4cdb-0')]
+
+     JSON mode:
+         .. code-block:: python
+
+
+             json_llm = ChatOllama(format="json")
+             messages = [
+                 ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
+             ]
+             json_llm.invoke(messages).content
+
+         .. code-block:: python
+
+             '{"location": "Pune, India", "time_of_day": "morning"}'
+
+     Tool Calling:
+         .. warning::
+             Ollama currently does not support streaming for tools
+
+         .. code-block:: python
+
+             from langchain_ollama import ChatOllama
+             from langchain_core.pydantic_v1 import BaseModel, Field
+
+             class Multiply(BaseModel):
+                 a: int = Field(..., description="First integer")
+                 b: int = Field(..., description="Second integer")
+
+             ans = llm.bind_tools([Multiply]).invoke("What is 45*67")
+             ans.tool_calls
+
+         .. code-block:: python
+
+             [{'name': 'Multiply',
+               'args': {'a': 45, 'b': 67},
+               'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
+               'type': 'tool_call'}]
+     """  # noqa: E501
+
+     model: str
+     """Model name to use."""
+
+     mirostat: Optional[int] = None
+     """Enable Mirostat sampling for controlling perplexity.
+     (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+     mirostat_eta: Optional[float] = None
+     """Influences how quickly the algorithm responds to feedback
+     from the generated text. A lower learning rate will result in
+     slower adjustments, while a higher learning rate will make
+     the algorithm more responsive. (Default: 0.1)"""
+
+     mirostat_tau: Optional[float] = None
+     """Controls the balance between coherence and diversity
+     of the output. A lower value will result in more focused and
+     coherent text. (Default: 5.0)"""
+
+     num_ctx: Optional[int] = None
+     """Sets the size of the context window used to generate the
+     next token. (Default: 2048)"""
+
+     num_gpu: Optional[int] = None
+     """The number of GPUs to use. On macOS it defaults to 1 to
+     enable metal support, 0 to disable."""
+
+     num_thread: Optional[int] = None
+     """Sets the number of threads to use during computation.
+     By default, Ollama will detect this for optimal performance.
+     It is recommended to set this value to the number of physical
+     CPU cores your system has (as opposed to the logical number of cores)."""
+
+     num_predict: Optional[int] = None
+     """Maximum number of tokens to predict when generating text.
+     (Default: 128, -1 = infinite generation, -2 = fill context)"""
+
+     repeat_last_n: Optional[int] = None
+     """Sets how far back for the model to look back to prevent
+     repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+     repeat_penalty: Optional[float] = None
+     """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+     will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+     will be more lenient. (Default: 1.1)"""
+
+     temperature: Optional[float] = None
+     """The temperature of the model. Increasing the temperature will
+     make the model answer more creatively. (Default: 0.8)"""
+
+     stop: Optional[List[str]] = None
+     """Sets the stop tokens to use."""
+
+     tfs_z: Optional[float] = None
+     """Tail free sampling is used to reduce the impact of less probable
+     tokens from the output. A higher value (e.g., 2.0) will reduce the
+     impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+     top_k: Optional[int] = None
+     """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+     will give more diverse answers, while a lower value (e.g. 10)
+     will be more conservative. (Default: 40)"""
+
+     top_p: Optional[float] = None
+     """Works together with top-k. A higher value (e.g., 0.95) will lead
+     to more diverse text, while a lower value (e.g., 0.5) will
+     generate more focused and conservative text. (Default: 0.9)"""
+
+     format: Literal["", "json"] = ""
+     """Specify the format of the output (options: json)"""
+
+     keep_alive: Optional[Union[int, str]] = None
+     """How long the model will stay loaded into memory."""
+
+     @property
+     def _default_params(self) -> Dict[str, Any]:
+         """Get the default parameters for calling Ollama."""
+         return {
+             "model": self.model,
+             "format": self.format,
+             "options": {
+                 "mirostat": self.mirostat,
+                 "mirostat_eta": self.mirostat_eta,
+                 "mirostat_tau": self.mirostat_tau,
+                 "num_ctx": self.num_ctx,
+                 "num_gpu": self.num_gpu,
+                 "num_thread": self.num_thread,
+                 "num_predict": self.num_predict,
+                 "repeat_last_n": self.repeat_last_n,
+                 "repeat_penalty": self.repeat_penalty,
+                 "temperature": self.temperature,
+                 "stop": self.stop,
+                 "tfs_z": self.tfs_z,
+                 "top_k": self.top_k,
+                 "top_p": self.top_p,
+             },
+             "keep_alive": self.keep_alive,
+         }
+
+     def _convert_messages_to_ollama_messages(
+         self, messages: List[BaseMessage]
+     ) -> Sequence[Message]:
+         ollama_messages: List = []
+         for message in messages:
+             role = ""
+             tool_call_id: Optional[str] = None
+             tool_calls: Optional[List[Dict[str, Any]]] = None
+             if isinstance(message, HumanMessage):
+                 role = "user"
+             elif isinstance(message, AIMessage):
+                 role = "assistant"
+                 tool_calls = (
+                     [
+                         _lc_tool_call_to_openai_tool_call(tool_call)
+                         for tool_call in message.tool_calls
+                     ]
+                     if message.tool_calls
+                     else None
+                 )
+             elif isinstance(message, SystemMessage):
+                 role = "system"
+             elif isinstance(message, ToolMessage):
+                 role = "tool"
+                 tool_call_id = message.tool_call_id
+             else:
+                 raise ValueError("Received unsupported message type for Ollama.")
+
+             content = ""
+             images = []
+             if isinstance(message.content, str):
+                 content = message.content
+             else:
+                 for content_part in cast(List[Dict], message.content):
+                     if content_part.get("type") == "text":
+                         content += f"\n{content_part['text']}"
+                     elif content_part.get("type") == "tool_use":
+                         continue
+                     elif content_part.get("type") == "image_url":
+                         image_url = None
+                         temp_image_url = content_part.get("image_url")
+                         if isinstance(temp_image_url, str):
+                             image_url = content_part["image_url"]
+                         elif (
+                             isinstance(temp_image_url, dict) and "url" in temp_image_url
+                         ):
+                             image_url = temp_image_url["url"]
+                         else:
+                             raise ValueError(
+                                 "Only string image_url or dict with string 'url' "
+                                 "inside content parts are supported."
+                             )
+
+                         image_url_components = image_url.split(",")
+                         # Support data:image/jpeg;base64,<image> format
+                         # and base64 strings
+                         if len(image_url_components) > 1:
+                             images.append(image_url_components[1])
+                         else:
+                             images.append(image_url_components[0])
+
+                     else:
+                         raise ValueError(
+                             "Unsupported message content type. "
+                             "Must either have type 'text' or type 'image_url' "
+                             "with a string 'image_url' field."
+                         )
+             msg = {
+                 "role": role,
+                 "content": content,
+                 "images": images,
+             }
+             if tool_call_id:
+                 msg["tool_call_id"] = tool_call_id
+             if tool_calls:
+                 msg["tool_calls"] = tool_calls
+             ollama_messages.append(msg)
+
+         return ollama_messages
+
+     async def _acreate_chat_stream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         **kwargs: Any,
+     ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
+         ollama_messages = self._convert_messages_to_ollama_messages(messages)
+
+         stop = stop if stop is not None else self.stop
+
+         params = self._default_params
+
+         for key in self._default_params:
+             if key in kwargs:
+                 params[key] = kwargs[key]
+
+         params["options"]["stop"] = stop
+         if "tools" in kwargs:
+             yield await AsyncClient().chat(
+                 model=params["model"],
+                 messages=ollama_messages,
+                 stream=False,
+                 options=Options(**params["options"]),
+                 keep_alive=params["keep_alive"],
+                 format=params["format"],
+                 tools=kwargs["tools"],
+             )  # type:ignore
+         else:
+             async for part in await AsyncClient().chat(
+                 model=params["model"],
+                 messages=ollama_messages,
+                 stream=True,
+                 options=Options(**params["options"]),
+                 keep_alive=params["keep_alive"],
+                 format=params["format"],
+             ):  # type:ignore
+                 yield part
+
+     def _create_chat_stream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         **kwargs: Any,
+     ) -> Iterator[Union[Mapping[str, Any], str]]:
+         ollama_messages = self._convert_messages_to_ollama_messages(messages)
+
+         stop = stop if stop is not None else self.stop
+
+         params = self._default_params
+
+         for key in self._default_params:
+             if key in kwargs:
+                 params[key] = kwargs[key]
+
+         params["options"]["stop"] = stop
+         if "tools" in kwargs:
+             yield ollama.chat(
+                 model=params["model"],
+                 messages=ollama_messages,
+                 stream=False,
+                 options=Options(**params["options"]),
+                 keep_alive=params["keep_alive"],
+                 format=params["format"],
+                 tools=kwargs["tools"],
+             )
+         else:
+             yield from ollama.chat(
+                 model=params["model"],
+                 messages=ollama_messages,
+                 stream=True,
+                 options=Options(**params["options"]),
+                 keep_alive=params["keep_alive"],
+                 format=params["format"],
+             )
+
+     def _chat_stream_with_aggregation(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         verbose: bool = False,
+         **kwargs: Any,
+     ) -> ChatGenerationChunk:
+         final_chunk = None
+         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = ChatGenerationChunk(
+                     message=AIMessageChunk(
+                         content=(
+                             stream_resp["message"]["content"]
+                             if "message" in stream_resp
+                             and "content" in stream_resp["message"]
+                             else ""
+                         ),
+                         usage_metadata=_get_usage_metadata_from_generation_info(
+                             stream_resp
+                         ),
+                         tool_calls=_get_tool_calls_from_response(stream_resp),
+                     ),
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if final_chunk is None:
+                     final_chunk = chunk
+                 else:
+                     final_chunk += chunk
+                 if run_manager:
+                     run_manager.on_llm_new_token(
+                         chunk.text,
+                         chunk=chunk,
+                         verbose=verbose,
+                     )
+         if final_chunk is None:
+             raise ValueError("No data received from Ollama stream.")
+
+         return final_chunk
+
+     async def _achat_stream_with_aggregation(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         verbose: bool = False,
+         **kwargs: Any,
+     ) -> ChatGenerationChunk:
+         final_chunk = None
+         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = ChatGenerationChunk(
+                     message=AIMessageChunk(
+                         content=(
+                             stream_resp["message"]["content"]
+                             if "message" in stream_resp
+                             and "content" in stream_resp["message"]
+                             else ""
+                         ),
+                         usage_metadata=_get_usage_metadata_from_generation_info(
+                             stream_resp
+                         ),
+                         tool_calls=_get_tool_calls_from_response(stream_resp),
+                     ),
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if final_chunk is None:
+                     final_chunk = chunk
+                 else:
+                     final_chunk += chunk
+                 if run_manager:
+                     await run_manager.on_llm_new_token(
+                         chunk.text,
+                         chunk=chunk,
+                         verbose=verbose,
+                     )
+         if final_chunk is None:
+             raise ValueError("No data received from Ollama stream.")
+
+         return final_chunk
+
+     def _get_ls_params(
+         self, stop: Optional[List[str]] = None, **kwargs: Any
+     ) -> LangSmithParams:
+         """Get standard params for tracing."""
+         params = self._get_invocation_params(stop=stop, **kwargs)
+         ls_params = LangSmithParams(
+             ls_provider="ollama",
+             ls_model_name=self.model,
+             ls_model_type="chat",
+             ls_temperature=params.get("temperature", self.temperature),
+         )
+         if ls_stop := stop or params.get("stop", None) or self.stop:
+             ls_params["ls_stop"] = ls_stop
+         return ls_params
+
+     def _generate(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> ChatResult:
+         final_chunk = self._chat_stream_with_aggregation(
+             messages, stop, run_manager, verbose=self.verbose, **kwargs
+         )
+         generation_info = final_chunk.generation_info
+         chat_generation = ChatGeneration(
+             message=AIMessage(
+                 content=final_chunk.text,
+                 usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
+                 tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
+             ),
+             generation_info=generation_info,
+         )
+         return ChatResult(generations=[chat_generation])
+
+     def _stream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> Iterator[ChatGenerationChunk]:
+         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = ChatGenerationChunk(
+                     message=AIMessageChunk(
+                         content=(
+                             stream_resp["message"]["content"]
+                             if "message" in stream_resp
+                             and "content" in stream_resp["message"]
+                             else ""
+                         ),
+                         usage_metadata=_get_usage_metadata_from_generation_info(
+                             stream_resp
+                         ),
+                         tool_calls=_get_tool_calls_from_response(stream_resp),
+                     ),
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if run_manager:
+                     run_manager.on_llm_new_token(
+                         chunk.text,
+                         verbose=self.verbose,
+                     )
+                 yield chunk
+
+     async def _astream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> AsyncIterator[ChatGenerationChunk]:
+         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = ChatGenerationChunk(
+                     message=AIMessageChunk(
+                         content=(
+                             stream_resp["message"]["content"]
+                             if "message" in stream_resp
+                             and "content" in stream_resp["message"]
+                             else ""
+                         ),
+                         usage_metadata=_get_usage_metadata_from_generation_info(
+                             stream_resp
+                         ),
+                         tool_calls=_get_tool_calls_from_response(stream_resp),
+                     ),
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if run_manager:
+                     await run_manager.on_llm_new_token(
+                         chunk.text,
+                         verbose=self.verbose,
+                     )
+                 yield chunk
+
+     async def _agenerate(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> ChatResult:
+         final_chunk = await self._achat_stream_with_aggregation(
+             messages, stop, run_manager, verbose=self.verbose, **kwargs
+         )
+         generation_info = final_chunk.generation_info
+         chat_generation = ChatGeneration(
+             message=AIMessage(
+                 content=final_chunk.text,
+                 usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
+                 tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
+             ),
+             generation_info=generation_info,
+         )
+         return ChatResult(generations=[chat_generation])
+
+     @property
+     def _llm_type(self) -> str:
+         """Return type of chat model."""
+         return "chat-ollama"
+
+     def bind_tools(
+         self,
+         tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
+         **kwargs: Any,
+     ) -> Runnable[LanguageModelInput, BaseMessage]:
+         formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
+         return super().bind(tools=formatted_tools, **kwargs)
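
`bind_tools` converts each tool with `convert_to_openai_tool` and binds it as the `tools` kwarg, which `_create_chat_stream` forwards to a non-streaming `ollama.chat` call; `_convert_messages_to_ollama_messages` then maps `ToolMessage` inputs back to `role="tool"` messages. A hedged usage sketch under those assumptions (the model name and the follow-up round trip are illustrative, not part of this module):

```python
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_ollama import ChatOllama


class Multiply(BaseModel):
    """Multiply two integers."""

    a: int = Field(..., description="First integer")
    b: int = Field(..., description="Second integer")


# Assumes a tool-capable model has been pulled, e.g. `ollama pull llama3-groq-tool-use`.
llm = ChatOllama(model="llama3-groq-tool-use").bind_tools([Multiply])

ai_msg = llm.invoke([HumanMessage("What is 45*67?")])
print(ai_msg.tool_calls)  # e.g. [{'name': 'Multiply', 'args': {'a': 45, 'b': 67}, 'id': '...', 'type': 'tool_call'}]

# Feed the tool result back as a ToolMessage keyed by the generated call id.
if ai_msg.tool_calls:
    call = ai_msg.tool_calls[0]
    result = call["args"]["a"] * call["args"]["b"]
    followup = llm.invoke(
        [
            HumanMessage("What is 45*67?"),
            ai_msg,
            ToolMessage(content=str(result), tool_call_id=call["id"]),
        ]
    )
    print(followup.content)
```
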
@@ -0,0 +1,51 @@
+ from typing import List
+
+ import ollama
+ from langchain_core.embeddings import Embeddings
+ from langchain_core.pydantic_v1 import BaseModel, Extra
+ from ollama import AsyncClient
+
+
+ class OllamaEmbeddings(BaseModel, Embeddings):
+     """OllamaEmbeddings embedding model.
+
+     Example:
+         .. code-block:: python
+
+             from langchain_ollama import OllamaEmbeddings
+
+             embedder = OllamaEmbeddings(model="llama3")
+             embedder.embed_query("what is the place that jonathan worked at?")
+     """
+
+     model: str
+     """Model name to use."""
+
+     class Config:
+         """Configuration for this pydantic object."""
+
+         extra = Extra.forbid
+
+     def embed_documents(self, texts: List[str]) -> List[List[float]]:
+         """Embed search docs."""
+         embedded_docs = []
+         for doc in texts:
+             embedded_docs.append(list(ollama.embeddings(self.model, doc)["embedding"]))
+         return embedded_docs
+
+     def embed_query(self, text: str) -> List[float]:
+         """Embed query text."""
+         return self.embed_documents([text])[0]
+
+     async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+         """Embed search docs."""
+         embedded_docs = []
+         for doc in texts:
+             embedded_docs.append(
+                 list((await AsyncClient().embeddings(self.model, doc))["embedding"])
+             )
+         return embedded_docs
+
+     async def aembed_query(self, text: str) -> List[float]:
+         """Embed query text."""
+         return (await self.aembed_documents([text]))[0]
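
`embed_documents` issues one `ollama.embeddings` call per input text and returns plain Python lists, so downstream similarity math needs no extra dependencies. A small sketch under that assumption (the model name and sample texts are illustrative):

```python
import math
from typing import List

from langchain_ollama import OllamaEmbeddings

# Assumes a local Ollama server and that `ollama pull llama3` has been run.
embedder = OllamaEmbeddings(model="llama3")

docs = ["Ollama runs models locally.", "Paris is the capital of France."]
doc_vectors = embedder.embed_documents(docs)  # one embeddings call per text
query_vector = embedder.embed_query("Where do local models run?")


def cosine(u: List[float], v: List[float]) -> float:
    """Cosine similarity between two embedding vectors."""
    dot = sum(a * b for a, b in zip(u, v))
    norm = math.sqrt(sum(a * a for a in u)) * math.sqrt(sum(b * b for b in v))
    return dot / norm


# Rank the documents by similarity to the query.
ranked = sorted(zip(docs, doc_vectors), key=lambda pair: cosine(query_vector, pair[1]), reverse=True)
for doc, vec in ranked:
    print(f"{cosine(query_vector, vec):.3f}  {doc}")
```
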
@@ -0,0 +1,347 @@
+ """Ollama large language models."""
+
+ from typing import (
+     Any,
+     AsyncIterator,
+     Dict,
+     Iterator,
+     List,
+     Literal,
+     Mapping,
+     Optional,
+     Union,
+ )
+
+ import ollama
+ from langchain_core.callbacks import (
+     AsyncCallbackManagerForLLMRun,
+     CallbackManagerForLLMRun,
+ )
+ from langchain_core.language_models import BaseLLM
+ from langchain_core.outputs import GenerationChunk, LLMResult
+ from ollama import AsyncClient, Options
+
+
+ class OllamaLLM(BaseLLM):
+     """OllamaLLM large language models.
+
+     Example:
+         .. code-block:: python
+
+             from langchain_ollama import OllamaLLM
+
+             model = OllamaLLM(model="llama3")
+             model.invoke("Come up with 10 names for a song about parrots")
+     """
+
+     model: str
+     """Model name to use."""
+
+     mirostat: Optional[int] = None
+     """Enable Mirostat sampling for controlling perplexity.
+     (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+     mirostat_eta: Optional[float] = None
+     """Influences how quickly the algorithm responds to feedback
+     from the generated text. A lower learning rate will result in
+     slower adjustments, while a higher learning rate will make
+     the algorithm more responsive. (Default: 0.1)"""
+
+     mirostat_tau: Optional[float] = None
+     """Controls the balance between coherence and diversity
+     of the output. A lower value will result in more focused and
+     coherent text. (Default: 5.0)"""
+
+     num_ctx: Optional[int] = None
+     """Sets the size of the context window used to generate the
+     next token. (Default: 2048)"""
+
+     num_gpu: Optional[int] = None
+     """The number of GPUs to use. On macOS it defaults to 1 to
+     enable metal support, 0 to disable."""
+
+     num_thread: Optional[int] = None
+     """Sets the number of threads to use during computation.
+     By default, Ollama will detect this for optimal performance.
+     It is recommended to set this value to the number of physical
+     CPU cores your system has (as opposed to the logical number of cores)."""
+
+     num_predict: Optional[int] = None
+     """Maximum number of tokens to predict when generating text.
+     (Default: 128, -1 = infinite generation, -2 = fill context)"""
+
+     repeat_last_n: Optional[int] = None
+     """Sets how far back for the model to look back to prevent
+     repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+     repeat_penalty: Optional[float] = None
+     """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+     will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+     will be more lenient. (Default: 1.1)"""
+
+     temperature: Optional[float] = None
+     """The temperature of the model. Increasing the temperature will
+     make the model answer more creatively. (Default: 0.8)"""
+
+     stop: Optional[List[str]] = None
+     """Sets the stop tokens to use."""
+
+     tfs_z: Optional[float] = None
+     """Tail free sampling is used to reduce the impact of less probable
+     tokens from the output. A higher value (e.g., 2.0) will reduce the
+     impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+     top_k: Optional[int] = None
+     """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+     will give more diverse answers, while a lower value (e.g. 10)
+     will be more conservative. (Default: 40)"""
+
+     top_p: Optional[float] = None
+     """Works together with top-k. A higher value (e.g., 0.95) will lead
+     to more diverse text, while a lower value (e.g., 0.5) will
+     generate more focused and conservative text. (Default: 0.9)"""
+
+     format: Literal["", "json"] = ""
+     """Specify the format of the output (options: json)"""
+
+     keep_alive: Optional[Union[int, str]] = None
+     """How long the model will stay loaded into memory."""
+
+     @property
+     def _default_params(self) -> Dict[str, Any]:
+         """Get the default parameters for calling Ollama."""
+         return {
+             "model": self.model,
+             "format": self.format,
+             "options": {
+                 "mirostat": self.mirostat,
+                 "mirostat_eta": self.mirostat_eta,
+                 "mirostat_tau": self.mirostat_tau,
+                 "num_ctx": self.num_ctx,
+                 "num_gpu": self.num_gpu,
+                 "num_thread": self.num_thread,
+                 "num_predict": self.num_predict,
+                 "repeat_last_n": self.repeat_last_n,
+                 "repeat_penalty": self.repeat_penalty,
+                 "temperature": self.temperature,
+                 "stop": self.stop,
+                 "tfs_z": self.tfs_z,
+                 "top_k": self.top_k,
+                 "top_p": self.top_p,
+             },
+             "keep_alive": self.keep_alive,
+         }
+
+     @property
+     def _llm_type(self) -> str:
+         """Return type of LLM."""
+         return "ollama-llm"
+
+     async def _acreate_generate_stream(
+         self,
+         prompt: str,
+         stop: Optional[List[str]] = None,
+         **kwargs: Any,
+     ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
+         if self.stop is not None and stop is not None:
+             raise ValueError("`stop` found in both the input and default params.")
+         elif self.stop is not None:
+             stop = self.stop
+
+         params = self._default_params
+
+         for key in self._default_params:
+             if key in kwargs:
+                 params[key] = kwargs[key]
+
+         params["options"]["stop"] = stop
+         async for part in await AsyncClient().generate(
+             model=params["model"],
+             prompt=prompt,
+             stream=True,
+             options=Options(**params["options"]),
+             keep_alive=params["keep_alive"],
+             format=params["format"],
+         ):  # type: ignore
+             yield part
+
+     def _create_generate_stream(
+         self,
+         prompt: str,
+         stop: Optional[List[str]] = None,
+         **kwargs: Any,
+     ) -> Iterator[Union[Mapping[str, Any], str]]:
+         if self.stop is not None and stop is not None:
+             raise ValueError("`stop` found in both the input and default params.")
+         elif self.stop is not None:
+             stop = self.stop
+
+         params = self._default_params
+
+         for key in self._default_params:
+             if key in kwargs:
+                 params[key] = kwargs[key]
+
+         params["options"]["stop"] = stop
+         yield from ollama.generate(
+             model=params["model"],
+             prompt=prompt,
+             stream=True,
+             options=Options(**params["options"]),
+             keep_alive=params["keep_alive"],
+             format=params["format"],
+         )
+
+     async def _astream_with_aggregation(
+         self,
+         prompt: str,
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         verbose: bool = False,
+         **kwargs: Any,
+     ) -> GenerationChunk:
+         final_chunk = None
+         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = GenerationChunk(
+                     text=stream_resp["response"] if "response" in stream_resp else "",
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if final_chunk is None:
+                     final_chunk = chunk
+                 else:
+                     final_chunk += chunk
+                 if run_manager:
+                     await run_manager.on_llm_new_token(
+                         chunk.text,
+                         chunk=chunk,
+                         verbose=verbose,
+                     )
+         if final_chunk is None:
+             raise ValueError("No data received from Ollama stream.")
+
+         return final_chunk
+
+     def _stream_with_aggregation(
+         self,
+         prompt: str,
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         verbose: bool = False,
+         **kwargs: Any,
+     ) -> GenerationChunk:
+         final_chunk = None
+         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = GenerationChunk(
+                     text=stream_resp["response"] if "response" in stream_resp else "",
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if final_chunk is None:
+                     final_chunk = chunk
+                 else:
+                     final_chunk += chunk
+                 if run_manager:
+                     run_manager.on_llm_new_token(
+                         chunk.text,
+                         chunk=chunk,
+                         verbose=verbose,
+                     )
+         if final_chunk is None:
+             raise ValueError("No data received from Ollama stream.")
+
+         return final_chunk
+
+     def _generate(
+         self,
+         prompts: List[str],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> LLMResult:
+         generations = []
+         for prompt in prompts:
+             final_chunk = self._stream_with_aggregation(
+                 prompt,
+                 stop=stop,
+                 run_manager=run_manager,
+                 verbose=self.verbose,
+                 **kwargs,
+             )
+             generations.append([final_chunk])
+         return LLMResult(generations=generations)  # type: ignore[arg-type]
+
+     async def _agenerate(
+         self,
+         prompts: List[str],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> LLMResult:
+         generations = []
+         for prompt in prompts:
+             final_chunk = await self._astream_with_aggregation(
+                 prompt,
+                 stop=stop,
+                 run_manager=run_manager,
+                 verbose=self.verbose,
+                 **kwargs,
+             )
+             generations.append([final_chunk])
+         return LLMResult(generations=generations)  # type: ignore[arg-type]
+
+     def _stream(
+         self,
+         prompt: str,
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> Iterator[GenerationChunk]:
+         for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = GenerationChunk(
+                     text=(
+                         stream_resp["response"]
+                         if "response" in stream_resp
+                         else ""
+                     ),
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if run_manager:
+                     run_manager.on_llm_new_token(
+                         chunk.text,
+                         verbose=self.verbose,
+                     )
+                 yield chunk
+
+     async def _astream(
+         self,
+         prompt: str,
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> AsyncIterator[GenerationChunk]:
+         async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
+             if not isinstance(stream_resp, str):
+                 chunk = GenerationChunk(
+                     text=(
+                         stream_resp["response"]
+                         if "response" in stream_resp
+                         else ""
+                     ),
+                     generation_info=(
+                         dict(stream_resp) if stream_resp.get("done") is True else None
+                     ),
+                 )
+                 if run_manager:
+                     await run_manager.on_llm_new_token(
+                         chunk.text,
+                         verbose=self.verbose,
+                     )
+                 yield chunk
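
`_stream` and `_astream` wrap `ollama.generate(stream=True)` and emit one `GenerationChunk` per part (built from the `response` field of each part), so `OllamaLLM` supports token-level streaming as well as plain generation. A brief usage sketch, assuming a local server with `llama3` pulled:

```python
import asyncio

from langchain_ollama import OllamaLLM

# Completion-style model; num_predict and temperature are the fields defined above.
llm = OllamaLLM(model="llama3", num_predict=64, temperature=0.2)

# Synchronous streaming yields plain text chunks.
for token in llm.stream("Write a haiku about local LLMs."):
    print(token, end="", flush=True)
print()


async def main() -> None:
    # Async generation aggregates the same stream into a single string.
    text = await llm.ainvoke("The meaning of life is")
    print(text)


asyncio.run(main())
```
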
File without changes
@@ -0,0 +1,90 @@
+ [tool.poetry]
+ name = "langchain-ollama"
+ version = "0.1.0"
+ description = "An integration package connecting Ollama and LangChain"
+ authors = []
+ readme = "README.md"
+ repository = "https://github.com/langchain-ai/langchain"
+ license = "MIT"
+
+ [tool.poetry.urls]
+ "Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama"
+
+ [tool.poetry.dependencies]
+ python = ">=3.8.1,<4.0"
+ ollama = ">=0.3.0,<1"
+ langchain-core = "^0.2.20"
+
+ [tool.poetry.group.test]
+ optional = true
+
+ [tool.poetry.group.test.dependencies]
+ pytest = "^7.4.3"
+ pytest-asyncio = "^0.23.2"
+ syrupy = "^4.0.2"
+ pytest-socket = "^0.7.0"
+ langchain-core = { path = "../../core", develop = true }
+ langchain-standard-tests = { path = "../../standard-tests", develop = true }
+
+ [tool.poetry.group.codespell]
+ optional = true
+
+ [tool.poetry.group.codespell.dependencies]
+ codespell = "^2.2.6"
+
+ [tool.poetry.group.test_integration]
+ optional = true
+
+ [tool.poetry.group.test_integration.dependencies]
+
+ [tool.poetry.group.lint]
+ optional = true
+
+ [tool.poetry.group.lint.dependencies]
+ ruff = "^0.1.8"
+
+ [tool.poetry.group.typing.dependencies]
+ mypy = "^1.7.1"
+ langchain-core = { path = "../../core", develop = true }
+
+ [tool.poetry.group.dev]
+ optional = true
+
+ [tool.poetry.group.dev.dependencies]
+ langchain-core = { path = "../../core", develop = true }
+
+ [tool.ruff.lint]
+ select = [
+     "E",    # pycodestyle
+     "F",    # pyflakes
+     "I",    # isort
+     "T201", # print
+ ]
+
+ [tool.mypy]
+ disallow_untyped_defs = "True"
+
+ [tool.coverage.run]
+ omit = ["tests/*"]
+
+ [build-system]
+ requires = ["poetry-core>=1.0.0"]
+ build-backend = "poetry.core.masonry.api"
+
+ [tool.pytest.ini_options]
+ # --strict-markers will raise errors on unknown marks.
+ # https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
+ #
+ # https://docs.pytest.org/en/7.1.x/reference/reference.html
+ # --strict-config any warnings encountered while parsing the `pytest`
+ # section of the configuration file raise errors.
+ #
+ # https://github.com/tophat/syrupy
+ # --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.
+ addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
+ # Registering custom markers.
+ # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
+ markers = [
+     "compile: mark placeholder test used to compile integration tests without running them",
+ ]
+ asyncio_mode = "auto"
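
The pytest section registers a custom `compile` marker and enables `asyncio_mode = "auto"`. A hypothetical test module (file and test names are illustrative, not taken from the sdist) showing how those options are consumed:

```python
import pytest

from langchain_ollama import ChatOllama


@pytest.mark.compile
def test_placeholder_compiles() -> None:
    """Placeholder used to compile integration tests without running them."""
    assert ChatOllama is not None


async def test_ainvoke_smoke() -> None:
    # Runs as an async test without an explicit asyncio marker because
    # asyncio_mode = "auto"; needs a local Ollama server with `llama3` pulled.
    llm = ChatOllama(model="llama3")
    message = await llm.ainvoke("Say hello")
    assert isinstance(message.content, str)
```
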