langchain_ollama-0.1.0rc0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ from importlib import metadata
2
+
3
+ from langchain_ollama.chat_models import ChatOllama
4
+ from langchain_ollama.embeddings import OllamaEmbeddings
5
+ from langchain_ollama.llms import OllamaLLM
6
+
7
+ try:
8
+ __version__ = metadata.version(__package__)
9
+ except metadata.PackageNotFoundError:
10
+ # Case where package metadata is not available.
11
+ __version__ = ""
12
+ del metadata # optional, avoids polluting the results of dir(__package__)
13
+
14
+ __all__ = [
15
+ "ChatOllama",
16
+ "OllamaLLM",
17
+ "OllamaEmbeddings",
18
+ "__version__",
19
+ ]
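
For reference, a quick import smoke test of the public surface exported above (a minimal sketch; assumes the wheel is installed in the current environment):

```python
# Import smoke test for the package root; assumes langchain-ollama is installed.
from langchain_ollama import ChatOllama, OllamaEmbeddings, OllamaLLM, __version__

# "0.1.0rc0" when installed from this wheel; "" if package metadata is unavailable.
print(__version__)
```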
@@ -0,0 +1,693 @@
1
+ """Ollama chat models."""
2
+
3
+ from typing import (
4
+ Any,
5
+ AsyncIterator,
6
+ Callable,
7
+ Dict,
8
+ Iterator,
9
+ List,
10
+ Literal,
11
+ Mapping,
12
+ Optional,
13
+ Sequence,
14
+ Type,
15
+ Union,
16
+ cast,
17
+ )
18
+ from uuid import uuid4
19
+
20
+ import ollama
21
+ from langchain_core.callbacks import (
22
+ CallbackManagerForLLMRun,
23
+ )
24
+ from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
25
+ from langchain_core.language_models import LanguageModelInput
26
+ from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
27
+ from langchain_core.messages import (
28
+ AIMessage,
29
+ AIMessageChunk,
30
+ BaseMessage,
31
+ HumanMessage,
32
+ SystemMessage,
33
+ ToolCall,
34
+ ToolMessage,
35
+ )
36
+ from langchain_core.messages.ai import UsageMetadata
37
+ from langchain_core.messages.tool import tool_call
38
+ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
39
+ from langchain_core.pydantic_v1 import BaseModel
40
+ from langchain_core.runnables import Runnable
41
+ from langchain_core.tools import BaseTool
42
+ from langchain_core.utils.function_calling import convert_to_openai_tool
43
+ from ollama import AsyncClient, Message, Options
44
+
45
+
46
+ def _get_usage_metadata_from_generation_info(
47
+ generation_info: Optional[Mapping[str, Any]],
48
+ ) -> Optional[UsageMetadata]:
49
+ """Get usage metadata from ollama generation info mapping."""
50
+ if generation_info is None:
51
+ return None
52
+ input_tokens: Optional[int] = generation_info.get("prompt_eval_count")
53
+ output_tokens: Optional[int] = generation_info.get("eval_count")
54
+ if input_tokens is not None and output_tokens is not None:
55
+ return UsageMetadata(
56
+ input_tokens=input_tokens,
57
+ output_tokens=output_tokens,
58
+ total_tokens=input_tokens + output_tokens,
59
+ )
60
+ return None
61
+
62
+
63
+ def _get_tool_calls_from_response(
64
+ response: Mapping[str, Any],
65
+ ) -> List[ToolCall]:
66
+ """Get tool calls from ollama response."""
67
+ tool_calls = []
68
+ if "message" in response:
69
+ if "tool_calls" in response["message"]:
70
+ for tc in response["message"]["tool_calls"]:
71
+ tool_calls.append(
72
+ tool_call(
73
+ id=str(uuid4()),
74
+ name=tc["function"]["name"],
75
+ args=tc["function"]["arguments"],
76
+ )
77
+ )
78
+ return tool_calls
79
+
80
+
81
+ def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
82
+ return {
83
+ "type": "function",
84
+ "id": tool_call["id"],
85
+ "function": {
86
+ "name": tool_call["name"],
87
+ "arguments": tool_call["args"],
88
+ },
89
+ }
90
+
91
+
92
+ class ChatOllama(BaseChatModel):
93
+ """Ollama chat model integration.
94
+
95
+ Setup:
96
+ Install ``langchain-ollama`` and download any models you want to use from Ollama.
97
+
98
+ .. code-block:: bash
99
+
100
+ ollama pull mistral:v0.3
101
+ pip install -U langchain-ollama
102
+
103
+ Key init args — completion params:
104
+ model: str
105
+ Name of Ollama model to use.
106
+ temperature: float
107
+ Sampling temperature. Ranges from 0.0 to 1.0.
108
+ num_predict: Optional[int]
109
+ Max number of tokens to generate.
110
+
111
+ See full list of supported init args and their descriptions in the params section.
112
+
113
+ Instantiate:
114
+ .. code-block:: python
115
+
116
+ from langchain_ollama import ChatOllama
117
+
118
+ llm = ChatOllama(
119
+ model = "llama3",
120
+ temperature = 0.8,
121
+ num_predict = 256,
122
+ # other params ...
123
+ )
124
+
125
+ Invoke:
126
+ .. code-block:: python
127
+
128
+ messages = [
129
+ ("system", "You are a helpful translator. Translate the user sentence to French."),
130
+ ("human", "I love programming."),
131
+ ]
132
+ llm.invoke(messages)
133
+
134
+ .. code-block:: python
135
+
136
+ AIMessage(content='J'adore le programmation. (Note: "programming" can also refer to the act of writing code, so if you meant that, I could translate it as "J'adore programmer". But since you didn\'t specify, I assumed you were talking about the activity itself, which is what "le programmation" usually refers to.)', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:37:50.182604Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 3576619666, 'load_duration': 788524916, 'prompt_eval_count': 32, 'prompt_eval_duration': 128125000, 'eval_count': 71, 'eval_duration': 2656556000}, id='run-ba48f958-6402-41a5-b461-5e250a4ebd36-0')
137
+
138
+ Stream:
139
+ .. code-block:: python
140
+
141
+ messages = [
142
+ ("human", "Return the words Hello World!"),
143
+ ]
144
+ for chunk in llm.stream(messages):
145
+ print(chunk)
146
+
147
+
148
+ .. code-block:: python
149
+
150
+ content='Hello' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
151
+ content=' World' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
152
+ content='!' id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
153
+ content='' response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:39:42.274449Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 411875125, 'load_duration': 1898166, 'prompt_eval_count': 14, 'prompt_eval_duration': 297320000, 'eval_count': 4, 'eval_duration': 111099000} id='run-327ff5ad-45c8-49fe-965c-0a93982e9be1'
154
+
155
+
156
+ .. code-block:: python
157
+
158
+ stream = llm.stream(messages)
159
+ full = next(stream)
160
+ for chunk in stream:
161
+ full += chunk
162
+ full
163
+
164
+ .. code-block:: python
165
+
166
+ AIMessageChunk(content='Je adore le programmation.(Note: "programmation" is the formal way to say "programming" in French, but informally, people might use the phrase "le développement logiciel" or simply "le code")', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:38:54.933154Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1977300042, 'load_duration': 1345709, 'prompt_eval_duration': 159343000, 'eval_count': 47, 'eval_duration': 1815123000}, id='run-3c81a3ed-3e79-4dd3-a796-04064d804890')
167
+
168
+ Async:
169
+ .. code-block:: python
170
+
171
+ messages = [
172
+ ("human", "Hello how are you!"),
173
+ ]
174
+ await llm.ainvoke(messages)
175
+
176
+ .. code-block:: python
177
+
178
+ AIMessage(content="Hi there! I'm just an AI, so I don't have feelings or emotions like humans do. But I'm functioning properly and ready to help with any questions or tasks you may have! How can I assist you today?", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:52:08.165478Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2138492875, 'load_duration': 1364000, 'prompt_eval_count': 10, 'prompt_eval_duration': 297081000, 'eval_count': 47, 'eval_duration': 1838524000}, id='run-29c510ae-49a4-4cdd-8f23-b972bfab1c49-0')
179
+
180
+ .. code-block:: python
181
+
182
+ messages = [
183
+ ("human", "Say hello world!"),
184
+ ]
185
+ async for chunk in llm.astream(messages):
186
+ print(chunk.content)
187
+
188
+ .. code-block:: python
189
+
190
+ HEL
191
+ LO
192
+ WORLD
193
+ !
194
+
195
+ .. code-block:: python
196
+
197
+ messages = [
198
+ ("human", "Say hello world!"),
199
+ ("human","Say goodbye world!")
200
+ ]
201
+ await llm.abatch(messages)
202
+
203
+ .. code-block:: python
204
+
205
+ [AIMessage(content='HELLO, WORLD!', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.315396Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1696745458, 'load_duration': 1505000, 'prompt_eval_count': 8, 'prompt_eval_duration': 111627000, 'eval_count': 6, 'eval_duration': 185181000}, id='run-da6c7562-e25a-4a44-987a-2c83cd8c2686-0'),
206
+ AIMessage(content="It's been a blast chatting with you! Say goodbye to the world for me, and don't forget to come back and visit us again soon!", response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:55:07.018076Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1399391083, 'load_duration': 1187417, 'prompt_eval_count': 20, 'prompt_eval_duration': 230349000, 'eval_count': 31, 'eval_duration': 1166047000}, id='run-96cad530-6f3e-4cf9-86b4-e0f8abba4cdb-0')]
207
+
208
+ JSON mode:
209
+ .. code-block:: python
210
+
211
+
212
+ json_llm = ChatOllama(format="json")
213
+ messages = [
214
+ ("human", "Return a query for the weather in a random location and time of day with two keys: location and time_of_day. Respond using JSON only."),
215
+ ]
216
+ json_llm.invoke(messages).content
217
+
218
+ .. code-block:: python
219
+
220
+ '{"location": "Pune, India", "time_of_day": "morning"}'
221
+ """ # noqa: E501
222
+
223
+ model: str = "llama2"
224
+ """Model name to use."""
225
+
226
+ mirostat: Optional[int] = None
227
+ """Enable Mirostat sampling for controlling perplexity.
228
+ (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
229
+
230
+ mirostat_eta: Optional[float] = None
231
+ """Influences how quickly the algorithm responds to feedback
232
+ from the generated text. A lower learning rate will result in
233
+ slower adjustments, while a higher learning rate will make
234
+ the algorithm more responsive. (Default: 0.1)"""
235
+
236
+ mirostat_tau: Optional[float] = None
237
+ """Controls the balance between coherence and diversity
238
+ of the output. A lower value will result in more focused and
239
+ coherent text. (Default: 5.0)"""
240
+
241
+ num_ctx: Optional[int] = None
242
+ """Sets the size of the context window used to generate the
243
+ next token. (Default: 2048) """
244
+
245
+ num_gpu: Optional[int] = None
246
+ """The number of GPUs to use. On macOS it defaults to 1 to
247
+ enable Metal support, 0 to disable."""
248
+
249
+ num_thread: Optional[int] = None
250
+ """Sets the number of threads to use during computation.
251
+ By default, Ollama will detect this for optimal performance.
252
+ It is recommended to set this value to the number of physical
253
+ CPU cores your system has (as opposed to the logical number of cores)."""
254
+
255
+ num_predict: Optional[int] = None
256
+ """Maximum number of tokens to predict when generating text.
257
+ (Default: 128, -1 = infinite generation, -2 = fill context)"""
258
+
259
+ repeat_last_n: Optional[int] = None
260
+ """Sets how far back for the model to look back to prevent
261
+ repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
262
+
263
+ repeat_penalty: Optional[float] = None
264
+ """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
265
+ will penalize repetitions more strongly, while a lower value (e.g., 0.9)
266
+ will be more lenient. (Default: 1.1)"""
267
+
268
+ temperature: Optional[float] = None
269
+ """The temperature of the model. Increasing the temperature will
270
+ make the model answer more creatively. (Default: 0.8)"""
271
+
272
+ stop: Optional[List[str]] = None
273
+ """Sets the stop tokens to use."""
274
+
275
+ tfs_z: Optional[float] = None
276
+ """Tail free sampling is used to reduce the impact of less probable
277
+ tokens from the output. A higher value (e.g., 2.0) will reduce the
278
+ impact more, while a value of 1.0 disables this setting. (default: 1)"""
279
+
280
+ top_k: Optional[int] = None
281
+ """Reduces the probability of generating nonsense. A higher value (e.g. 100)
282
+ will give more diverse answers, while a lower value (e.g. 10)
283
+ will be more conservative. (Default: 40)"""
284
+
285
+ top_p: Optional[float] = None
286
+ """Works together with top-k. A higher value (e.g., 0.95) will lead
287
+ to more diverse text, while a lower value (e.g., 0.5) will
288
+ generate more focused and conservative text. (Default: 0.9)"""
289
+
290
+ format: Literal["", "json"] = ""
291
+ """Specify the format of the output (options: json)"""
292
+
293
+ keep_alive: Optional[Union[int, str]] = None
294
+ """How long the model will stay loaded into memory."""
295
+
296
+ @property
297
+ def _default_params(self) -> Dict[str, Any]:
298
+ """Get the default parameters for calling Ollama."""
299
+ return {
300
+ "model": self.model,
301
+ "format": self.format,
302
+ "options": {
303
+ "mirostat": self.mirostat,
304
+ "mirostat_eta": self.mirostat_eta,
305
+ "mirostat_tau": self.mirostat_tau,
306
+ "num_ctx": self.num_ctx,
307
+ "num_gpu": self.num_gpu,
308
+ "num_thread": self.num_thread,
309
+ "num_predict": self.num_predict,
310
+ "repeat_last_n": self.repeat_last_n,
311
+ "repeat_penalty": self.repeat_penalty,
312
+ "temperature": self.temperature,
313
+ "stop": self.stop,
314
+ "tfs_z": self.tfs_z,
315
+ "top_k": self.top_k,
316
+ "top_p": self.top_p,
317
+ },
318
+ "keep_alive": self.keep_alive,
319
+ }
320
+
321
+ def _convert_messages_to_ollama_messages(
322
+ self, messages: List[BaseMessage]
323
+ ) -> Sequence[Message]:
324
+ ollama_messages: List = []
325
+ for message in messages:
326
+ role = ""
327
+ tool_call_id: Optional[str] = None
328
+ tool_calls: Optional[List[Dict[str, Any]]] = None
329
+ if isinstance(message, HumanMessage):
330
+ role = "user"
331
+ elif isinstance(message, AIMessage):
332
+ role = "assistant"
333
+ tool_calls = (
334
+ [
335
+ _lc_tool_call_to_openai_tool_call(tool_call)
336
+ for tool_call in message.tool_calls
337
+ ]
338
+ if message.tool_calls
339
+ else None
340
+ )
341
+ elif isinstance(message, SystemMessage):
342
+ role = "system"
343
+ elif isinstance(message, ToolMessage):
344
+ role = "tool"
345
+ tool_call_id = message.tool_call_id
346
+ else:
347
+ raise ValueError("Received unsupported message type for Ollama.")
348
+
349
+ content = ""
350
+ images = []
351
+ if isinstance(message.content, str):
352
+ content = message.content
353
+ else:
354
+ for content_part in cast(List[Dict], message.content):
355
+ if content_part.get("type") == "text":
356
+ content += f"\n{content_part['text']}"
357
+ elif content_part.get("type") == "tool_use":
358
+ continue
359
+ elif content_part.get("type") == "image_url":
360
+ image_url = None
361
+ temp_image_url = content_part.get("image_url")
362
+ if isinstance(temp_image_url, str):
363
+ image_url = content_part["image_url"]
364
+ elif (
365
+ isinstance(temp_image_url, dict) and "url" in temp_image_url
366
+ ):
367
+ image_url = temp_image_url
368
+ else:
369
+ raise ValueError(
370
+ "Only string image_url or dict with string 'url' "
371
+ "inside content parts are supported."
372
+ )
373
+
374
+ image_url_components = image_url.split(",")
375
+ # Support data:image/jpeg;base64,<image> format
376
+ # and base64 strings
377
+ if len(image_url_components) > 1:
378
+ images.append(image_url_components[1])
379
+ else:
380
+ images.append(image_url_components[0])
381
+
382
+ else:
383
+ raise ValueError(
384
+ "Unsupported message content type. "
385
+ "Must either have type 'text' or type 'image_url' "
386
+ "with a string 'image_url' field."
387
+ )
388
+ msg = {
389
+ "role": role,
390
+ "content": content,
391
+ "images": images,
392
+ }
393
+ if tool_call_id:
394
+ msg["tool_call_id"] = tool_call_id
395
+ if tool_calls:
396
+ msg["tool_calls"] = tool_calls
397
+ ollama_messages.append(msg)
398
+
399
+ return ollama_messages
400
+
401
+ async def _acreate_chat_stream(
402
+ self,
403
+ messages: List[BaseMessage],
404
+ stop: Optional[List[str]] = None,
405
+ **kwargs: Any,
406
+ ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
407
+ ollama_messages = self._convert_messages_to_ollama_messages(messages)
408
+
409
+ stop = stop if stop is not None else self.stop
410
+
411
+ params = self._default_params
412
+
413
+ for key in self._default_params:
414
+ if key in kwargs:
415
+ params[key] = kwargs[key]
416
+
417
+ params["options"]["stop"] = stop
418
+ async for part in await AsyncClient().chat(
419
+ model=params["model"],
420
+ messages=ollama_messages,
421
+ stream=True,
422
+ options=Options(**params["options"]),
423
+ keep_alive=params["keep_alive"],
424
+ format=params["format"],
425
+ ): # type:ignore
426
+ yield part
427
+
428
+ def _create_chat_stream(
429
+ self,
430
+ messages: List[BaseMessage],
431
+ stop: Optional[List[str]] = None,
432
+ **kwargs: Any,
433
+ ) -> Iterator[Union[Mapping[str, Any], str]]:
434
+ ollama_messages = self._convert_messages_to_ollama_messages(messages)
435
+
436
+ stop = stop if stop is not None else self.stop
437
+
438
+ params = self._default_params
439
+
440
+ for key in self._default_params:
441
+ if key in kwargs:
442
+ params[key] = kwargs[key]
443
+
444
+ params["options"]["stop"] = stop
445
+ if "tools" in kwargs:
446
+ # tools not supported by sdk yet.
447
+ req = {
448
+ "model": params["model"],
449
+ "messages": ollama_messages,
450
+ "stream": False,
451
+ "format": params["format"],
452
+ "options": Options(**params["options"]),
453
+ "keep_alive": params["keep_alive"],
454
+ "tools": kwargs["tools"],
455
+ }
456
+ it = ollama._client._request_stream(
457
+ "POST",
458
+ "/api/chat",
459
+ json=req,
460
+ stream=False,
461
+ )
462
+ yield cast(Mapping[str, Any], it)
463
+ else:
464
+ yield from ollama.chat(
465
+ model=params["model"],
466
+ messages=ollama_messages,
467
+ stream=True,
468
+ options=Options(**params["options"]),
469
+ keep_alive=params["keep_alive"],
470
+ format=params["format"],
471
+ )
472
+
473
+ def _chat_stream_with_aggregation(
474
+ self,
475
+ messages: List[BaseMessage],
476
+ stop: Optional[List[str]] = None,
477
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
478
+ verbose: bool = False,
479
+ **kwargs: Any,
480
+ ) -> ChatGenerationChunk:
481
+ final_chunk = None
482
+ for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
483
+ if not isinstance(stream_resp, str):
484
+ chunk = ChatGenerationChunk(
485
+ message=AIMessageChunk(
486
+ content=(
487
+ stream_resp["message"]["content"]
488
+ if "message" in stream_resp
489
+ and "content" in stream_resp["message"]
490
+ else ""
491
+ ),
492
+ usage_metadata=_get_usage_metadata_from_generation_info(
493
+ stream_resp
494
+ ),
495
+ tool_calls=_get_tool_calls_from_response(stream_resp),
496
+ ),
497
+ generation_info=(
498
+ dict(stream_resp) if stream_resp.get("done") is True else None
499
+ ),
500
+ )
501
+ if final_chunk is None:
502
+ final_chunk = chunk
503
+ else:
504
+ final_chunk += chunk
505
+ if run_manager:
506
+ run_manager.on_llm_new_token(
507
+ chunk.text,
508
+ chunk=chunk,
509
+ verbose=verbose,
510
+ )
511
+ if final_chunk is None:
512
+ raise ValueError("No data received from Ollama stream.")
513
+
514
+ return final_chunk
515
+
516
+ async def _achat_stream_with_aggregation(
517
+ self,
518
+ messages: List[BaseMessage],
519
+ stop: Optional[List[str]] = None,
520
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
521
+ verbose: bool = False,
522
+ **kwargs: Any,
523
+ ) -> ChatGenerationChunk:
524
+ final_chunk = None
525
+ async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
526
+ if not isinstance(stream_resp, str):
527
+ chunk = ChatGenerationChunk(
528
+ message=AIMessageChunk(
529
+ content=(
530
+ stream_resp["message"]["content"]
531
+ if "message" in stream_resp
532
+ and "content" in stream_resp["message"]
533
+ else ""
534
+ ),
535
+ usage_metadata=_get_usage_metadata_from_generation_info(
536
+ stream_resp
537
+ ),
538
+ tool_calls=_get_tool_calls_from_response(stream_resp),
539
+ ),
540
+ generation_info=(
541
+ dict(stream_resp) if stream_resp.get("done") is True else None
542
+ ),
543
+ )
544
+ if final_chunk is None:
545
+ final_chunk = chunk
546
+ else:
547
+ final_chunk += chunk
548
+ if run_manager:
549
+ await run_manager.on_llm_new_token(
550
+ chunk.text,
551
+ chunk=chunk,
552
+ verbose=verbose,
553
+ )
554
+ if final_chunk is None:
555
+ raise ValueError("No data received from Ollama stream.")
556
+
557
+ return final_chunk
558
+
559
+ def _get_ls_params(
560
+ self, stop: Optional[List[str]] = None, **kwargs: Any
561
+ ) -> LangSmithParams:
562
+ """Get standard params for tracing."""
563
+ params = self._get_invocation_params(stop=stop, **kwargs)
564
+ ls_params = LangSmithParams(
565
+ ls_provider="ollama",
566
+ ls_model_name=self.model,
567
+ ls_model_type="chat",
568
+ ls_temperature=params.get("temperature", self.temperature),
569
+ )
570
+ if ls_stop := stop or params.get("stop", None) or self.stop:
571
+ ls_params["ls_stop"] = ls_stop
572
+ return ls_params
573
+
574
+ def _generate(
575
+ self,
576
+ messages: List[BaseMessage],
577
+ stop: Optional[List[str]] = None,
578
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
579
+ **kwargs: Any,
580
+ ) -> ChatResult:
581
+ final_chunk = self._chat_stream_with_aggregation(
582
+ messages, stop, run_manager, verbose=self.verbose, **kwargs
583
+ )
584
+ generation_info = final_chunk.generation_info
585
+ chat_generation = ChatGeneration(
586
+ message=AIMessage(
587
+ content=final_chunk.text,
588
+ usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
589
+ tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
590
+ ),
591
+ generation_info=generation_info,
592
+ )
593
+ return ChatResult(generations=[chat_generation])
594
+
595
+ def _stream(
596
+ self,
597
+ messages: List[BaseMessage],
598
+ stop: Optional[List[str]] = None,
599
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
600
+ **kwargs: Any,
601
+ ) -> Iterator[ChatGenerationChunk]:
602
+ for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
603
+ if not isinstance(stream_resp, str):
604
+ chunk = ChatGenerationChunk(
605
+ message=AIMessageChunk(
606
+ content=(
607
+ stream_resp["message"]["content"]
608
+ if "message" in stream_resp
609
+ and "content" in stream_resp["message"]
610
+ else ""
611
+ ),
612
+ usage_metadata=_get_usage_metadata_from_generation_info(
613
+ stream_resp
614
+ ),
615
+ tool_calls=_get_tool_calls_from_response(stream_resp),
616
+ ),
617
+ generation_info=(
618
+ dict(stream_resp) if stream_resp.get("done") is True else None
619
+ ),
620
+ )
621
+ if run_manager:
622
+ run_manager.on_llm_new_token(
623
+ chunk.text,
624
+ verbose=self.verbose,
625
+ )
626
+ yield chunk
627
+
628
+ async def _astream(
629
+ self,
630
+ messages: List[BaseMessage],
631
+ stop: Optional[List[str]] = None,
632
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
633
+ **kwargs: Any,
634
+ ) -> AsyncIterator[ChatGenerationChunk]:
635
+ async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
636
+ if not isinstance(stream_resp, str):
637
+ chunk = ChatGenerationChunk(
638
+ message=AIMessageChunk(
639
+ content=(
640
+ stream_resp["message"]["content"]
641
+ if "message" in stream_resp
642
+ and "content" in stream_resp["message"]
643
+ else ""
644
+ ),
645
+ usage_metadata=_get_usage_metadata_from_generation_info(
646
+ stream_resp
647
+ ),
648
+ tool_calls=_get_tool_calls_from_response(stream_resp),
649
+ ),
650
+ generation_info=(
651
+ dict(stream_resp) if stream_resp.get("done") is True else None
652
+ ),
653
+ )
654
+ if run_manager:
655
+ await run_manager.on_llm_new_token(
656
+ chunk.text,
657
+ verbose=self.verbose,
658
+ )
659
+ yield chunk
660
+
661
+ async def _agenerate(
662
+ self,
663
+ messages: List[BaseMessage],
664
+ stop: Optional[List[str]] = None,
665
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
666
+ **kwargs: Any,
667
+ ) -> ChatResult:
668
+ final_chunk = await self._achat_stream_with_aggregation(
669
+ messages, stop, run_manager, verbose=self.verbose, **kwargs
670
+ )
671
+ generation_info = final_chunk.generation_info
672
+ chat_generation = ChatGeneration(
673
+ message=AIMessage(
674
+ content=final_chunk.text,
675
+ usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
676
+ tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
677
+ ),
678
+ generation_info=generation_info,
679
+ )
680
+ return ChatResult(generations=[chat_generation])
681
+
682
+ @property
683
+ def _llm_type(self) -> str:
684
+ """Return type of chat model."""
685
+ return "chat-ollama"
686
+
687
+ def bind_tools(
688
+ self,
689
+ tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
690
+ **kwargs: Any,
691
+ ) -> Runnable[LanguageModelInput, BaseMessage]:
692
+ formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
693
+ return super().bind(tools=formatted_tools, **kwargs)
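
The `bind_tools` method above converts each tool to the OpenAI tool schema via `convert_to_openai_tool` and binds it as the `tools` kwarg; tool calls returned by the server are then surfaced on `AIMessage.tool_calls` through `_get_tool_calls_from_response`. A minimal usage sketch follows; the `add` tool and the `"llama3"` model name are illustrative assumptions, not part of this package, and a tool-capable model must be pulled locally.

```python
# Hedged bind_tools sketch; the tool and model name are illustrative assumptions.
from langchain_core.tools import tool
from langchain_ollama import ChatOllama


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


llm = ChatOllama(model="llama3")  # assumes a tool-capable model is available locally
llm_with_tools = llm.bind_tools([add])

msg = llm_with_tools.invoke("What is 2 + 3? Use the add tool.")
# Tool calls are surfaced on AIMessage.tool_calls with generated ids.
print(msg.tool_calls)
```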
@@ -0,0 +1,51 @@
1
+ from typing import List
2
+
3
+ import ollama
4
+ from langchain_core.embeddings import Embeddings
5
+ from langchain_core.pydantic_v1 import BaseModel, Extra
6
+ from ollama import AsyncClient
7
+
8
+
9
+ class OllamaEmbeddings(BaseModel, Embeddings):
10
+ """OllamaEmbeddings embedding model.
11
+
12
+ Example:
13
+ .. code-block:: python
14
+
15
+ from langchain_ollama import OllamaEmbeddings
16
+
17
+ model = OllamaEmbeddings(model="llama3")
18
+ model.embed_query("what is the place that jonathan worked at?")
19
+ """
20
+
21
+ model: str = "llama2"
22
+ """Model name to use."""
23
+
24
+ class Config:
25
+ """Configuration for this pydantic object."""
26
+
27
+ extra = Extra.forbid
28
+
29
+ def embed_documents(self, texts: List[str]) -> List[List[float]]:
30
+ """Embed search docs."""
31
+ embedded_docs = []
32
+ for doc in texts:
33
+ embedded_docs.append(list(ollama.embeddings(self.model, doc)["embedding"]))
34
+ return embedded_docs
35
+
36
+ def embed_query(self, text: str) -> List[float]:
37
+ """Embed query text."""
38
+ return self.embed_documents([text])[0]
39
+
40
+ async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
41
+ """Embed search docs."""
42
+ embedded_docs = []
43
+ for doc in texts:
44
+ embedded_docs.append(
45
+ list((await AsyncClient().embeddings(self.model, doc))["embedding"])
46
+ )
47
+ return embedded_docs
48
+
49
+ async def aembed_query(self, text: str) -> List[float]:
50
+ """Embed query text."""
51
+ return (await self.aembed_documents([text]))[0]
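
A usage sketch for the async embedding path above (a minimal example under stated assumptions: a local Ollama server is running, the `"llama3"` model is pulled, and the texts are illustrative):

```python
# Async embeddings sketch; assumes a local Ollama server with "llama3" pulled.
import asyncio

from langchain_ollama import OllamaEmbeddings


async def main() -> None:
    embedder = OllamaEmbeddings(model="llama3")
    doc_vectors = await embedder.aembed_documents(["hello world", "goodbye world"])
    query_vector = await embedder.aembed_query("hello world")
    # One vector per document, plus a single query vector of the same dimension.
    print(len(doc_vectors), len(query_vector))


asyncio.run(main())
```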
@@ -0,0 +1,343 @@
1
+ """Ollama large language models."""
2
+
3
+ from typing import (
4
+ Any,
5
+ AsyncIterator,
6
+ Dict,
7
+ Iterator,
8
+ List,
9
+ Literal,
10
+ Mapping,
11
+ Optional,
12
+ Union,
13
+ )
14
+
15
+ import ollama
16
+ from langchain_core.callbacks import (
17
+ AsyncCallbackManagerForLLMRun,
18
+ CallbackManagerForLLMRun,
19
+ )
20
+ from langchain_core.language_models import BaseLLM
21
+ from langchain_core.outputs import GenerationChunk, LLMResult
22
+ from ollama import AsyncClient, Options
23
+
24
+
25
+ class OllamaLLM(BaseLLM):
26
+ """OllamaLLM large language models.
27
+
28
+ Example:
29
+ .. code-block:: python
30
+
31
+ from langchain_ollama import OllamaLLM
32
+
33
+ model = OllamaLLM(model="llama3")
34
+ model.invoke("Come up with 10 names for a song about parrots")
35
+ """
36
+
37
+ model: str = "llama2"
38
+ """Model name to use."""
39
+
40
+ mirostat: Optional[int] = None
41
+ """Enable Mirostat sampling for controlling perplexity.
42
+ (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
43
+
44
+ mirostat_eta: Optional[float] = None
45
+ """Influences how quickly the algorithm responds to feedback
46
+ from the generated text. A lower learning rate will result in
47
+ slower adjustments, while a higher learning rate will make
48
+ the algorithm more responsive. (Default: 0.1)"""
49
+
50
+ mirostat_tau: Optional[float] = None
51
+ """Controls the balance between coherence and diversity
52
+ of the output. A lower value will result in more focused and
53
+ coherent text. (Default: 5.0)"""
54
+
55
+ num_ctx: Optional[int] = None
56
+ """Sets the size of the context window used to generate the
57
+ next token. (Default: 2048) """
58
+
59
+ num_gpu: Optional[int] = None
60
+ """The number of GPUs to use. On macOS it defaults to 1 to
61
+ enable Metal support, 0 to disable."""
62
+
63
+ num_thread: Optional[int] = None
64
+ """Sets the number of threads to use during computation.
65
+ By default, Ollama will detect this for optimal performance.
66
+ It is recommended to set this value to the number of physical
67
+ CPU cores your system has (as opposed to the logical number of cores)."""
68
+
69
+ num_predict: Optional[int] = None
70
+ """Maximum number of tokens to predict when generating text.
71
+ (Default: 128, -1 = infinite generation, -2 = fill context)"""
72
+
73
+ repeat_last_n: Optional[int] = None
74
+ """Sets how far back for the model to look back to prevent
75
+ repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
76
+
77
+ repeat_penalty: Optional[float] = None
78
+ """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
79
+ will penalize repetitions more strongly, while a lower value (e.g., 0.9)
80
+ will be more lenient. (Default: 1.1)"""
81
+
82
+ temperature: Optional[float] = None
83
+ """The temperature of the model. Increasing the temperature will
84
+ make the model answer more creatively. (Default: 0.8)"""
85
+
86
+ stop: Optional[List[str]] = None
87
+ """Sets the stop tokens to use."""
88
+
89
+ tfs_z: Optional[float] = None
90
+ """Tail free sampling is used to reduce the impact of less probable
91
+ tokens from the output. A higher value (e.g., 2.0) will reduce the
92
+ impact more, while a value of 1.0 disables this setting. (default: 1)"""
93
+
94
+ top_k: Optional[int] = None
95
+ """Reduces the probability of generating nonsense. A higher value (e.g. 100)
96
+ will give more diverse answers, while a lower value (e.g. 10)
97
+ will be more conservative. (Default: 40)"""
98
+
99
+ top_p: Optional[float] = None
100
+ """Works together with top-k. A higher value (e.g., 0.95) will lead
101
+ to more diverse text, while a lower value (e.g., 0.5) will
102
+ generate more focused and conservative text. (Default: 0.9)"""
103
+
104
+ format: Literal["", "json"] = ""
105
+ """Specify the format of the output (options: json)"""
106
+
107
+ keep_alive: Optional[Union[int, str]] = None
108
+ """How long the model will stay loaded into memory."""
109
+
110
+ @property
111
+ def _default_params(self) -> Dict[str, Any]:
112
+ """Get the default parameters for calling Ollama."""
113
+ return {
114
+ "model": self.model,
115
+ "format": self.format,
116
+ "options": {
117
+ "mirostat": self.mirostat,
118
+ "mirostat_eta": self.mirostat_eta,
119
+ "mirostat_tau": self.mirostat_tau,
120
+ "num_ctx": self.num_ctx,
121
+ "num_gpu": self.num_gpu,
122
+ "num_thread": self.num_thread,
123
+ "num_predict": self.num_predict,
124
+ "repeat_last_n": self.repeat_last_n,
125
+ "repeat_penalty": self.repeat_penalty,
126
+ "temperature": self.temperature,
127
+ "stop": self.stop,
128
+ "tfs_z": self.tfs_z,
129
+ "top_k": self.top_k,
130
+ "top_p": self.top_p,
131
+ },
132
+ "keep_alive": self.keep_alive,
133
+ }
134
+
135
+ @property
136
+ def _llm_type(self) -> str:
137
+ """Return type of LLM."""
138
+ return "ollama-llm"
139
+
140
+ async def _acreate_generate_stream(
141
+ self,
142
+ prompt: str,
143
+ stop: Optional[List[str]] = None,
144
+ **kwargs: Any,
145
+ ) -> AsyncIterator[Union[Mapping[str, Any], str]]:
146
+ if self.stop is not None and stop is not None:
147
+ raise ValueError("`stop` found in both the input and default params.")
148
+ elif self.stop is not None:
149
+ stop = self.stop
150
+
151
+ params = self._default_params
152
+
153
+ for key in self._default_params:
154
+ if key in kwargs:
155
+ params[key] = kwargs[key]
156
+
157
+ params["options"]["stop"] = stop
158
+ async for part in await AsyncClient().generate(
159
+ model=params["model"],
160
+ prompt=prompt,
161
+ stream=True,
162
+ options=Options(**params["options"]),
163
+ keep_alive=params["keep_alive"],
164
+ format=params["format"],
165
+ ): # type: ignore
166
+ yield part
167
+
168
+ def _create_generate_stream(
169
+ self,
170
+ prompt: str,
171
+ stop: Optional[List[str]] = None,
172
+ **kwargs: Any,
173
+ ) -> Iterator[Union[Mapping[str, Any], str]]:
174
+ if self.stop is not None and stop is not None:
175
+ raise ValueError("`stop` found in both the input and default params.")
176
+ elif self.stop is not None:
177
+ stop = self.stop
178
+
179
+ params = self._default_params
180
+
181
+ for key in self._default_params:
182
+ if key in kwargs:
183
+ params[key] = kwargs[key]
184
+
185
+ params["options"]["stop"] = stop
186
+ yield from ollama.generate(
187
+ model=params["model"],
188
+ prompt=prompt,
189
+ stream=True,
190
+ options=Options(**params["options"]),
191
+ keep_alive=params["keep_alive"],
192
+ format=params["format"],
193
+ )
194
+
195
+ async def _astream_with_aggregation(
196
+ self,
197
+ prompt: str,
198
+ stop: Optional[List[str]] = None,
199
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
200
+ verbose: bool = False,
201
+ **kwargs: Any,
202
+ ) -> GenerationChunk:
203
+ final_chunk = None
204
+ async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
205
+ if not isinstance(stream_resp, str):
206
+ chunk = GenerationChunk(
207
+ text=stream_resp["response"] if "response" in stream_resp else "",
208
+ generation_info=dict(stream_resp)
209
+ if stream_resp.get("done") is True
210
+ else None,
211
+ )
212
+ if final_chunk is None:
213
+ final_chunk = chunk
214
+ else:
215
+ final_chunk += chunk
216
+ if run_manager:
217
+ await run_manager.on_llm_new_token(
218
+ chunk.text,
219
+ chunk=chunk,
220
+ verbose=verbose,
221
+ )
222
+ if final_chunk is None:
223
+ raise ValueError("No data received from Ollama stream.")
224
+
225
+ return final_chunk
226
+
227
+ def _stream_with_aggregation(
228
+ self,
229
+ prompt: str,
230
+ stop: Optional[List[str]] = None,
231
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
232
+ verbose: bool = False,
233
+ **kwargs: Any,
234
+ ) -> GenerationChunk:
235
+ final_chunk = None
236
+ for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
237
+ if not isinstance(stream_resp, str):
238
+ chunk = GenerationChunk(
239
+ text=stream_resp["response"] if "response" in stream_resp else "",
240
+ generation_info=dict(stream_resp)
241
+ if stream_resp.get("done") is True
242
+ else None,
243
+ )
244
+ if final_chunk is None:
245
+ final_chunk = chunk
246
+ else:
247
+ final_chunk += chunk
248
+ if run_manager:
249
+ run_manager.on_llm_new_token(
250
+ chunk.text,
251
+ chunk=chunk,
252
+ verbose=verbose,
253
+ )
254
+ if final_chunk is None:
255
+ raise ValueError("No data received from Ollama stream.")
256
+
257
+ return final_chunk
258
+
259
+ def _generate(
260
+ self,
261
+ prompts: List[str],
262
+ stop: Optional[List[str]] = None,
263
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
264
+ **kwargs: Any,
265
+ ) -> LLMResult:
266
+ generations = []
267
+ for prompt in prompts:
268
+ final_chunk = self._stream_with_aggregation(
269
+ prompt,
270
+ stop=stop,
271
+ run_manager=run_manager,
272
+ verbose=self.verbose,
273
+ **kwargs,
274
+ )
275
+ generations.append([final_chunk])
276
+ return LLMResult(generations=generations) # type: ignore[arg-type]
277
+
278
+ async def _agenerate(
279
+ self,
280
+ prompts: List[str],
281
+ stop: Optional[List[str]] = None,
282
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
283
+ **kwargs: Any,
284
+ ) -> LLMResult:
285
+ generations = []
286
+ for prompt in prompts:
287
+ final_chunk = await self._astream_with_aggregation(
288
+ prompt,
289
+ stop=stop,
290
+ run_manager=run_manager,
291
+ verbose=self.verbose,
292
+ **kwargs,
293
+ )
294
+ generations.append([final_chunk])
295
+ return LLMResult(generations=generations) # type: ignore[arg-type]
296
+
297
+ def _stream(
298
+ self,
299
+ prompt: str,
300
+ stop: Optional[List[str]] = None,
301
+ run_manager: Optional[CallbackManagerForLLMRun] = None,
302
+ **kwargs: Any,
303
+ ) -> Iterator[GenerationChunk]:
304
+ for stream_resp in self._create_generate_stream(prompt, stop, **kwargs):
305
+ if not isinstance(stream_resp, str):
306
+ chunk = GenerationChunk(
307
+ text=stream_resp["message"]["content"]
308
+ if "message" in stream_resp
309
+ else "",
310
+ generation_info=dict(stream_resp)
311
+ if stream_resp.get("done") is True
312
+ else None,
313
+ )
314
+ if run_manager:
315
+ run_manager.on_llm_new_token(
316
+ chunk.text,
317
+ verbose=self.verbose,
318
+ )
319
+ yield chunk
320
+
321
+ async def _astream(
322
+ self,
323
+ prompt: str,
324
+ stop: Optional[List[str]] = None,
325
+ run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
326
+ **kwargs: Any,
327
+ ) -> AsyncIterator[GenerationChunk]:
328
+ async for stream_resp in self._acreate_generate_stream(prompt, stop, **kwargs):
329
+ if not isinstance(stream_resp, str):
330
+ chunk = GenerationChunk(
331
+ text=stream_resp["message"]["content"]
332
+ if "message" in stream_resp
333
+ else "",
334
+ generation_info=dict(stream_resp)
335
+ if stream_resp.get("done") is True
336
+ else None,
337
+ )
338
+ if run_manager:
339
+ await run_manager.on_llm_new_token(
340
+ chunk.text,
341
+ verbose=self.verbose,
342
+ )
343
+ yield chunk
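
A streaming usage sketch for the completion-style class above (the model name and prompt are assumptions; requires a running local Ollama server with the model pulled):

```python
# Streaming sketch for OllamaLLM; assumes a local Ollama server with "llama3" pulled.
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="llama3", num_predict=64)
# BaseLLM.stream yields string tokens produced by the _stream implementation above.
for token in llm.stream("Name three uses for a paperclip."):
    print(token, end="", flush=True)
print()
```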
File without changes
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 LangChain, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,64 @@
1
+ Metadata-Version: 2.1
2
+ Name: langchain-ollama
3
+ Version: 0.1.0rc0
4
+ Summary: An integration package connecting Ollama and LangChain
5
+ Home-page: https://github.com/langchain-ai/langchain
6
+ License: MIT
7
+ Requires-Python: >=3.8.1,<4.0
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: langchain-core (>=0.2.20,<0.3.0)
15
+ Requires-Dist: ollama (>=0.2.1,<1)
16
+ Project-URL: Repository, https://github.com/langchain-ai/langchain
17
+ Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
18
+ Description-Content-Type: text/markdown
19
+
20
+ # langchain-ollama
21
+
22
+ This package contains the LangChain integration with Ollama.
23
+
24
+ ## Installation
25
+
26
+ ```bash
27
+ pip install -U langchain-ollama
28
+ ```
29
+
30
+ You will also need to run the Ollama server locally.
31
+ You can download it [here](https://ollama.com/download).
32
+
33
+ ## Chat Models
34
+
35
+ The `ChatOllama` class exposes chat models from Ollama.
36
+
37
+ ```python
38
+ from langchain_ollama import ChatOllama
39
+
40
+ llm = ChatOllama(model="llama3")
41
+ llm.invoke("Sing a ballad of LangChain.")
42
+ ```
43
+
44
+ ## Embeddings
45
+
46
+ The `OllamaEmbeddings` class exposes embeddings from Ollama.
47
+
48
+ ```python
49
+ from langchain_ollama import OllamaEmbeddings
50
+
51
+ embeddings = OllamaEmbeddings(model="llama3")
52
+ embeddings.embed_query("What is the meaning of life?")
53
+ ```
54
+
55
+ ## LLMs
56
+ The `OllamaLLM` class exposes LLMs from Ollama.
57
+
58
+ ```python
59
+ from langchain_ollama import OllamaLLM
60
+
61
+ llm = OllamaLLM(model="llama3")
62
+ llm.invoke("The meaning of life is")
63
+ ```
64
+
@@ -0,0 +1,9 @@
1
+ langchain_ollama/__init__.py,sha256=HhQZqbCjhrbr2dC_9Dkw12pg4HPjnDXUoInROMNJKqA,518
2
+ langchain_ollama/chat_models.py,sha256=HgKG2MJ-ORJHC9ootpZNq9Gi507awQWAhWqJYkz-w9U,27680
3
+ langchain_ollama/embeddings.py,sha256=_T8N0rh3z9mZHwwUFYNlJngjYmHt5GiQZVOy7l4JNMc,1551
4
+ langchain_ollama/llms.py,sha256=rbpupb4cx0dG692Drs5Verm1Tz48U9gdcLuxkZcxBpo,12071
5
+ langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ langchain_ollama-0.1.0rc0.dist-info/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
7
+ langchain_ollama-0.1.0rc0.dist-info/METADATA,sha256=uVl4yXYxTEAZykbX5bxB36iJTeBfnlLvkQClaJhT2nw,1671
8
+ langchain_ollama-0.1.0rc0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
+ langchain_ollama-0.1.0rc0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any