langchain-ollama 0.2.2rc1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as published to their public registry. It is provided for informational purposes only.

langchain_ollama/chat_models.py

@@ -1,16 +1,20 @@
  """Ollama chat models."""
 
+ import json
+ from operator import itemgetter
  from typing import (
      Any,
      AsyncIterator,
      Callable,
      Dict,
+     Final,
      Iterator,
      List,
      Literal,
      Mapping,
      Optional,
      Sequence,
+     Tuple,
      Type,
      Union,
      cast,
@@ -21,12 +25,14 @@ from langchain_core.callbacks import (
      CallbackManagerForLLMRun,
  )
  from langchain_core.callbacks.manager import AsyncCallbackManagerForLLMRun
+ from langchain_core.exceptions import OutputParserException
  from langchain_core.language_models import LanguageModelInput
  from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
  from langchain_core.messages import (
      AIMessage,
      AIMessageChunk,
      BaseMessage,
+     BaseMessageChunk,
      HumanMessage,
      SystemMessage,
      ToolCall,
@@ -34,13 +40,28 @@ from langchain_core.messages import (
  )
  from langchain_core.messages.ai import UsageMetadata
  from langchain_core.messages.tool import tool_call
+ from langchain_core.output_parsers import (
+     JsonOutputKeyToolsParser,
+     JsonOutputParser,
+     PydanticOutputParser,
+     PydanticToolsParser,
+ )
  from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
- from langchain_core.runnables import Runnable
+ from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
  from langchain_core.tools import BaseTool
- from langchain_core.utils.function_calling import convert_to_openai_tool
+ from langchain_core.utils.function_calling import (
+     convert_to_json_schema,
+     convert_to_openai_tool,
+ )
+ from langchain_core.utils.pydantic import TypeBaseModel, is_basemodel_subclass
  from ollama import AsyncClient, Client, Message, Options
- from pydantic import PrivateAttr, model_validator
- from typing_extensions import Self
+ from pydantic import BaseModel, PrivateAttr, model_validator
+ from pydantic.json_schema import JsonSchemaValue
+ from pydantic.v1 import BaseModel as BaseModelV1
+ from typing_extensions import Self, is_typeddict
+
+ DEFAULT_THINK_TOKEN_START: Final[str] = "<think>"
+ DEFAULT_THINK_TOKEN_END: Final[str] = "</think>"
 
 
  def _get_usage_metadata_from_generation_info(
@@ -60,6 +81,76 @@ def _get_usage_metadata_from_generation_info(
      return None
 
 
+ def _parse_json_string(
+     json_string: str, raw_tool_call: dict[str, Any], skip: bool
+ ) -> Any:
+     """Attempt to parse a JSON string for tool calling.
+
+     Args:
+         json_string: JSON string to parse.
+         skip: Whether to ignore parsing errors and return the value anyways.
+         raw_tool_call: Raw tool call to include in error message.
+
+     Returns:
+         The parsed JSON string.
+
+     Raises:
+         OutputParserException: If the JSON string is invalid and skip=False.
+     """
+     try:
+         return json.loads(json_string)
+     except json.JSONDecodeError as e:
+         if skip:
+             return json_string
+         msg = (
+             f"Function {raw_tool_call['function']['name']} arguments:\n\n"
+             f"{raw_tool_call['function']['arguments']}\n\nare not valid JSON. "
+             f"Received JSONDecodeError {e}"
+         )
+         raise OutputParserException(msg) from e
+     except TypeError as e:
+         if skip:
+             return json_string
+         msg = (
+             f"Function {raw_tool_call['function']['name']} arguments:\n\n"
+             f"{raw_tool_call['function']['arguments']}\n\nare not a string or a "
+             f"dictionary. Received TypeError {e}"
+         )
+         raise OutputParserException(msg) from e
+
+
+ def _parse_arguments_from_tool_call(
+     raw_tool_call: dict[str, Any],
+ ) -> Optional[dict[str, Any]]:
+     """Parse arguments by trying to parse any shallowly nested string-encoded JSON.
+
+     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
+     Should be removed/changed if fixed upstream.
+     See https://github.com/ollama/ollama/issues/6155
+     """
+     if "function" not in raw_tool_call:
+         return None
+     arguments = raw_tool_call["function"]["arguments"]
+     parsed_arguments: dict = {}
+     if isinstance(arguments, dict):
+         for key, value in arguments.items():
+             if isinstance(value, str):
+                 parsed_value = _parse_json_string(
+                     value, skip=True, raw_tool_call=raw_tool_call
+                 )
+                 if isinstance(parsed_value, (dict, list)):
+                     parsed_arguments[key] = parsed_value
+                 else:
+                     parsed_arguments[key] = value
+             else:
+                 parsed_arguments[key] = value
+     else:
+         parsed_arguments = _parse_json_string(
+             arguments, skip=False, raw_tool_call=raw_tool_call
+         )
+     return parsed_arguments
+
+
  def _get_tool_calls_from_response(
      response: Mapping[str, Any],
  ) -> List[ToolCall]:
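
The `_parse_arguments_from_tool_call` helper added above normalizes the inconsistent argument payloads Ollama can return (sometimes a dict, sometimes string-encoded JSON; see ollama/ollama#6155). A minimal sketch of the intended behavior — the helper is private, so importing it as below is illustrative only:

```python
from langchain_ollama.chat_models import _parse_arguments_from_tool_call

# One argument arrives as a string-encoded JSON object, another as a plain string.
raw_tool_call = {
    "function": {
        "name": "get_weather",
        "arguments": {
            "location": '{"city": "Pune", "country": "India"}',
            "unit": "celsius",
        },
    }
}

parsed = _parse_arguments_from_tool_call(raw_tool_call)
# Shallowly nested JSON strings are decoded; ordinary strings pass through:
# {"location": {"city": "Pune", "country": "India"}, "unit": "celsius"}
```
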
@@ -72,7 +163,7 @@ def _get_tool_calls_from_response(
                      tool_call(
                          id=str(uuid4()),
                          name=tc["function"]["name"],
-                         args=tc["function"]["arguments"],
+                         args=_parse_arguments_from_tool_call(tc) or {},
                      )
                  )
      return tool_calls
@@ -89,6 +180,10 @@ def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
      }
 
 
+ def _is_pydantic_class(obj: Any) -> bool:
+     return isinstance(obj, type) and is_basemodel_subclass(obj)
+
+
  class ChatOllama(BaseChatModel):
      r"""Ollama chat model integration.
 
@@ -144,7 +239,7 @@ class ChatOllama(BaseChatModel):
                  ("human", "Return the words Hello World!"),
              ]
              for chunk in llm.stream(messages):
-                 print(chunk)
+                 print(chunk.text(), end="")
 
 
          .. code-block:: python
@@ -222,8 +317,6 @@ class ChatOllama(BaseChatModel):
              '{"location": "Pune, India", "time_of_day": "morning"}'
 
      Tool Calling:
-         .. warning::
-             Ollama currently does not support streaming for tools
 
          .. code-block:: python
 
@@ -248,6 +341,13 @@ class ChatOllama(BaseChatModel):
      model: str
      """Model name to use."""
 
+     extract_reasoning: Optional[Union[bool, Tuple[str, str]]] = False
+     """Whether to extract the reasoning tokens in think blocks.
+     Extracts `chunk.content` to `chunk.additional_kwargs.reasoning_content`.
+     If a tuple is supplied, they are assumed to be the (start, end) tokens.
+     If `extract_reasoning=True`, the tokens will default to (<think>, </think>).
+     """
+
      mirostat: Optional[int] = None
      """Enable Mirostat sampling for controlling perplexity.
      (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
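
The new `extract_reasoning` field moves `<think>...</think>` blocks out of `content` and into `additional_kwargs["reasoning_content"]`. A minimal usage sketch, assuming a locally pulled model such as `deepseek-r1` that emits think blocks:

```python
from langchain_ollama import ChatOllama

# The model name is an assumption; any model that wraps its chain of thought
# in <think>...</think> tags should behave the same way.
llm = ChatOllama(model="deepseek-r1", extract_reasoning=True)

response = llm.invoke("What is 17 * 23?")
print(response.content)                                     # final answer only
print(response.additional_kwargs.get("reasoning_content"))  # extracted <think> text
```
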
@@ -317,8 +417,8 @@ class ChatOllama(BaseChatModel):
      to more diverse text, while a lower value (e.g., 0.5) will
      generate more focused and conservative text. (Default: 0.9)"""
 
-     format: Literal["", "json"] = ""
-     """Specify the format of the output (options: json)"""
+     format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
+     """Specify the format of the output (options: "json", JSON schema)."""
 
      keep_alive: Optional[Union[int, str]] = None
      """How long the model will stay loaded into memory."""
@@ -375,12 +475,9 @@ class ChatOllama(BaseChatModel):
              },
          )
 
-         tools = kwargs.get("tools")
-         default_stream = not bool(tools)
-
          params = {
              "messages": ollama_messages,
-             "stream": kwargs.pop("stream", default_stream),
+             "stream": kwargs.pop("stream", True),
              "model": kwargs.pop("model", self.model),
              "format": kwargs.pop("format", self.format),
              "options": Options(**options_dict),
@@ -388,7 +485,7 @@ class ChatOllama(BaseChatModel):
              **kwargs,
          }
 
-         if tools:
+         if tools := kwargs.get("tools"):
              params["tools"] = tools
 
          return params
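
Streaming now defaults to on even when tools are bound: the old `default_stream = not bool(tools)` guard is removed here, and the docstring warning about tools not streaming was dropped above. A hedged sketch of streaming with a bound tool, assuming `llama3.1` supports tool calling locally:

```python
from langchain_core.tools import tool
from langchain_ollama import ChatOllama


@tool
def get_weather(city: str) -> str:
    """Return the current weather for a city."""
    return f"It is sunny in {city}."


llm = ChatOllama(model="llama3.1").bind_tools([get_weather])

# Tool-call fragments now arrive as regular streamed chunks.
for chunk in llm.stream("What's the weather in Pune?"):
    print(chunk.tool_call_chunks or chunk.content)
```
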
@@ -484,6 +581,28 @@ class ChatOllama(BaseChatModel):
 
          return ollama_messages
 
+     def _extract_reasoning(
+         self, message_chunk: BaseMessageChunk, is_thinking: bool
+     ) -> Tuple[BaseMessageChunk, bool]:
+         """Mutate a message chunk to extract reasoning content."""
+         if not self.extract_reasoning:
+             return message_chunk, is_thinking
+         elif self.extract_reasoning is True:
+             start_token = DEFAULT_THINK_TOKEN_START
+             end_token = DEFAULT_THINK_TOKEN_END
+         else:
+             start_token, end_token = cast(tuple, self.extract_reasoning)
+         if start_token in message_chunk.content:
+             is_thinking = True
+         content = message_chunk.content
+         if is_thinking:
+             message_chunk.additional_kwargs["reasoning_content"] = content
+             message_chunk.content = ""
+         if end_token in content:
+             is_thinking = False
+
+         return message_chunk, is_thinking
+
      async def _acreate_chat_stream(
          self,
          messages: List[BaseMessage],
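
`_extract_reasoning` flips an `is_thinking` flag when the start token appears and clears it after the end token, rerouting everything in between into `additional_kwargs`. A sketch with custom markers — the model name and the `<reasoning>` tags below are assumptions, not library defaults:

```python
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="my-reasoning-model",  # hypothetical model that wraps its chain of thought
    extract_reasoning=("<reasoning>", "</reasoning>"),
)

for chunk in llm.stream("Why is the sky blue?"):
    reasoning = chunk.additional_kwargs.get("reasoning_content")
    if reasoning:
        print(f"[thinking] {reasoning}", end="")
    else:
        print(chunk.content, end="")
```
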
@@ -520,35 +639,17 @@ class ChatOllama(BaseChatModel):
          **kwargs: Any,
      ) -> ChatGenerationChunk:
          final_chunk = None
-         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
-             if not isinstance(stream_resp, str):
-                 chunk = ChatGenerationChunk(
-                     message=AIMessageChunk(
-                         content=(
-                             stream_resp["message"]["content"]
-                             if "message" in stream_resp
-                             and "content" in stream_resp["message"]
-                             else ""
-                         ),
-                         usage_metadata=_get_usage_metadata_from_generation_info(
-                             stream_resp
-                         ),
-                         tool_calls=_get_tool_calls_from_response(stream_resp),
-                     ),
-                     generation_info=(
-                         dict(stream_resp) if stream_resp.get("done") is True else None
-                     ),
+         for chunk in self._iterate_over_stream(messages, stop, **kwargs):
+             if final_chunk is None:
+                 final_chunk = chunk
+             else:
+                 final_chunk += chunk
+             if run_manager:
+                 run_manager.on_llm_new_token(
+                     chunk.text,
+                     chunk=chunk,
+                     verbose=verbose,
                  )
-                 if final_chunk is None:
-                     final_chunk = chunk
-                 else:
-                     final_chunk += chunk
-                 if run_manager:
-                     run_manager.on_llm_new_token(
-                         chunk.text,
-                         chunk=chunk,
-                         verbose=verbose,
-                     )
 
          if final_chunk is None:
              raise ValueError("No data received from Ollama stream.")
@@ -563,35 +664,17 @@ class ChatOllama(BaseChatModel):
          **kwargs: Any,
      ) -> ChatGenerationChunk:
          final_chunk = None
-         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
-             if not isinstance(stream_resp, str):
-                 chunk = ChatGenerationChunk(
-                     message=AIMessageChunk(
-                         content=(
-                             stream_resp["message"]["content"]
-                             if "message" in stream_resp
-                             and "content" in stream_resp["message"]
-                             else ""
-                         ),
-                         usage_metadata=_get_usage_metadata_from_generation_info(
-                             stream_resp
-                         ),
-                         tool_calls=_get_tool_calls_from_response(stream_resp),
-                     ),
-                     generation_info=(
-                         dict(stream_resp) if stream_resp.get("done") is True else None
-                     ),
+         async for chunk in self._aiterate_over_stream(messages, stop, **kwargs):
+             if final_chunk is None:
+                 final_chunk = chunk
+             else:
+                 final_chunk += chunk
+             if run_manager:
+                 await run_manager.on_llm_new_token(
+                     chunk.text,
+                     chunk=chunk,
+                     verbose=verbose,
                  )
-                 if final_chunk is None:
-                     final_chunk = chunk
-                 else:
-                     final_chunk += chunk
-                 if run_manager:
-                     await run_manager.on_llm_new_token(
-                         chunk.text,
-                         chunk=chunk,
-                         verbose=verbose,
-                     )
 
          if final_chunk is None:
              raise ValueError("No data received from Ollama stream.")
@@ -628,18 +711,19 @@ class ChatOllama(BaseChatModel):
                  content=final_chunk.text,
                  usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
                  tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
+                 additional_kwargs=final_chunk.message.additional_kwargs,
              ),
              generation_info=generation_info,
          )
          return ChatResult(generations=[chat_generation])
 
-     def _stream(
+     def _iterate_over_stream(
          self,
          messages: List[BaseMessage],
          stop: Optional[List[str]] = None,
-         run_manager: Optional[CallbackManagerForLLMRun] = None,
          **kwargs: Any,
      ) -> Iterator[ChatGenerationChunk]:
+         is_thinking = False
          for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
              if not isinstance(stream_resp, str):
                  chunk = ChatGenerationChunk(
@@ -659,20 +743,35 @@ class ChatOllama(BaseChatModel):
                          dict(stream_resp) if stream_resp.get("done") is True else None
                      ),
                  )
-                 if run_manager:
-                     run_manager.on_llm_new_token(
-                         chunk.text,
-                         verbose=self.verbose,
+                 if self.extract_reasoning:
+                     message, is_thinking = self._extract_reasoning(
+                         chunk.message, is_thinking
                      )
+                     chunk.message = message
                  yield chunk
 
-     async def _astream(
+     def _stream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[CallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> Iterator[ChatGenerationChunk]:
+         for chunk in self._iterate_over_stream(messages, stop, **kwargs):
+             if run_manager:
+                 run_manager.on_llm_new_token(
+                     chunk.text,
+                     verbose=self.verbose,
+                 )
+             yield chunk
+
+     async def _aiterate_over_stream(
          self,
          messages: List[BaseMessage],
          stop: Optional[List[str]] = None,
-         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
          **kwargs: Any,
      ) -> AsyncIterator[ChatGenerationChunk]:
+         is_thinking = False
          async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
              if not isinstance(stream_resp, str):
                  chunk = ChatGenerationChunk(
@@ -692,13 +791,28 @@ class ChatOllama(BaseChatModel):
                          dict(stream_resp) if stream_resp.get("done") is True else None
                      ),
                  )
-                 if run_manager:
-                     await run_manager.on_llm_new_token(
-                         chunk.text,
-                         verbose=self.verbose,
+                 if self.extract_reasoning:
+                     message, is_thinking = self._extract_reasoning(
+                         chunk.message, is_thinking
                      )
+                     chunk.message = message
                  yield chunk
 
+     async def _astream(
+         self,
+         messages: List[BaseMessage],
+         stop: Optional[List[str]] = None,
+         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+         **kwargs: Any,
+     ) -> AsyncIterator[ChatGenerationChunk]:
+         async for chunk in self._aiterate_over_stream(messages, stop, **kwargs):
+             if run_manager:
+                 await run_manager.on_llm_new_token(
+                     chunk.text,
+                     verbose=self.verbose,
+                 )
+             yield chunk
+
      async def _agenerate(
          self,
          messages: List[BaseMessage],
@@ -715,6 +829,7 @@ class ChatOllama(BaseChatModel):
                  content=final_chunk.text,
                  usage_metadata=cast(AIMessageChunk, final_chunk.message).usage_metadata,
                  tool_calls=cast(AIMessageChunk, final_chunk.message).tool_calls,
+                 additional_kwargs=final_chunk.message.additional_kwargs,
              ),
              generation_info=generation_info,
          )
@@ -747,3 +862,352 @@ class ChatOllama(BaseChatModel):
          """  # noqa: E501
          formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
          return super().bind(tools=formatted_tools, **kwargs)
+
+     def with_structured_output(
+         self,
+         schema: Union[Dict, type],
+         *,
+         method: Literal["function_calling", "json_mode", "json_schema"] = "json_schema",
+         include_raw: bool = False,
+         **kwargs: Any,
+     ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
+         """Model wrapper that returns outputs formatted to match the given schema.
+
+         Args:
+             schema:
+                 The output schema. Can be passed in as:
+
+                 - a Pydantic class,
+                 - a JSON schema
+                 - a TypedDict class
+                 - an OpenAI function/tool schema.
+
+                 If ``schema`` is a Pydantic class then the model output will be a
+                 Pydantic instance of that class, and the model-generated fields will be
+                 validated by the Pydantic class. Otherwise the model output will be a
+                 dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`
+                 for more on how to properly specify types and descriptions of
+                 schema fields when specifying a Pydantic or TypedDict class.
+
+             method: The method for steering model generation, one of:
+
+                 - "json_schema":
+                     Uses Ollama's structured output API: https://ollama.com/blog/structured-outputs
+                 - "function_calling":
+                     Uses Ollama's tool-calling API
+                 - "json_mode":
+                     Specifies ``format="json"``. Note that if using JSON mode then you
+                     must include instructions for formatting the output into the
+                     desired schema into the model call.
+
+             include_raw:
+                 If False then only the parsed structured output is returned. If
+                 an error occurs during model output parsing it will be raised. If True
+                 then both the raw model response (a BaseMessage) and the parsed model
+                 response will be returned. If an error occurs during output parsing it
+                 will be caught and returned as well. The final output is always a dict
+                 with keys "raw", "parsed", and "parsing_error".
+
+             kwargs: Additional keyword args aren't supported.
+
+         Returns:
+             A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.
+
+             | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+
+             | If ``include_raw`` is True, then Runnable outputs a dict with keys:
+
+                 - "raw": BaseMessage
+                 - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+                 - "parsing_error": Optional[BaseException]
+
+         .. versionchanged:: 0.2.2
+
+             Added support for structured output API via ``format`` parameter.
+
+         .. versionchanged:: 0.3.0
+
+             Updated default ``method`` to ``"json_schema"``.
+
+         .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False
+
+             .. code-block:: python
+
+                 from typing import Optional
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel, Field
+
+
+                 class AnswerWithJustification(BaseModel):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: Optional[str] = Field(
+                         default=..., description="A justification for the answer."
+                     )
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification
+                 )
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+
+                 # -> AnswerWithJustification(
+                 #     answer='They weigh the same',
+                 #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
+                 # )
+
+         .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True
+
+             .. code-block:: python
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel
+
+
+                 class AnswerWithJustification(BaseModel):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: str
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification, include_raw=True
+                 )
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+                 # -> {
+                 #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
+                 #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
+                 #     'parsing_error': None
+                 # }
+
+         .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False
+
+             .. code-block:: python
+
+                 from typing import Optional
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel, Field
+
+
+                 class AnswerWithJustification(BaseModel):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: Optional[str] = Field(
+                         default=..., description="A justification for the answer."
+                     )
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification, method="function_calling"
+                 )
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+
+                 # -> AnswerWithJustification(
+                 #     answer='They weigh the same',
+                 #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
+                 # )
+
+         .. dropdown:: Example: schema=TypedDict class, method="function_calling", include_raw=False
+
+             .. code-block:: python
+
+                 # IMPORTANT: If you are using Python <=3.8, you need to import Annotated
+                 # from typing_extensions, not from typing.
+                 from typing_extensions import Annotated, TypedDict
+
+                 from langchain_ollama import ChatOllama
+
+
+                 class AnswerWithJustification(TypedDict):
+                     '''An answer to the user question along with justification for the answer.'''
+
+                     answer: str
+                     justification: Annotated[
+                         Optional[str], None, "A justification for the answer."
+                     ]
+
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(AnswerWithJustification)
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+                 # -> {
+                 #     'answer': 'They weigh the same',
+                 #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+                 # }
+
+         .. dropdown:: Example: schema=OpenAI function schema, method="function_calling", include_raw=False
+
+             .. code-block:: python
+
+                 from langchain_ollama import ChatOllama
+
+                 oai_schema = {
+                     'name': 'AnswerWithJustification',
+                     'description': 'An answer to the user question along with justification for the answer.',
+                     'parameters': {
+                         'type': 'object',
+                         'properties': {
+                             'answer': {'type': 'string'},
+                             'justification': {'description': 'A justification for the answer.', 'type': 'string'}
+                         },
+                         'required': ['answer']
+                     }
+                 }
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(oai_schema)
+
+                 structured_llm.invoke(
+                     "What weighs more a pound of bricks or a pound of feathers"
+                 )
+                 # -> {
+                 #     'answer': 'They weigh the same',
+                 #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
+                 # }
+
+         .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True
+
+             .. code-block::
+
+                 from langchain_ollama import ChatOllama
+                 from pydantic import BaseModel
+
+                 class AnswerWithJustification(BaseModel):
+                     answer: str
+                     justification: str
+
+                 llm = ChatOllama(model="llama3.1", temperature=0)
+                 structured_llm = llm.with_structured_output(
+                     AnswerWithJustification,
+                     method="json_mode",
+                     include_raw=True
+                 )
+
+                 structured_llm.invoke(
+                     "Answer the following question. "
+                     "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n"
+                     "What's heavier a pound of bricks or a pound of feathers?"
+                 )
+                 # -> {
+                 #     'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'),
+                 #     'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
+                 #     'parsing_error': None
+                 # }
+         """  # noqa: E501, D301
+         _ = kwargs.pop("strict", None)
+         if kwargs:
+             raise ValueError(f"Received unsupported arguments {kwargs}")
+         is_pydantic_schema = _is_pydantic_class(schema)
+         if method == "function_calling":
+             if schema is None:
+                 raise ValueError(
+                     "schema must be specified when method is not 'json_mode'. "
+                     "Received None."
+                 )
+             formatted_tool = convert_to_openai_tool(schema)
+             tool_name = formatted_tool["function"]["name"]
+             llm = self.bind_tools(
+                 [schema],
+                 tool_choice=tool_name,
+                 ls_structured_output_format={
+                     "kwargs": {"method": method},
+                     "schema": formatted_tool,
+                 },
+             )
+             if is_pydantic_schema:
+                 output_parser: Runnable = PydanticToolsParser(
+                     tools=[schema],  # type: ignore[list-item]
+                     first_tool_only=True,
+                 )
+             else:
+                 output_parser = JsonOutputKeyToolsParser(
+                     key_name=tool_name, first_tool_only=True
+                 )
+         elif method == "json_mode":
+             llm = self.bind(
+                 format="json",
+                 ls_structured_output_format={
+                     "kwargs": {"method": method},
+                     "schema": schema,
+                 },
+             )
+             output_parser = (
+                 PydanticOutputParser(pydantic_object=schema)  # type: ignore[arg-type]
+                 if is_pydantic_schema
+                 else JsonOutputParser()
+             )
+         elif method == "json_schema":
+             if schema is None:
+                 raise ValueError(
+                     "schema must be specified when method is not 'json_mode'. "
+                     "Received None."
+                 )
+             if is_pydantic_schema:
+                 schema = cast(TypeBaseModel, schema)
+                 if issubclass(schema, BaseModelV1):
+                     response_format = schema.schema()
+                 else:
+                     response_format = schema.model_json_schema()
+                 llm = self.bind(
+                     format=response_format,
+                     ls_structured_output_format={
+                         "kwargs": {"method": method},
+                         "schema": schema,
+                     },
+                 )
+                 output_parser = PydanticOutputParser(pydantic_object=schema)
+             else:
+                 if is_typeddict(schema):
+                     response_format = convert_to_json_schema(schema)
+                     if "required" not in response_format:
+                         response_format["required"] = list(
+                             response_format["properties"].keys()
+                         )
+                 else:
+                     # is JSON schema
+                     response_format = cast(dict, schema)
+                 llm = self.bind(
+                     format=response_format,
+                     ls_structured_output_format={
+                         "kwargs": {"method": method},
+                         "schema": response_format,
+                     },
+                 )
+                 output_parser = JsonOutputParser()
+         else:
+             raise ValueError(
+                 f"Unrecognized method argument. Expected one of 'function_calling', "
+                 f"'json_schema', or 'json_mode'. Received: '{method}'"
+             )
+
+         if include_raw:
+             parser_assign = RunnablePassthrough.assign(
+                 parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
+             )
+             parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
+             parser_with_fallback = parser_assign.with_fallbacks(
+                 [parser_none], exception_key="parsing_error"
+             )
+             return RunnableMap(raw=llm) | parser_with_fallback
+         else:
+             return llm | output_parser
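
With the default `method` now `"json_schema"`, a plain JSON schema dict is passed straight through to Ollama's structured output API and the result is parsed with `JsonOutputParser`. A minimal sketch, assuming `llama3.1` is pulled locally:

```python
from langchain_ollama import ChatOllama

person_schema = {
    "title": "Person",
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
    },
    "required": ["name", "age"],
}

llm = ChatOllama(model="llama3.1", temperature=0)
structured_llm = llm.with_structured_output(person_schema)  # method="json_schema" by default

result = structured_llm.invoke("Describe a fictional person named Asha who is 29.")
# result is a plain dict, e.g. {"name": "Asha", "age": 29}
```
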

langchain_ollama/embeddings.py

@@ -1,9 +1,6 @@
  """Ollama embeddings models."""
 
- from typing import (
-     List,
-     Optional,
- )
+ from typing import Any, Dict, List, Optional
 
  from langchain_core.embeddings import Embeddings
  from ollama import AsyncClient, Client
@@ -144,10 +141,94 @@ class OllamaEmbeddings(BaseModel, Embeddings):
      The async client to use for making requests.
      """
 
+     mirostat: Optional[int] = None
+     """Enable Mirostat sampling for controlling perplexity.
+     (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+     mirostat_eta: Optional[float] = None
+     """Influences how quickly the algorithm responds to feedback
+     from the generated text. A lower learning rate will result in
+     slower adjustments, while a higher learning rate will make
+     the algorithm more responsive. (Default: 0.1)"""
+
+     mirostat_tau: Optional[float] = None
+     """Controls the balance between coherence and diversity
+     of the output. A lower value will result in more focused and
+     coherent text. (Default: 5.0)"""
+
+     num_ctx: Optional[int] = None
+     """Sets the size of the context window used to generate the
+     next token. (Default: 2048) """
+
+     num_gpu: Optional[int] = None
+     """The number of GPUs to use. On macOS it defaults to 1 to
+     enable metal support, 0 to disable."""
+
+     keep_alive: Optional[int] = None
+     """controls how long the model will stay loaded into memory
+     following the request (default: 5m)
+     """
+
+     num_thread: Optional[int] = None
+     """Sets the number of threads to use during computation.
+     By default, Ollama will detect this for optimal performance.
+     It is recommended to set this value to the number of physical
+     CPU cores your system has (as opposed to the logical number of cores)."""
+
+     repeat_last_n: Optional[int] = None
+     """Sets how far back for the model to look back to prevent
+     repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+     repeat_penalty: Optional[float] = None
+     """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+     will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+     will be more lenient. (Default: 1.1)"""
+
+     temperature: Optional[float] = None
+     """The temperature of the model. Increasing the temperature will
+     make the model answer more creatively. (Default: 0.8)"""
+
+     stop: Optional[List[str]] = None
+     """Sets the stop tokens to use."""
+
+     tfs_z: Optional[float] = None
+     """Tail free sampling is used to reduce the impact of less probable
+     tokens from the output. A higher value (e.g., 2.0) will reduce the
+     impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+     top_k: Optional[int] = None
+     """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+     will give more diverse answers, while a lower value (e.g. 10)
+     will be more conservative. (Default: 40)"""
+
+     top_p: Optional[float] = None
+     """Works together with top-k. A higher value (e.g., 0.95) will lead
+     to more diverse text, while a lower value (e.g., 0.5) will
+     generate more focused and conservative text. (Default: 0.9)"""
+
      model_config = ConfigDict(
          extra="forbid",
      )
 
+     @property
+     def _default_params(self) -> Dict[str, Any]:
+         """Get the default parameters for calling Ollama."""
+         return {
+             "mirostat": self.mirostat,
+             "mirostat_eta": self.mirostat_eta,
+             "mirostat_tau": self.mirostat_tau,
+             "num_ctx": self.num_ctx,
+             "num_gpu": self.num_gpu,
+             "num_thread": self.num_thread,
+             "repeat_last_n": self.repeat_last_n,
+             "repeat_penalty": self.repeat_penalty,
+             "temperature": self.temperature,
+             "stop": self.stop,
+             "tfs_z": self.tfs_z,
+             "top_k": self.top_k,
+             "top_p": self.top_p,
+         }
+
      @model_validator(mode="after")
      def _set_clients(self) -> Self:
          """Set clients to use for ollama."""
@@ -158,7 +239,9 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
      def embed_documents(self, texts: List[str]) -> List[List[float]]:
          """Embed search docs."""
-         embedded_docs = self._client.embed(self.model, texts)["embeddings"]
+         embedded_docs = self._client.embed(
+             self.model, texts, options=self._default_params, keep_alive=self.keep_alive
+         )["embeddings"]
          return embedded_docs
 
      def embed_query(self, text: str) -> List[float]:
@@ -167,9 +250,11 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
      async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
          """Embed search docs."""
-         embedded_docs = (await self._async_client.embed(self.model, texts))[
-             "embeddings"
-         ]
+         embedded_docs = (
+             await self._async_client.embed(
+                 self.model, texts, keep_alive=self.keep_alive
+             )
+         )["embeddings"]
          return embedded_docs
 
      async def aembed_query(self, text: str) -> List[float]:
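
`OllamaEmbeddings` now exposes the same sampling/runtime options as the chat model and forwards them (plus `keep_alive`) to the embed call. A minimal sketch; `nomic-embed-text` is an assumed locally pulled embedding model:

```python
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
    num_ctx=4096,    # forwarded to Ollama via the new _default_params options
    keep_alive=-1,   # keep the model loaded between calls
)

vectors = embeddings.embed_documents(["hello world", "goodbye world"])
print(len(vectors), len(vectors[0]))
```
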

langchain_ollama-0.3.0.dist-info/METADATA

@@ -1,21 +1,14 @@
  Metadata-Version: 2.1
  Name: langchain-ollama
- Version: 0.2.2rc1
+ Version: 0.3.0
  Summary: An integration package connecting Ollama and LangChain
- Home-page: https://github.com/langchain-ai/langchain
  License: MIT
- Requires-Python: >=3.9,<4.0
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Requires-Dist: langchain-core (>=0.3.20,<0.4.0)
- Requires-Dist: ollama (>=0.3.0,<1)
- Project-URL: Repository, https://github.com/langchain-ai/langchain
- Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
  Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/partners/ollama
+ Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-ollama%3D%3D0%22&expanded=true
+ Project-URL: repository, https://github.com/langchain-ai/langchain
+ Requires-Python: <4.0,>=3.9
+ Requires-Dist: ollama<1,>=0.4.4
+ Requires-Dist: langchain-core<1.0.0,>=0.3.47
  Description-Content-Type: text/markdown
 
  # langchain-ollama
@@ -62,4 +55,3 @@ from langchain_ollama import OllamaLLM
  llm = OllamaLLM(model="llama3")
  llm.invoke("The meaning of life is")
  ```
-

langchain_ollama-0.3.0.dist-info/RECORD

@@ -0,0 +1,10 @@
+ langchain_ollama-0.3.0.dist-info/METADATA,sha256=VcLxoKw-32dqWPuJrjPGq2HwweTu_v3ZEtLNIRNUBRc,1463
+ langchain_ollama-0.3.0.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+ langchain_ollama-0.3.0.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+ langchain_ollama-0.3.0.dist-info/licenses/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
+ langchain_ollama/__init__.py,sha256=SxPRrWcPayJpbwhheTtlqCaPp9ffiAAgZMM5Wc1yYpM,634
+ langchain_ollama/chat_models.py,sha256=VMk5GnKiyPQ5TERQDhdSe2uiBOKtCP0GmYlcJs4CC14,49328
+ langchain_ollama/embeddings.py,sha256=d0jSB-T8Awv0razTUA_iD-ZvTma82Nw44YtiVu983u0,8633
+ langchain_ollama/llms.py,sha256=ojnYU0efhN10xhUINu1dCR2Erw79J_mYS6_l45J7Vls,12778
+ langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ langchain_ollama-0.3.0.dist-info/RECORD,,

langchain_ollama-0.3.0.dist-info/WHEEL

@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.8.1
+ Generator: pdm-backend (2.4.3)
  Root-Is-Purelib: true
  Tag: py3-none-any

langchain_ollama-0.3.0.dist-info/entry_points.txt

@@ -0,0 +1,4 @@
+ [console_scripts]
+
+ [gui_scripts]
+

langchain_ollama-0.2.2rc1.dist-info/RECORD

@@ -1,9 +0,0 @@
- langchain_ollama/__init__.py,sha256=SxPRrWcPayJpbwhheTtlqCaPp9ffiAAgZMM5Wc1yYpM,634
- langchain_ollama/chat_models.py,sha256=BS28WEnDBq0aUrlOyABbcMkvIk4C-oV_Zj6bnhQoJkM,29902
- langchain_ollama/embeddings.py,sha256=svqdPF44qX5qbFpZmLiXrzTC-AldmMlZRS5wBfY-EmA,5056
- langchain_ollama/llms.py,sha256=ojnYU0efhN10xhUINu1dCR2Erw79J_mYS6_l45J7Vls,12778
- langchain_ollama/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- langchain_ollama-0.2.2rc1.dist-info/LICENSE,sha256=2btS8uNUDWD_UNjw9ba6ZJt_00aUjEw9CGyK-xIHY8c,1072
- langchain_ollama-0.2.2rc1.dist-info/METADATA,sha256=E9wttWytUkVCrJtbUjYA0nMxIt8tTkZOQZDFCU6Z_nc,1828
- langchain_ollama-0.2.2rc1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- langchain_ollama-0.2.2rc1.dist-info/RECORD,,