langchain-google-genai 1.0.5__tar.gz → 1.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain-google-genai might be problematic. Click here for more details.

Files changed (17)
  1. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/PKG-INFO +3 -3
  2. langchain_google_genai-1.0.7/langchain_google_genai/_function_utils.py +340 -0
  3. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/chat_models.py +81 -8
  4. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/embeddings.py +100 -20
  5. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/llms.py +10 -3
  6. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/pyproject.toml +8 -4
  7. langchain_google_genai-1.0.5/langchain_google_genai/_function_utils.py +0 -237
  8. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/LICENSE +0 -0
  9. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/README.md +0 -0
  10. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/__init__.py +0 -0
  11. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/_common.py +0 -0
  12. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/_enums.py +0 -0
  13. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/_genai_extension.py +0 -0
  14. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/_image_utils.py +0 -0
  15. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/genai_aqa.py +0 -0
  16. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/google_vector_store.py +0 -0
  17. {langchain_google_genai-1.0.5 → langchain_google_genai-1.0.7}/langchain_google_genai/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langchain-google-genai
3
- Version: 1.0.5
3
+ Version: 1.0.7
4
4
  Summary: An integration package connecting Google's genai package and LangChain
5
5
  Home-page: https://github.com/langchain-ai/langchain-google
6
6
  License: MIT
@@ -12,8 +12,8 @@ Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
14
  Provides-Extra: images
15
- Requires-Dist: google-generativeai (>=0.5.2,<0.6.0)
16
- Requires-Dist: langchain-core (>=0.2.0,<0.3)
15
+ Requires-Dist: google-generativeai (>=0.7.0,<0.8.0)
16
+ Requires-Dist: langchain-core (>=0.2.9,<0.3)
17
17
  Requires-Dist: pillow (>=10.1.0,<11.0.0) ; extra == "images"
18
18
  Project-URL: Repository, https://github.com/langchain-ai/langchain-google
19
19
  Project-URL: Source Code, https://github.com/langchain-ai/langchain-google/tree/main/libs/genai
@@ -0,0 +1,340 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import (
4
+ Any,
5
+ Callable,
6
+ Collection,
7
+ Dict,
8
+ List,
9
+ Literal,
10
+ Optional,
11
+ Sequence,
12
+ Type,
13
+ TypedDict,
14
+ Union,
15
+ cast,
16
+ )
17
+
18
+ import google.ai.generativelanguage as glm
19
+ from google.ai.generativelanguage import FunctionCallingConfig, FunctionDeclaration
20
+ from google.ai.generativelanguage import Tool as GoogleTool
21
+ from google.generativeai.types.content_types import ToolDict # type: ignore[import]
22
+ from langchain_core.pydantic_v1 import BaseModel
23
+ from langchain_core.tools import BaseTool
24
+ from langchain_core.tools import tool as callable_as_lc_tool
25
+ from langchain_core.utils.json_schema import dereference_refs
26
+
27
+ TYPE_ENUM = {
28
+ "string": glm.Type.STRING,
29
+ "number": glm.Type.NUMBER,
30
+ "integer": glm.Type.INTEGER,
31
+ "boolean": glm.Type.BOOLEAN,
32
+ "array": glm.Type.ARRAY,
33
+ "object": glm.Type.OBJECT,
34
+ }
35
+
36
+ TYPE_ENUM_REVERSE = {v: k for k, v in TYPE_ENUM.items()}
37
+
38
+
39
+ class _ToolDictLike(TypedDict):
40
+ function_declarations: _FunctionDeclarationLikeList
41
+
42
+
43
+ class _FunctionDeclarationDict(TypedDict):
44
+ name: str
45
+ description: str
46
+ parameters: Dict[str, Collection[str]]
47
+
48
+
49
+ class _ToolDict(TypedDict):
50
+ function_declarations: Sequence[_FunctionDeclarationDict]
51
+
52
+
53
+ # Info: This is a FunctionDeclaration(=fc).
54
+ _FunctionDeclarationLike = Union[
55
+ BaseTool, Type[BaseModel], FunctionDeclaration, Callable, Dict[str, Any]
56
+ ]
57
+
58
+ # Info: This mean one tool.
59
+ _FunctionDeclarationLikeList = Sequence[_FunctionDeclarationLike]
60
+
61
+
62
+ # Info: This means one tool=Sequence of FunctionDeclaration
63
+ # The dict should be GoogleTool like. {"function_declarations": [ { "name": ...}.
64
+ # OpenAI like dict is not be accepted. {{'type': 'function', 'function': {'name': ...}
65
+ _ToolsType = Union[
66
+ GoogleTool,
67
+ ToolDict,
68
+ _ToolDictLike,
69
+ _FunctionDeclarationLikeList,
70
+ _FunctionDeclarationLike,
71
+ ]
72
+
73
+
74
+ #
75
+ # Info: GoogleTool means function_declarations and proto.Message.
76
+ def convert_to_genai_function_declarations(
77
+ tool: _ToolsType,
78
+ ) -> GoogleTool:
79
+ if isinstance(tool, list):
80
+ # multiple _FunctionDeclarationLike
81
+ return GoogleTool(
82
+ function_declarations=_convert_fc_likes_to_genai_function(tool)
83
+ )
84
+ elif isinstance(tool, (BaseTool, FunctionDeclaration)):
85
+ # single _FunctionDeclarationLike
86
+ return GoogleTool(
87
+ function_declarations=[_convert_fc_like_to_genai_function(tool)]
88
+ )
89
+ elif isinstance(tool, type) and issubclass(tool, BaseModel):
90
+ # single _FunctionDeclarationLike
91
+ return GoogleTool(
92
+ function_declarations=[_convert_fc_like_to_genai_function(tool)]
93
+ )
94
+ elif isinstance(tool, GoogleTool):
95
+ return cast(GoogleTool, tool)
96
+ elif callable(tool):
97
+ return GoogleTool(
98
+ function_declarations=[
99
+ _convert_tool_to_genai_function(callable_as_lc_tool()(tool))
100
+ ]
101
+ )
102
+ elif isinstance(tool, dict):
103
+ return GoogleTool(function_declarations=_convert_dict_to_genai_functions(tool)) # type: ignore
104
+ else:
105
+ raise ValueError(f"Unsupported tool type {tool}")
106
+
107
+
108
+ def tool_to_dict(tool: GoogleTool) -> _ToolDict:
109
+ function_declarations = []
110
+ for function_declaration_proto in tool.function_declarations:
111
+ properties: Dict[str, Any] = {}
112
+ for property in function_declaration_proto.parameters.properties:
113
+ property_type = function_declaration_proto.parameters.properties[
114
+ property
115
+ ].type
116
+ property_dict = {"type": TYPE_ENUM_REVERSE[property_type]}
117
+ property_description = function_declaration_proto.parameters.properties[
118
+ property
119
+ ].description
120
+ if property_description:
121
+ property_dict["description"] = property_description
122
+ properties[property] = property_dict
123
+ name = function_declaration_proto.name
124
+ description = function_declaration_proto.description
125
+ parameters = {"type": "object", "properties": properties}
126
+ if function_declaration_proto.parameters.required:
127
+ parameters["required"] = function_declaration_proto.parameters.required
128
+ function_declaration = _FunctionDeclarationDict(
129
+ name=name, description=description, parameters=parameters
130
+ )
131
+ function_declarations.append(function_declaration)
132
+ return {"function_declarations": function_declarations}
133
+
134
+
135
+ def _convert_fc_likes_to_genai_function(
136
+ fc_likes: _FunctionDeclarationLikeList,
137
+ ) -> Sequence[FunctionDeclaration]:
138
+ if isinstance(fc_likes, list):
139
+ return [_convert_fc_like_to_genai_function(fc) for fc in fc_likes]
140
+ raise ValueError(f"Unsupported fc_likes type {fc_likes}")
141
+
142
+
143
+ def _convert_fc_like_to_genai_function(
144
+ fc_like: _FunctionDeclarationLike,
145
+ ) -> FunctionDeclaration:
146
+ if isinstance(fc_like, BaseTool):
147
+ return _convert_tool_to_genai_function(fc_like)
148
+ elif isinstance(fc_like, type) and issubclass(fc_like, BaseModel):
149
+ return _convert_pydantic_to_genai_function(fc_like)
150
+ elif isinstance(fc_like, dict):
151
+ # TODO: add declaration_index
152
+ return _convert_dict_to_genai_function(fc_like)
153
+ elif callable(fc_like):
154
+ return _convert_tool_to_genai_function(callable_as_lc_tool()(fc_like))
155
+ else:
156
+ raise ValueError(f"Unsupported fc_like type {fc_like}")
157
+
158
+
159
+ def _convert_tool_dict_to_genai_functions(
160
+ tool_dict: _ToolDictLike,
161
+ ) -> Sequence[FunctionDeclaration]:
162
+ if "function_declarations" in tool_dict:
163
+ return _convert_dicts_to_genai_functions(tool_dict["function_declarations"]) # type: ignore
164
+ else:
165
+ raise ValueError(f"Unsupported function tool_dict type {tool_dict}")
166
+
167
+
168
+ def _convert_dict_to_genai_functions(
169
+ function_declarations_dict: Dict[str, Any],
170
+ ) -> Sequence[FunctionDeclaration]:
171
+ if "function_declarations" in function_declarations_dict:
172
+ # GoogleTool like
173
+ return [
174
+ _convert_dict_to_genai_function(fc, i)
175
+ for i, fc in enumerate(function_declarations_dict["function_declarations"])
176
+ ]
177
+ d = function_declarations_dict
178
+ if "name" in d and "description" in d and "parameters" in d:
179
+ # _FunctionDeclarationDict
180
+ return [_convert_dict_to_genai_function(d)]
181
+ else:
182
+ # OpenAI like?
183
+ raise ValueError(f"Unsupported function call type {function_declarations_dict}")
184
+
185
+
186
+ def _convert_dicts_to_genai_functions(
187
+ function_declaration_dicts: Sequence[Dict[str, Any]],
188
+ ) -> Sequence[FunctionDeclaration]:
189
+ return [
190
+ _convert_dict_to_genai_function(function_declaration_dict, i)
191
+ for i, function_declaration_dict in enumerate(function_declaration_dicts)
192
+ ]
193
+
194
+
195
+ def _convert_dict_to_genai_function(
196
+ function_declaration_dict: Dict[str, Any], declaration_index: int = 0
197
+ ) -> FunctionDeclaration:
198
+ formatted_fc = {
199
+ "name": function_declaration_dict.get("name", f"unknown-{declaration_index}"),
200
+ "description": function_declaration_dict.get("description", "no-description"),
201
+ }
202
+ if "parameters" in function_declaration_dict:
203
+ formatted_fc["parameters"] = {
204
+ "properties": {
205
+ k: {
206
+ "type_": TYPE_ENUM[v["type"]],
207
+ "description": v.get("description"),
208
+ }
209
+ for k, v in function_declaration_dict["parameters"][
210
+ "properties"
211
+ ].items()
212
+ },
213
+ "required": function_declaration_dict.get("parameters", []).get(
214
+ "required", []
215
+ ),
216
+ "type_": TYPE_ENUM[function_declaration_dict["parameters"]["type"]],
217
+ }
218
+ return FunctionDeclaration(**formatted_fc)
219
+
220
+
221
+ def _convert_tool_to_genai_function(tool: BaseTool) -> FunctionDeclaration:
222
+ if tool.args_schema:
223
+ fc = tool.args_schema
224
+ if isinstance(fc, type) and issubclass(fc, BaseModel):
225
+ return _convert_pydantic_to_genai_function(
226
+ fc, tool_name=tool.name, tool_description=tool.description
227
+ )
228
+ raise ValueError(f"Unsupported function call type {fc}")
229
+ else:
230
+ return FunctionDeclaration(
231
+ name=tool.name,
232
+ description=tool.description,
233
+ parameters={
234
+ "properties": {
235
+ "__arg1": {"type_": TYPE_ENUM["string"]},
236
+ },
237
+ "required": ["__arg1"],
238
+ "type_": TYPE_ENUM["object"],
239
+ },
240
+ )
241
+
242
+
243
+ def _convert_pydantic_to_genai_function(
244
+ pydantic_model: Type[BaseModel],
245
+ tool_name: Optional[str] = None,
246
+ tool_description: Optional[str] = None,
247
+ ) -> FunctionDeclaration:
248
+ schema = dereference_refs(pydantic_model.schema())
249
+ schema.pop("definitions", None)
250
+ function_declaration = FunctionDeclaration(
251
+ name=tool_name if tool_name else schema.get("title"),
252
+ description=tool_description if tool_description else schema.get("description"),
253
+ parameters={
254
+ "properties": {
255
+ k: {
256
+ "type_": _get_type_from_schema(v),
257
+ "description": v.get("description"),
258
+ }
259
+ for k, v in schema["properties"].items()
260
+ },
261
+ "required": schema.get("required", []),
262
+ "type_": TYPE_ENUM[schema["type"]],
263
+ },
264
+ )
265
+ return function_declaration
266
+
267
+
268
+ def _get_type_from_schema(schema: Dict[str, Any]) -> int:
269
+ if "anyOf" in schema:
270
+ types = [_get_type_from_schema(sub_schema) for sub_schema in schema["anyOf"]]
271
+ types = [t for t in types if t is not None] # Remove None values
272
+ if types:
273
+ return types[-1] # TODO: update FunctionDeclaration and pass all types?
274
+ else:
275
+ pass
276
+ elif "type" in schema:
277
+ stype = str(schema["type"])
278
+ if stype in TYPE_ENUM:
279
+ return TYPE_ENUM[stype]
280
+ else:
281
+ pass
282
+ else:
283
+ pass
284
+ return TYPE_ENUM["string"] # Default to string if no valid types found
285
+
286
+
287
+ _ToolChoiceType = Union[
288
+ dict, List[str], str, Literal["auto", "none", "any"], Literal[True]
289
+ ]
290
+
291
+
292
+ class _FunctionCallingConfigDict(TypedDict):
293
+ mode: Union[FunctionCallingConfig.Mode, str]
294
+ allowed_function_names: Optional[List[str]]
295
+
296
+
297
+ class _ToolConfigDict(TypedDict):
298
+ function_calling_config: _FunctionCallingConfigDict
299
+
300
+
301
+ def _tool_choice_to_tool_config(
302
+ tool_choice: _ToolChoiceType,
303
+ all_names: List[str],
304
+ ) -> _ToolConfigDict:
305
+ allowed_function_names: Optional[List[str]] = None
306
+ if tool_choice is True or tool_choice == "any":
307
+ mode = "any"
308
+ allowed_function_names = all_names
309
+ elif tool_choice == "auto":
310
+ mode = "auto"
311
+ elif tool_choice == "none":
312
+ mode = "none"
313
+ elif isinstance(tool_choice, str):
314
+ mode = "any"
315
+ allowed_function_names = [tool_choice]
316
+ elif isinstance(tool_choice, list):
317
+ mode = "any"
318
+ allowed_function_names = tool_choice
319
+ elif isinstance(tool_choice, dict):
320
+ if "mode" in tool_choice:
321
+ mode = tool_choice["mode"]
322
+ allowed_function_names = tool_choice.get("allowed_function_names")
323
+ elif "function_calling_config" in tool_choice:
324
+ mode = tool_choice["function_calling_config"]["mode"]
325
+ allowed_function_names = tool_choice["function_calling_config"].get(
326
+ "allowed_function_names"
327
+ )
328
+ else:
329
+ raise ValueError(
330
+ f"Unrecognized tool choice format:\n\n{tool_choice=}\n\nShould match "
331
+ f"Google GenerativeAI ToolConfig or FunctionCallingConfig format."
332
+ )
333
+ else:
334
+ raise ValueError(f"Unrecognized tool choice format:\n\n{tool_choice=}")
335
+ return _ToolConfigDict(
336
+ function_calling_config={
337
+ "mode": mode,
338
+ "allowed_function_names": allowed_function_names,
339
+ }
340
+ )
@@ -8,6 +8,7 @@ import os
8
8
  import uuid
9
9
  import warnings
10
10
  from io import BytesIO
11
+ from operator import itemgetter
11
12
  from typing import (
12
13
  Any,
13
14
  AsyncIterator,
@@ -19,6 +20,7 @@ from typing import (
19
20
  Optional,
20
21
  Sequence,
21
22
  Tuple,
23
+ Type,
22
24
  Union,
23
25
  cast,
24
26
  )
@@ -51,7 +53,7 @@ from langchain_core.callbacks.manager import (
51
53
  CallbackManagerForLLMRun,
52
54
  )
53
55
  from langchain_core.language_models import LanguageModelInput
54
- from langchain_core.language_models.chat_models import BaseChatModel
56
+ from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
55
57
  from langchain_core.messages import (
56
58
  AIMessage,
57
59
  AIMessageChunk,
@@ -64,10 +66,16 @@ from langchain_core.messages import (
64
66
  ToolCallChunk,
65
67
  ToolMessage,
66
68
  )
67
- from langchain_core.output_parsers.openai_tools import parse_tool_calls
69
+ from langchain_core.messages.ai import UsageMetadata
70
+ from langchain_core.output_parsers.base import OutputParserLike
71
+ from langchain_core.output_parsers.openai_tools import (
72
+ JsonOutputToolsParser,
73
+ PydanticToolsParser,
74
+ parse_tool_calls,
75
+ )
68
76
  from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
69
- from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
70
- from langchain_core.runnables import Runnable
77
+ from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
78
+ from langchain_core.runnables import Runnable, RunnablePassthrough
71
79
  from langchain_core.utils import get_from_dict_or_env
72
80
  from tenacity import (
73
81
  before_sleep_log,
@@ -526,6 +534,22 @@ def _response_to_result(
526
534
  """Converts a PaLM API response into a LangChain ChatResult."""
527
535
  llm_output = {"prompt_feedback": proto.Message.to_dict(response.prompt_feedback)}
528
536
 
537
+ # Get usage metadata
538
+ try:
539
+ input_tokens = response.usage_metadata.prompt_token_count
540
+ output_tokens = response.usage_metadata.candidates_token_count
541
+ total_tokens = response.usage_metadata.total_token_count
542
+ if input_tokens + output_tokens + total_tokens > 0:
543
+ lc_usage = UsageMetadata(
544
+ input_tokens=input_tokens,
545
+ output_tokens=output_tokens,
546
+ total_tokens=total_tokens,
547
+ )
548
+ else:
549
+ lc_usage = None
550
+ except AttributeError:
551
+ lc_usage = None
552
+
529
553
  generations: List[ChatGeneration] = []
530
554
 
531
555
  for candidate in response.candidates:
@@ -536,9 +560,11 @@ def _response_to_result(
536
560
  proto.Message.to_dict(safety_rating, use_integers_for_enums=False)
537
561
  for safety_rating in candidate.safety_ratings
538
562
  ]
563
+ message = _parse_response_candidate(candidate, streaming=stream)
564
+ message.usage_metadata = lc_usage
539
565
  generations.append(
540
566
  (ChatGenerationChunk if stream else ChatGeneration)(
541
- message=_parse_response_candidate(candidate, streaming=stream),
567
+ message=message,
542
568
  generation_info=generation_info,
543
569
  )
544
570
  )
@@ -593,8 +619,8 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
593
619
 
594
620
  convert_system_message_to_human: bool = False
595
621
  """Whether to merge any leading SystemMessage into the following HumanMessage.
596
-
597
- Gemini does not support system messages; any unsupported messages will
622
+
623
+ Gemini does not support system messages; any unsupported messages will
598
624
  raise an error."""
599
625
 
600
626
  class Config:
@@ -678,6 +704,23 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
678
704
  "safety_settings": self.safety_settings,
679
705
  }
680
706
 
707
+ def _get_ls_params(
708
+ self, stop: Optional[List[str]] = None, **kwargs: Any
709
+ ) -> LangSmithParams:
710
+ """Get standard params for tracing."""
711
+ params = self._get_invocation_params(stop=stop, **kwargs)
712
+ ls_params = LangSmithParams(
713
+ ls_provider="google_genai",
714
+ ls_model_name=self.model,
715
+ ls_model_type="chat",
716
+ ls_temperature=params.get("temperature", self.temperature),
717
+ )
718
+ if ls_max_tokens := params.get("max_output_tokens", self.max_output_tokens):
719
+ ls_params["ls_max_tokens"] = ls_max_tokens
720
+ if ls_stop := stop or params.get("stop", None):
721
+ ls_params["ls_stop"] = ls_stop
722
+ return ls_params
723
+
681
724
  def _prepare_params(
682
725
  self,
683
726
  stop: Optional[List[str]],
@@ -901,6 +944,33 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
901
944
  )
902
945
  return result.total_tokens
903
946
 
947
+ def with_structured_output(
948
+ self,
949
+ schema: Union[Dict, Type[BaseModel]],
950
+ *,
951
+ include_raw: bool = False,
952
+ **kwargs: Any,
953
+ ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
954
+ if kwargs:
955
+ raise ValueError(f"Received unsupported arguments {kwargs}")
956
+ if isinstance(schema, type) and issubclass(schema, BaseModel):
957
+ parser: OutputParserLike = PydanticToolsParser(
958
+ tools=[schema], first_tool_only=True
959
+ )
960
+ else:
961
+ parser = JsonOutputToolsParser()
962
+ llm = self.bind_tools([schema], tool_choice=False)
963
+ if include_raw:
964
+ parser_with_fallback = RunnablePassthrough.assign(
965
+ parsed=itemgetter("raw") | parser, parsing_error=lambda _: None
966
+ ).with_fallbacks(
967
+ [RunnablePassthrough.assign(parsed=lambda _: None)],
968
+ exception_key="parsing_error",
969
+ )
970
+ return {"raw": llm} | parser_with_fallback
971
+ else:
972
+ return llm | parser
973
+
904
974
  def bind_tools(
905
975
  self,
906
976
  tools: Sequence[Union[ToolDict, GoogleTool]],
@@ -927,7 +997,9 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
927
997
  f"both:\n\n{tool_choice=}\n\n{tool_config=}"
928
998
  )
929
999
  # Bind dicts for easier serialization/deserialization.
930
- genai_tools = [tool_to_dict(convert_to_genai_function_declarations(tools))]
1000
+ genai_tools = [
1001
+ tool_to_dict(convert_to_genai_function_declarations(tool)) for tool in tools
1002
+ ]
931
1003
  if tool_choice:
932
1004
  all_names = [
933
1005
  f["name"] # type: ignore[index]
@@ -935,4 +1007,5 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
935
1007
  for f in t["function_declarations"]
936
1008
  ]
937
1009
  tool_config = _tool_choice_to_tool_config(tool_choice, all_names)
1010
+
938
1011
  return self.bind(tools=genai_tools, tool_config=tool_config, **kwargs)
@@ -1,3 +1,5 @@
1
+ import re
2
+ import string
1
3
  from typing import Any, Dict, List, Optional
2
4
 
3
5
  # TODO: remove ignore once the google package is published with types
@@ -15,6 +17,9 @@ from langchain_google_genai._common import (
15
17
  )
16
18
  from langchain_google_genai._genai_extension import build_generative_service
17
19
 
20
+ _MAX_TOKENS_PER_BATCH = 20000
21
+ _DEFAULT_BATCH_SIZE = 100
22
+
18
23
 
19
24
  class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
20
25
  """`Google Generative AI Embeddings`.
@@ -81,6 +86,8 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
81
86
  google_api_key = get_from_dict_or_env(
82
87
  values, "google_api_key", "GOOGLE_API_KEY"
83
88
  )
89
+ if isinstance(google_api_key, SecretStr):
90
+ google_api_key = google_api_key.get_secret_value()
84
91
  client_info = get_client_info("GoogleGenerativeAIEmbeddings")
85
92
 
86
93
  values["client"] = build_generative_service(
@@ -91,6 +98,67 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
91
98
  )
92
99
  return values
93
100
 
101
+ @staticmethod
102
+ def _split_by_punctuation(text: str) -> List[str]:
103
+ """Splits a string by punctuation and whitespace characters."""
104
+ split_by = string.punctuation + "\t\n "
105
+ pattern = f"([{split_by}])"
106
+ # Using re.split to split the text based on the pattern
107
+ return [segment for segment in re.split(pattern, text) if segment]
108
+
109
+ @staticmethod
110
+ def _prepare_batches(texts: List[str], batch_size: int) -> List[List[str]]:
111
+ """Splits texts in batches based on current maximum batch size
112
+ and maximum tokens per request.
113
+ """
114
+ text_index = 0
115
+ texts_len = len(texts)
116
+ batch_token_len = 0
117
+ batches: List[List[str]] = []
118
+ current_batch: List[str] = []
119
+ if texts_len == 0:
120
+ return []
121
+ while text_index < texts_len:
122
+ current_text = texts[text_index]
123
+ # Number of tokens per a text is conservatively estimated
124
+ # as 2 times number of words, punctuation and whitespace characters.
125
+ # Using `count_tokens` API will make batching too expensive.
126
+ # Utilizing a tokenizer, would add a dependency that would not
127
+ # necessarily be reused by the application using this class.
128
+ current_text_token_cnt = (
129
+ len(GoogleGenerativeAIEmbeddings._split_by_punctuation(current_text))
130
+ * 2
131
+ )
132
+ end_of_batch = False
133
+ if current_text_token_cnt > _MAX_TOKENS_PER_BATCH:
134
+ # Current text is too big even for a single batch.
135
+ # Such request will fail, but we still make a batch
136
+ # so that the app can get the error from the API.
137
+ if len(current_batch) > 0:
138
+ # Adding current batch if not empty.
139
+ batches.append(current_batch)
140
+ current_batch = [current_text]
141
+ text_index += 1
142
+ end_of_batch = True
143
+ elif (
144
+ batch_token_len + current_text_token_cnt > _MAX_TOKENS_PER_BATCH
145
+ or len(current_batch) == batch_size
146
+ ):
147
+ end_of_batch = True
148
+ else:
149
+ if text_index == texts_len - 1:
150
+ # Last element - even though the batch may be not big,
151
+ # we still need to make it.
152
+ end_of_batch = True
153
+ batch_token_len += current_text_token_cnt
154
+ current_batch.append(current_text)
155
+ text_index += 1
156
+ if end_of_batch:
157
+ batches.append(current_batch)
158
+ current_batch = []
159
+ batch_token_len = 0
160
+ return batches
161
+
94
162
  def _prepare_request(
95
163
  self,
96
164
  text: str,
@@ -112,12 +180,14 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
112
180
  def embed_documents(
113
181
  self,
114
182
  texts: List[str],
183
+ *,
184
+ batch_size: int = _DEFAULT_BATCH_SIZE,
115
185
  task_type: Optional[str] = None,
116
186
  titles: Optional[List[str]] = None,
117
187
  output_dimensionality: Optional[int] = None,
118
188
  ) -> List[List[float]]:
119
- """Embed a list of strings. Vertex AI currently
120
- sets a max batch size of 5 strings.
189
+ """Embed a list of strings. Google Generative AI currently
190
+ sets a max batch size of 100 strings.
121
191
 
122
192
  Args:
123
193
  texts: List[str] The list of strings to embed.
@@ -127,28 +197,38 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
127
197
  Only applicable when TaskType is RETRIEVAL_DOCUMENT.
128
198
  output_dimensionality: Optional reduced dimension for the output embedding.
129
199
  https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest
130
-
131
200
  Returns:
132
201
  List of embeddings, one for each text.
133
202
  """
134
- titles = titles if titles else [None] * len(texts) # type: ignore[list-item]
135
- requests = [
136
- self._prepare_request(
137
- text=text,
138
- task_type=task_type,
139
- title=title,
140
- output_dimensionality=output_dimensionality,
141
- )
142
- for text, title in zip(texts, titles)
143
- ]
203
+ embeddings: List[List[float]] = []
204
+ batch_start_index = 0
205
+ for batch in GoogleGenerativeAIEmbeddings._prepare_batches(texts, batch_size):
206
+ if titles:
207
+ titles_batch = titles[
208
+ batch_start_index : batch_start_index + len(batch)
209
+ ]
210
+ batch_start_index += len(batch)
211
+ else:
212
+ titles_batch = [None] * len(batch) # type: ignore[list-item]
144
213
 
145
- try:
146
- result = self.client.batch_embed_contents(
147
- BatchEmbedContentsRequest(requests=requests, model=self.model)
148
- )
149
- except Exception as e:
150
- raise GoogleGenerativeAIError(f"Error embedding content: {e}") from e
151
- return [e.values for e in result.embeddings]
214
+ requests = [
215
+ self._prepare_request(
216
+ text=text,
217
+ task_type=task_type,
218
+ title=title,
219
+ output_dimensionality=output_dimensionality,
220
+ )
221
+ for text, title in zip(batch, titles_batch)
222
+ ]
223
+
224
+ try:
225
+ result = self.client.batch_embed_contents(
226
+ BatchEmbedContentsRequest(requests=requests, model=self.model)
227
+ )
228
+ except Exception as e:
229
+ raise GoogleGenerativeAIError(f"Error embedding content: {e}") from e
230
+ embeddings.extend([list(e.values) for e in result.embeddings])
231
+ return embeddings
152
232
 
153
233
  def embed_query(
154
234
  self,
@@ -325,9 +325,16 @@ class GoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseLLM):
325
325
  run_manager: Optional[CallbackManagerForLLMRun] = None,
326
326
  **kwargs: Any,
327
327
  ) -> Iterator[GenerationChunk]:
328
- generation_config = kwargs.get("generation_config", {})
329
- if stop:
330
- generation_config["stop_sequences"] = stop
328
+ generation_config = {
329
+ "stop_sequences": stop,
330
+ "temperature": self.temperature,
331
+ "top_p": self.top_p,
332
+ "top_k": self.top_k,
333
+ "max_output_tokens": self.max_output_tokens,
334
+ "candidate_count": self.n,
335
+ }
336
+ generation_config = generation_config | kwargs.get("generation_config", {})
337
+
331
338
  for stream_resp in _completion_with_retry(
332
339
  self,
333
340
  prompt,
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "langchain-google-genai"
3
- version = "1.0.5"
3
+ version = "1.0.7"
4
4
  description = "An integration package connecting Google's genai package and LangChain"
5
5
  authors = []
6
6
  readme = "README.md"
@@ -12,8 +12,8 @@ license = "MIT"
12
12
 
13
13
  [tool.poetry.dependencies]
14
14
  python = ">=3.9,<4.0"
15
- langchain-core = ">=0.2.0,<0.3"
16
- google-generativeai = "^0.5.2"
15
+ langchain-core = ">=0.2.9,<0.3"
16
+ google-generativeai = "^0.7.0"
17
17
  pillow = { version = "^10.1.0", optional = true }
18
18
 
19
19
  [tool.poetry.extras]
@@ -32,6 +32,9 @@ pytest-asyncio = "^0.21.1"
32
32
  numpy = "^1.26.2"
33
33
  langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" }
34
34
 
35
+ [tool.codespell]
36
+ ignore-words-list = "rouge"
37
+
35
38
  [tool.poetry.group.codespell]
36
39
  optional = true
37
40
 
@@ -58,6 +61,7 @@ types-google-cloud-ndb = "^2.2.0.1"
58
61
  types-pillow = "^10.1.0.2"
59
62
  types-protobuf = "^4.24.0.20240302"
60
63
  langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" }
64
+ numpy = "^1.26.2"
61
65
 
62
66
  [tool.poetry.group.dev]
63
67
  optional = true
@@ -69,7 +73,7 @@ types-pillow = "^10.1.0.2"
69
73
  types-google-cloud-ndb = "^2.2.0.1"
70
74
  langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" }
71
75
 
72
- [tool.ruff]
76
+ [tool.ruff.lint]
73
77
  select = [
74
78
  "E", # pycodestyle
75
79
  "F", # pyflakes
@@ -1,237 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import (
4
- Any,
5
- Callable,
6
- Dict,
7
- List,
8
- Literal,
9
- Optional,
10
- Sequence,
11
- Type,
12
- TypedDict,
13
- Union,
14
- cast,
15
- )
16
-
17
- import google.ai.generativelanguage as glm
18
- from google.ai.generativelanguage import (
19
- FunctionCallingConfig,
20
- FunctionDeclaration,
21
- )
22
- from google.ai.generativelanguage import (
23
- Tool as GoogleTool,
24
- )
25
- from langchain_core.pydantic_v1 import BaseModel
26
- from langchain_core.tools import BaseTool
27
- from langchain_core.tools import tool as callable_as_lc_tool
28
- from langchain_core.utils.json_schema import dereference_refs
29
-
30
- TYPE_ENUM = {
31
- "string": glm.Type.STRING,
32
- "number": glm.Type.NUMBER,
33
- "integer": glm.Type.INTEGER,
34
- "boolean": glm.Type.BOOLEAN,
35
- "array": glm.Type.ARRAY,
36
- "object": glm.Type.OBJECT,
37
- }
38
-
39
- TYPE_ENUM_REVERSE = {v: k for k, v in TYPE_ENUM.items()}
40
-
41
- _FunctionDeclarationLike = Union[
42
- BaseTool, Type[BaseModel], dict, Callable, FunctionDeclaration
43
- ]
44
-
45
-
46
- class _ToolDict(TypedDict):
47
- function_declarations: Sequence[_FunctionDeclarationLike]
48
-
49
-
50
- def convert_to_genai_function_declarations(
51
- tool: Union[
52
- GoogleTool,
53
- _ToolDict,
54
- _FunctionDeclarationLike,
55
- Sequence[_FunctionDeclarationLike],
56
- ],
57
- ) -> GoogleTool:
58
- if isinstance(tool, GoogleTool):
59
- return cast(GoogleTool, tool)
60
- if isinstance(tool, type) and issubclass(tool, BaseModel):
61
- return GoogleTool(function_declarations=[_convert_to_genai_function(tool)])
62
- if callable(tool):
63
- return _convert_tool_to_genai_function(callable_as_lc_tool()(tool))
64
- if isinstance(tool, list):
65
- return convert_to_genai_function_declarations({"function_declarations": tool})
66
- if isinstance(tool, dict) and "function_declarations" in tool:
67
- return GoogleTool(
68
- function_declarations=[
69
- _convert_to_genai_function(fc) for fc in tool["function_declarations"]
70
- ],
71
- )
72
- return GoogleTool(function_declarations=[_convert_to_genai_function(tool)]) # type: ignore[arg-type]
73
-
74
-
75
- def tool_to_dict(tool: GoogleTool) -> _ToolDict:
76
- function_declarations = []
77
- for function_declaration_proto in tool.function_declarations:
78
- properties: Dict[str, Any] = {}
79
- for property in function_declaration_proto.parameters.properties:
80
- property_type = function_declaration_proto.parameters.properties[
81
- property
82
- ].type
83
- property_dict = {"type": TYPE_ENUM_REVERSE[property_type]}
84
- property_description = function_declaration_proto.parameters.properties[
85
- property
86
- ].description
87
- if property_description:
88
- property_dict["description"] = property_description
89
- properties[property] = property_dict
90
- function_declaration = {
91
- "name": function_declaration_proto.name,
92
- "description": function_declaration_proto.description,
93
- "parameters": {"type": "object", "properties": properties},
94
- }
95
- if function_declaration_proto.parameters.required:
96
- function_declaration["parameters"][ # type: ignore[index]
97
- "required"
98
- ] = function_declaration_proto.parameters.required
99
- function_declarations.append(function_declaration)
100
- return {"function_declarations": function_declarations}
101
-
102
-
103
- def _convert_to_genai_function(fc: _FunctionDeclarationLike) -> FunctionDeclaration:
104
- if isinstance(fc, BaseTool):
105
- return _convert_tool_to_genai_function(fc)
106
- elif isinstance(fc, type) and issubclass(fc, BaseModel):
107
- return _convert_pydantic_to_genai_function(fc)
108
- elif callable(fc):
109
- return _convert_tool_to_genai_function(callable_as_lc_tool()(fc))
110
- elif isinstance(fc, dict):
111
- formatted_fc = {"name": fc["name"], "description": fc.get("description")}
112
- if "parameters" in fc:
113
- formatted_fc["parameters"] = {
114
- "properties": {
115
- k: {
116
- "type_": TYPE_ENUM[v["type"]],
117
- "description": v.get("description"),
118
- }
119
- for k, v in fc["parameters"]["properties"].items()
120
- },
121
- "required": fc.get("parameters", []).get("required", []),
122
- "type_": TYPE_ENUM[fc["parameters"]["type"]],
123
- }
124
- return FunctionDeclaration(**formatted_fc)
125
- else:
126
- raise ValueError(f"Unsupported function call type {fc}")
127
-
128
-
129
- def _convert_tool_to_genai_function(tool: BaseTool) -> FunctionDeclaration:
130
- if tool.args_schema:
131
- schema = dereference_refs(tool.args_schema.schema())
132
- schema.pop("definitions", None)
133
- return FunctionDeclaration(
134
- name=tool.name or schema["title"],
135
- description=tool.description or schema["description"],
136
- parameters={
137
- "properties": {
138
- k: {
139
- "type_": TYPE_ENUM[v["type"]],
140
- "description": v.get("description"),
141
- }
142
- for k, v in schema["properties"].items()
143
- },
144
- "required": schema.get("required", []),
145
- "type_": TYPE_ENUM[schema["type"]],
146
- },
147
- )
148
- else:
149
- return FunctionDeclaration(
150
- name=tool.name,
151
- description=tool.description,
152
- parameters={
153
- "properties": {
154
- "__arg1": {"type_": TYPE_ENUM["string"]},
155
- },
156
- "required": ["__arg1"],
157
- "type_": TYPE_ENUM["object"],
158
- },
159
- )
160
-
161
-
162
- def _convert_pydantic_to_genai_function(
163
- pydantic_model: Type[BaseModel],
164
- ) -> FunctionDeclaration:
165
- schema = dereference_refs(pydantic_model.schema())
166
- schema.pop("definitions", None)
167
- return FunctionDeclaration(
168
- name=schema["title"],
169
- description=schema.get("description", ""),
170
- parameters={
171
- "properties": {
172
- k: {
173
- "type_": TYPE_ENUM[v["type"]],
174
- "description": v.get("description"),
175
- }
176
- for k, v in schema["properties"].items()
177
- },
178
- "required": schema["required"],
179
- "type_": TYPE_ENUM[schema["type"]],
180
- },
181
- )
182
-
183
-
184
- _ToolChoiceType = Union[
185
- dict, List[str], str, Literal["auto", "none", "any"], Literal[True]
186
- ]
187
-
188
-
189
- class _FunctionCallingConfigDict(TypedDict):
190
- mode: Union[FunctionCallingConfig.Mode, str]
191
- allowed_function_names: Optional[List[str]]
192
-
193
-
194
- class _ToolConfigDict(TypedDict):
195
- function_calling_config: _FunctionCallingConfigDict
196
-
197
-
198
- def _tool_choice_to_tool_config(
199
- tool_choice: _ToolChoiceType,
200
- all_names: List[str],
201
- ) -> _ToolConfigDict:
202
- allowed_function_names: Optional[List[str]] = None
203
- if tool_choice is True or tool_choice == "any":
204
- mode = "any"
205
- allowed_function_names = all_names
206
- elif tool_choice == "auto":
207
- mode = "auto"
208
- elif tool_choice == "none":
209
- mode = "none"
210
- elif isinstance(tool_choice, str):
211
- mode = "any"
212
- allowed_function_names = [tool_choice]
213
- elif isinstance(tool_choice, list):
214
- mode = "any"
215
- allowed_function_names = tool_choice
216
- elif isinstance(tool_choice, dict):
217
- if "mode" in tool_choice:
218
- mode = tool_choice["mode"]
219
- allowed_function_names = tool_choice.get("allowed_function_names")
220
- elif "function_calling_config" in tool_choice:
221
- mode = tool_choice["function_calling_config"]["mode"]
222
- allowed_function_names = tool_choice["function_calling_config"].get(
223
- "allowed_function_names"
224
- )
225
- else:
226
- raise ValueError(
227
- f"Unrecognized tool choice format:\n\n{tool_choice=}\n\nShould match "
228
- f"Google GenerativeAI ToolConfig or FunctionCallingConfig format."
229
- )
230
- else:
231
- raise ValueError(f"Unrecognized tool choice format:\n\n{tool_choice=}")
232
- return _ToolConfigDict(
233
- function_calling_config={
234
- "mode": mode,
235
- "allowed_function_names": allowed_function_names,
236
- }
237
- )