langchain-google-genai 2.1.5__py3-none-any.whl → 2.1.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.



--- a/langchain_google_genai/_function_utils.py
+++ b/langchain_google_genai/_function_utils.py
@@ -30,6 +30,7 @@ from langchain_core.utils.function_calling import (
 from langchain_core.utils.json_schema import dereference_refs
 from pydantic import BaseModel
 from pydantic.v1 import BaseModel as BaseModelV1
+from typing_extensions import NotRequired
 
 logger = logging.getLogger(__name__)
 
@@ -65,11 +66,15 @@ _GoogleSearchRetrievalLike = Union[
     gapic.GoogleSearchRetrieval,
     Dict[str, Any],
 ]
+_GoogleSearchLike = Union[gapic.Tool.GoogleSearch, Dict[str, Any]]
+_CodeExecutionLike = Union[gapic.CodeExecution, Dict[str, Any]]
 
 
 class _ToolDict(TypedDict):
     function_declarations: Sequence[_FunctionDeclarationLike]
     google_search_retrieval: Optional[_GoogleSearchRetrievalLike]
+    google_search: NotRequired[_GoogleSearchLike]
+    code_execution: NotRequired[_CodeExecutionLike]
 
 
 # Info: This means one tool=Sequence of FunctionDeclaration
@@ -158,6 +163,8 @@ def convert_to_genai_function_declarations(
             for f in [
                 "function_declarations",
                 "google_search_retrieval",
+                "google_search",
+                "code_execution",
             ]
         ):
             fd = _format_to_gapic_function_declaration(tool)  # type: ignore[arg-type]
@@ -184,6 +191,12 @@ def convert_to_genai_function_declarations(
             gapic_tool.google_search_retrieval = gapic.GoogleSearchRetrieval(
                 tool["google_search_retrieval"]
             )
+            if "google_search" in tool:
+                gapic_tool.google_search = gapic.Tool.GoogleSearch(
+                    tool["google_search"]
+                )
+            if "code_execution" in tool:
+                gapic_tool.code_execution = gapic.CodeExecution(tool["code_execution"])
         else:
             fd = _format_to_gapic_function_declaration(tool)  # type: ignore[arg-type]
             gapic_tool.function_declarations.append(fd)
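
The two new `_ToolDict` keys map directly onto the gapic fields set in the branch above. A minimal sketch of a caller exercising them, assuming `convert_to_genai_function_declarations` accepts a sequence of tool dicts (as the loop over `"function_declarations"`, `"google_search"`, etc. suggests) and that an empty dict is a valid config for each built-in tool:

```python
from langchain_google_genai._function_utils import convert_to_genai_function_declarations

# Hypothetical tool dict; the empty configs are an assumption.
tool = {
    "google_search": {},   # enables Gemini's built-in Google Search tool
    "code_execution": {},  # enables Gemini's built-in code execution tool
}
gapic_tool = convert_to_genai_function_declarations([tool])
```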
@@ -520,3 +533,60 @@ def safe_import(module_name: str, attribute_name: str = "") -> bool:
         return True
     except ImportError:
         return False
+
+
+def replace_defs_in_schema(original_schema: dict, defs: Optional[dict] = None) -> dict:
+    """Given an OpenAPI schema with a property '$defs' replaces all occurrences of
+    referenced items in the dictionary.
+
+    Args:
+        original_schema: Schema generated by `BaseModel.model_schema_json`
+        defs: Definitions for recursive calls.
+
+    Returns:
+        Schema with refs replaced.
+    """
+
+    new_defs = defs or original_schema.get("$defs")
+
+    if new_defs is None or not isinstance(new_defs, dict):
+        return original_schema.copy()
+
+    resulting_schema = {}
+
+    for key, value in original_schema.items():
+        if key == "$defs":
+            continue
+
+        if not isinstance(value, dict):
+            resulting_schema[key] = value
+        else:
+            if "$ref" in value:
+                new_value = value.copy()
+
+                path = new_value.pop("$ref")
+                def_key = _get_def_key_from_schema_path(path)
+                new_item = new_defs.get(def_key)
+
+                assert isinstance(new_item, dict)
+                new_value.update(new_item)
+
+                resulting_schema[key] = replace_defs_in_schema(new_value, defs=new_defs)
+            else:
+                resulting_schema[key] = replace_defs_in_schema(value, defs=new_defs)
+
+    return resulting_schema
+
+
+def _get_def_key_from_schema_path(schema_path: str) -> str:
+    error_message = f"Malformed schema reference path {schema_path}"
+
+    if not isinstance(schema_path, str) or not schema_path.startswith("#/$defs/"):
+        raise ValueError(error_message)
+
+    # Schema has to have only one extra level.
+    parts = schema_path.split("/")
+    if len(parts) != 3:
+        raise ValueError(error_message)
+
+    return parts[-1]
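
A quick sketch of what `replace_defs_in_schema` does, using a hypothetical schema shaped like `BaseModel.model_json_schema()` output (the refs are inlined because, as the `with_structured_output` change below notes, the Gemini API does not support `$ref`/`$defs`):

```python
from langchain_google_genai._function_utils import replace_defs_in_schema

schema = {
    "$defs": {"Address": {"type": "object", "properties": {"city": {"type": "string"}}}},
    "type": "object",
    "properties": {"home": {"$ref": "#/$defs/Address"}},
}

resolved = replace_defs_in_schema(schema)
# The "$defs" key is dropped and every "$ref" is replaced by the referenced item:
# {"type": "object",
#  "properties": {"home": {"type": "object",
#                          "properties": {"city": {"type": "string"}}}}}
```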
--- a/langchain_google_genai/chat_models.py
+++ b/langchain_google_genai/chat_models.py
@@ -2,11 +2,13 @@ from __future__ import annotations
 
 import asyncio
 import base64
+import io
 import json
 import logging
 import mimetypes
 import uuid
 import warnings
+import wave
 from difflib import get_close_matches
 from operator import itemgetter
 from typing import (
@@ -16,6 +18,7 @@ from typing import (
     Dict,
     Iterator,
     List,
+    Literal,
     Mapping,
     Optional,
     Sequence,
@@ -37,7 +40,9 @@ from google.ai.generativelanguage_v1beta.types import (
     Blob,
     Candidate,
     CodeExecution,
+    CodeExecutionResult,
     Content,
+    ExecutableCode,
     FileData,
     FunctionCall,
     FunctionDeclaration,
@@ -69,6 +74,7 @@ from langchain_core.messages import (
 )
 from langchain_core.messages.ai import UsageMetadata
 from langchain_core.messages.tool import invalid_tool_call, tool_call, tool_call_chunk
+from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
 from langchain_core.output_parsers.base import OutputParserLike
 from langchain_core.output_parsers.openai_tools import (
     JsonOutputKeyToolsParser,
@@ -79,7 +85,11 @@ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResu
 from langchain_core.runnables import Runnable, RunnableConfig, RunnablePassthrough
 from langchain_core.tools import BaseTool
 from langchain_core.utils import get_pydantic_field_names
-from langchain_core.utils.function_calling import convert_to_openai_tool
+from langchain_core.utils.function_calling import (
+    convert_to_json_schema,
+    convert_to_openai_tool,
+)
+from langchain_core.utils.pydantic import is_basemodel_subclass
 from langchain_core.utils.utils import _build_model_kwargs
 from pydantic import (
     BaseModel,
@@ -88,6 +98,7 @@ from pydantic import (
     SecretStr,
     model_validator,
 )
+from pydantic.v1 import BaseModel as BaseModelV1
 from tenacity import (
     before_sleep_log,
     retry,
@@ -104,12 +115,14 @@ from langchain_google_genai._common import (
     get_client_info,
 )
 from langchain_google_genai._function_utils import (
+    _dict_to_gapic_schema,
     _tool_choice_to_tool_config,
     _ToolChoiceType,
     _ToolConfigDict,
     _ToolDict,
     convert_to_genai_function_declarations,
     is_basemodel_subclass_safe,
+    replace_defs_in_schema,
     tool_to_dict,
 )
 from langchain_google_genai._image_utils import (
@@ -121,6 +134,7 @@ from . import _genai_extension as genaix
 
 logger = logging.getLogger(__name__)
 
+_allowed_params_prediction_service = ["request", "timeout", "metadata", "labels"]
 
 _FunctionDeclarationType = Union[
     FunctionDeclaration,
@@ -207,7 +221,14 @@ def _chat_with_retry(generation_method: Callable, **kwargs: Any) -> Any:
         except Exception as e:
             raise e
 
-    return _chat_with_retry(**kwargs)
+    params = (
+        {k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service}
+        if (request := kwargs.get("request"))
+        and hasattr(request, "model")
+        and "gemini" in request.model
+        else kwargs
+    )
+    return _chat_with_retry(**params)
 
 
 async def _achat_with_retry(generation_method: Callable, **kwargs: Any) -> Any:
@@ -240,7 +261,14 @@ async def _achat_with_retry(generation_method: Callable, **kwargs: Any) -> Any:
         except Exception as e:
             raise e
 
-    return await _achat_with_retry(**kwargs)
+    params = (
+        {k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service}
+        if (request := kwargs.get("request"))
+        and hasattr(request, "model")
+        and "gemini" in request.model
+        else kwargs
+    )
+    return await _achat_with_retry(**params)
 
 
 def _is_lc_content_block(part: dict) -> bool:
@@ -331,6 +359,37 @@ def _convert_to_parts(
                 metadata = VideoMetadata(part["video_metadata"])
                 media_part.video_metadata = metadata
             parts.append(media_part)
+        elif part["type"] == "executable_code":
+            if "executable_code" not in part or "language" not in part:
+                raise ValueError(
+                    "Executable code part must have 'code' and 'language' "
+                    f"keys, got {part}"
+                )
+            executable_code_part = Part(
+                executable_code=ExecutableCode(
+                    language=part["language"], code=part["executable_code"]
+                )
+            )
+            parts.append(executable_code_part)
+        elif part["type"] == "code_execution_result":
+            if "code_execution_result" not in part:
+                raise ValueError(
+                    "Code execution result part must have "
+                    f"'code_execution_result', got {part}"
+                )
+            if "outcome" in part:
+                outcome = part["outcome"]
+            else:
+                # Backward compatibility
+                outcome = 1  # Default to success if not specified
+            code_execution_result_part = Part(
+                code_execution_result=CodeExecutionResult(
+                    output=part["code_execution_result"], outcome=outcome
+                )
+            )
+            parts.append(code_execution_result_part)
+        elif part["type"] == "thinking":
+            parts.append(Part(text=part["thinking"], thought=True))
         else:
             raise ValueError(
                 f"Unrecognized message part type: {part['type']}. Only text, "
@@ -486,47 +545,54 @@ def _parse_chat_history(
     return system_instruction, messages
 
 
+# Helper function to append content consistently
+def _append_to_content(
+    current_content: Union[str, List[Any], None], new_item: Any
+) -> Union[str, List[Any]]:
+    """Appends a new item to the content, handling different initial content types."""
+    if current_content is None and isinstance(new_item, str):
+        return new_item
+    elif current_content is None:
+        return [new_item]
+    elif isinstance(current_content, str):
+        return [current_content, new_item]
+    elif isinstance(current_content, list):
+        current_content.append(new_item)
+        return current_content
+    else:
+        # This case should ideally not be reached with proper type checking,
+        # but it catches any unexpected types that might slip through.
+        raise TypeError(f"Unexpected content type: {type(current_content)}")
+
+
 def _parse_response_candidate(
     response_candidate: Candidate, streaming: bool = False
 ) -> AIMessage:
     content: Union[None, str, List[Union[str, dict]]] = None
-    additional_kwargs = {}
+    additional_kwargs: Dict[str, Any] = {}
     tool_calls = []
     invalid_tool_calls = []
    tool_call_chunks = []
 
     for part in response_candidate.content.parts:
+        text: Optional[str] = None
         try:
-            text: Optional[str] = part.text
-            # Remove erroneous newline character if present
-            if not streaming and text is not None:
-                text = text.rstrip("\n")
+            if hasattr(part, "text") and part.text is not None:
+                text = part.text
+                # Remove erroneous newline character if present
+                if not streaming:
+                    text = text.rstrip("\n")
         except AttributeError:
-            text = None
+            pass
 
-        if part.thought:
+        if hasattr(part, "thought") and part.thought:
             thinking_message = {
                 "type": "thinking",
                 "thinking": part.text,
             }
-            if not content:
-                content = [thinking_message]
-            elif isinstance(content, str):
-                content = [thinking_message, content]
-            elif isinstance(content, list):
-                content.append(thinking_message)
-            else:
-                raise Exception("Unexpected content type")
-
-        elif text is not None:
-            if not content:
-                content = text
-            elif isinstance(content, str) and text:
-                content = [content, text]
-            elif isinstance(content, list) and text:
-                content.append(text)
-            elif text:
-                raise Exception("Unexpected content type")
+            content = _append_to_content(content, thinking_message)
+        elif text is not None and text:
+            content = _append_to_content(content, text)
 
         if hasattr(part, "executable_code") and part.executable_code is not None:
             if part.executable_code.code and part.executable_code.language:
@@ -535,14 +601,7 @@ def _parse_response_candidate(
                     "executable_code": part.executable_code.code,
                     "language": part.executable_code.language,
                 }
-                if not content:
-                    content = [code_message]
-                elif isinstance(content, str):
-                    content = [content, code_message]
-                elif isinstance(content, list):
-                    content.append(code_message)
-                else:
-                    raise Exception("Unexpected content type")
+                content = _append_to_content(content, code_message)
 
         if (
             hasattr(part, "code_execution_result")
@@ -552,20 +611,25 @@ def _parse_response_candidate(
             execution_result = {
                 "type": "code_execution_result",
                 "code_execution_result": part.code_execution_result.output,
+                "outcome": part.code_execution_result.outcome,
             }
+            content = _append_to_content(content, execution_result)
 
-            if not content:
-                content = [execution_result]
-            elif isinstance(content, str):
-                content = [content, execution_result]
-            elif isinstance(content, list):
-                content.append(execution_result)
-            else:
-                raise Exception("Unexpected content type")
+        if part.inline_data.mime_type.startswith("audio/"):
+            buffer = io.BytesIO()
+
+            with wave.open(buffer, "wb") as wf:
+                wf.setnchannels(1)
+                wf.setsampwidth(2)
+                # TODO: Read Sample Rate from MIME content type.
+                wf.setframerate(24000)
+                wf.writeframes(part.inline_data.data)
+
+            additional_kwargs["audio"] = buffer.getvalue()
 
         if part.inline_data.mime_type.startswith("image/"):
             image_format = part.inline_data.mime_type[6:]
-            message = {
+            image_message = {
                 "type": "image_url",
                 "image_url": {
                     "url": image_bytes_to_b64_string(
@@ -573,15 +637,7 @@ def _parse_response_candidate(
                 )
             },
         }
-
-        if not content:
-            content = [message]
-        elif isinstance(content, str) and message:
-            content = [content, message]
-        elif isinstance(content, list) and message:
-            content.append(message)
-        elif message:
-            raise Exception("Unexpected content type")
+        content = _append_to_content(content, image_message)
 
         if part.function_call:
             function_call = {"name": part.function_call.name}
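
All of the repeated if/elif merging blocks above now funnel through `_append_to_content`. A short sketch of its behavior (values are hypothetical):

```python
content = _append_to_content(None, "hello")
# -> "hello"  (a lone string stays a string)
content = _append_to_content(content, {"type": "thinking", "thinking": "hmm"})
# -> ["hello", {"type": "thinking", "thinking": "hmm"}]  (promoted to a list)
content = _append_to_content(content, "world")
# -> ["hello", {"type": "thinking", "thinking": "hmm"}, "world"]  (appended in place)
```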
@@ -1074,6 +1130,21 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
     Gemini does not support system messages; any unsupported messages will
     raise an error."""
 
+    response_mime_type: Optional[str] = None
+    """Optional. Output response mimetype of the generated candidate text. Only
+    supported in Gemini 1.5 and later models. Supported mimetypes:
+    * "text/plain": (default) Text output.
+    * "application/json": JSON response in the candidates.
+    * "text/x.enum": Enum in plain text.
+    The model also needs to be prompted to output the appropriate response
+    type, otherwise the behavior is undefined. This is a preview feature.
+    """
+
+    response_schema: Optional[Dict[str, Any]] = None
+    """Optional. Enforce a schema on the output.
+    The format of the dictionary should follow OpenAPI schema conventions.
+    """
+
     cached_content: Optional[str] = None
     """The name of the cached content used as context to serve the prediction.
 
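
A hedged sketch of the two new fields in use (the model name and schema are illustrative; per the validation added to `_prepare_params` below, `response_schema` requires `response_mime_type` to be `"application/json"` or `"text/x.enum"`):

```python
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",  # illustrative
    response_mime_type="application/json",
    response_schema={
        "type": "object",
        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    },
)
response = llm.invoke("Describe a fictional person as JSON.")
```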
@@ -1281,6 +1352,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
         self,
         stop: Optional[List[str]],
         generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
     ) -> GenerationConfig:
         gen_config = {
             k: v
@@ -1311,6 +1383,24 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
         }
         if generation_config:
             gen_config = {**gen_config, **generation_config}
+
+        response_mime_type = kwargs.get("response_mime_type", self.response_mime_type)
+        if response_mime_type is not None:
+            gen_config["response_mime_type"] = response_mime_type
+
+        response_schema = kwargs.get("response_schema", self.response_schema)
+        if response_schema is not None:
+            allowed_mime_types = ("application/json", "text/x.enum")
+            if response_mime_type not in allowed_mime_types:
+                error_message = (
+                    "`response_schema` is only supported when "
+                    f"`response_mime_type` is set to one of {allowed_mime_types}"
+                )
+                raise ValueError(error_message)
+
+            gapic_response_schema = _dict_to_gapic_schema(response_schema)
+            if gapic_response_schema is not None:
+                gen_config["response_schema"] = gapic_response_schema
         return GenerationConfig(**gen_config)
 
     def _generate(
@@ -1338,6 +1428,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
             generation_config=generation_config,
             cached_content=cached_content or self.cached_content,
             tool_choice=tool_choice,
+            **kwargs,
         )
         response: GenerateContentResponse = _chat_with_retry(
             request=request,
@@ -1387,6 +1478,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
             generation_config=generation_config,
             cached_content=cached_content or self.cached_content,
             tool_choice=tool_choice,
+            **kwargs,
         )
         response: GenerateContentResponse = await _achat_with_retry(
             request=request,
@@ -1421,6 +1513,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
             generation_config=generation_config,
             cached_content=cached_content or self.cached_content,
             tool_choice=tool_choice,
+            **kwargs,
         )
         response: GenerateContentResponse = _chat_with_retry(
             request=request,
@@ -1499,6 +1592,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
             generation_config=generation_config,
             cached_content=cached_content or self.cached_content,
             tool_choice=tool_choice,
+            **kwargs,
         )
         prev_usage_metadata: UsageMetadata | None = None
         async for chunk in await _achat_with_retry(
@@ -1546,6 +1640,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
         tool_choice: Optional[Union[_ToolChoiceType, bool]] = None,
         generation_config: Optional[Dict[str, Any]] = None,
         cached_content: Optional[str] = None,
+        **kwargs: Any,
     ) -> Tuple[GenerateContentRequest, Dict[str, Any]]:
         if tool_choice and tool_config:
             raise ValueError(
@@ -1617,7 +1712,9 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
             tool_config=formatted_tool_config,
             safety_settings=formatted_safety_settings,
             generation_config=self._prepare_params(
-                stop, generation_config=generation_config
+                stop,
+                generation_config=generation_config,
+                **kwargs,
             ),
             cached_content=cached_content,
         )
@@ -1645,33 +1742,65 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
     def with_structured_output(
         self,
         schema: Union[Dict, Type[BaseModel]],
+        method: Optional[Literal["function_calling", "json_mode"]] = "function_calling",
         *,
         include_raw: bool = False,
         **kwargs: Any,
     ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
-        _ = kwargs.pop("method", None)
         _ = kwargs.pop("strict", None)
         if kwargs:
             raise ValueError(f"Received unsupported arguments {kwargs}")
-        tool_name = _get_tool_name(schema)  # type: ignore[arg-type]
-        if isinstance(schema, type) and is_basemodel_subclass_safe(schema):
-            parser: OutputParserLike = PydanticToolsParser(
-                tools=[schema], first_tool_only=True
-            )
-        else:
-            parser = JsonOutputKeyToolsParser(key_name=tool_name, first_tool_only=True)
-        tool_choice = tool_name if self._supports_tool_choice else None
-        try:
-            llm = self.bind_tools(
-                [schema],
-                tool_choice=tool_choice,
+
+        parser: OutputParserLike
+
+        if method == "json_mode":
+            if isinstance(schema, type) and is_basemodel_subclass(schema):
+                if issubclass(schema, BaseModelV1):
+                    schema_json = schema.schema()
+                else:
+                    schema_json = schema.model_json_schema()
+                parser = PydanticOutputParser(pydantic_object=schema)
+            else:
+                if is_typeddict(schema):
+                    schema_json = convert_to_json_schema(schema)
+                elif isinstance(schema, dict):
+                    schema_json = schema
+                else:
+                    raise ValueError(f"Unsupported schema type {type(schema)}")
+                parser = JsonOutputParser()
+
+            # Resolve refs in schema because they are not supported
+            # by the Gemini API.
+            schema_json = replace_defs_in_schema(schema_json)
+
+            llm = self.bind(
+                response_mime_type="application/json",
+                response_schema=schema_json,
                 ls_structured_output_format={
-                    "kwargs": {"method": "function_calling"},
-                    "schema": convert_to_openai_tool(schema),
+                    "kwargs": {"method": method},
+                    "schema": schema_json,
                 },
             )
-        except Exception:
-            llm = self.bind_tools([schema], tool_choice=tool_choice)
+        else:
+            tool_name = _get_tool_name(schema)  # type: ignore[arg-type]
+            if isinstance(schema, type) and is_basemodel_subclass_safe(schema):
+                parser = PydanticToolsParser(tools=[schema], first_tool_only=True)
+            else:
+                parser = JsonOutputKeyToolsParser(
+                    key_name=tool_name, first_tool_only=True
+                )
+            tool_choice = tool_name if self._supports_tool_choice else None
+            try:
+                llm = self.bind_tools(
+                    [schema],
+                    tool_choice=tool_choice,
+                    ls_structured_output_format={
+                        "kwargs": {"method": "function_calling"},
+                        "schema": convert_to_openai_tool(schema),
+                    },
+                )
+            except Exception:
+                llm = self.bind_tools([schema], tool_choice=tool_choice)
         if include_raw:
             parser_with_fallback = RunnablePassthrough.assign(
                 parsed=itemgetter("raw") | parser, parsing_error=lambda _: None
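
A hedged usage sketch of the new `method="json_mode"` path (model name is illustrative): the schema is converted to JSON schema, its `$defs` are inlined via `replace_defs_in_schema`, and the model is bound with `response_mime_type`/`response_schema` as shown above:

```python
from pydantic import BaseModel
from langchain_google_genai import ChatGoogleGenerativeAI


class Person(BaseModel):
    name: str
    age: int


llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")  # illustrative
structured_llm = llm.with_structured_output(Person, method="json_mode")
person = structured_llm.invoke("Extract: Alice is 30 years old.")
# -> Person(name='Alice', age=30), parsed by PydanticOutputParser
```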
--- a/langchain_google_genai/embeddings.py
+++ b/langchain_google_genai/embeddings.py
@@ -98,6 +98,7 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
             api_key=google_api_key,
             client_info=client_info,
             client_options=self.client_options,
+            transport=self.transport,
         )
         return self
 
@@ -253,7 +254,9 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
         Returns:
             Embedding for the text.
         """
-        task_type = self.task_type or "RETRIEVAL_QUERY"
+        task_type_to_use = task_type if task_type else self.task_type
+        if task_type_to_use is None:
+            task_type_to_use = "RETRIEVAL_QUERY"  # Default to RETRIEVAL_QUERY
         try:
             request: EmbedContentRequest = self._prepare_request(
                 text=text,
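
The resulting precedence is: per-call `task_type`, then the instance-level `self.task_type`, then the `"RETRIEVAL_QUERY"` default. A sketch, assuming `embed_query` forwards a `task_type` argument as the fallback above suggests:

```python
from langchain_google_genai import GoogleGenerativeAIEmbeddings

emb = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
emb.embed_query("capital of France?")  # no task_type anywhere -> "RETRIEVAL_QUERY"
emb.embed_query("capital of France?", task_type="SEMANTIC_SIMILARITY")  # per-call wins
```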
--- /dev/null
+++ b/langchain_google_genai-2.1.6.dist-info/METADATA
@@ -0,0 +1,260 @@
+Metadata-Version: 2.1
+Name: langchain-google-genai
+Version: 2.1.6
+Summary: An integration package connecting Google's genai package and LangChain
+Home-page: https://github.com/langchain-ai/langchain-google
+License: MIT
+Requires-Python: >=3.9,<4.0
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: filetype (>=1.2.0,<2.0.0)
+Requires-Dist: google-ai-generativelanguage (>=0.6.18,<0.7.0)
+Requires-Dist: langchain-core (>=0.3.66,<0.4.0)
+Requires-Dist: pydantic (>=2,<3)
+Project-URL: Repository, https://github.com/langchain-ai/langchain-google
+Project-URL: Source Code, https://github.com/langchain-ai/langchain-google/tree/main/libs/genai
+Description-Content-Type: text/markdown
+
+# langchain-google-genai
+
+**LangChain integration for Google Gemini models using the `generative-ai` SDK**
+
+This package enables seamless access to Google Gemini's chat, vision, embeddings, and retrieval-augmented generation (RAG) features within the LangChain ecosystem.
+
+---
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Installation](#installation)
+- [Quickstart](#quickstart)
+- [Chat Models](#chat-models)
+  - [Multimodal Inputs](#multimodal-inputs)
+  - [Multimodal Outputs](#multimodal-outputs)
+  - [Multimodal Outputs in Chains](#multimodal-outputs-in-chains)
+  - [Thinking Support](#thinking-support)
+- [Embeddings](#embeddings)
+- [Semantic Retrieval (RAG)](#semantic-retrieval-rag)
+
+---
+
+## Overview
+
+This package provides LangChain support for Google Gemini models (via the official [Google Generative AI SDK](https://googleapis.github.io/python-genai/)). It supports:
+
+- Text and vision-based chat models
+- Embeddings for semantic search
+- Multimodal inputs and outputs
+- Retrieval-Augmented Generation (RAG)
+- Thought tracing with reasoning tokens
+
+---
+
+## Installation
+
+```bash
+pip install -U langchain-google-genai
+```
+
+---
+
+## Quickstart
+
+Set up your environment variable with your Gemini API key:
+
+```bash
+export GOOGLE_API_KEY=your-api-key
+```
+
+Then use the `ChatGoogleGenerativeAI` interface:
+
+```python
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+llm = ChatGoogleGenerativeAI(model="gemini-pro")
+response = llm.invoke("Sing a ballad of LangChain.")
+print(response.content)
+```
+
+---
+
+## Chat Models
+
+The main interface for Gemini chat models is `ChatGoogleGenerativeAI`.
+
+### Multimodal Inputs
+
+Gemini vision models support image inputs in single messages.
+
+```python
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+llm = ChatGoogleGenerativeAI(model="gemini-pro-vision")
+
+message = HumanMessage(
+    content=[
+        {"type": "text", "text": "What's in this image?"},
+        {"type": "image_url", "image_url": "https://picsum.photos/seed/picsum/200/300"},
+    ]
+)
+
+response = llm.invoke([message])
+print(response.content)
+```
+
+✅ `image_url` can be:
+
+* A public image URL
+* A Google Cloud Storage path (`gcs://...`)
+* A base64-encoded image (e.g., `data:image/png;base64,...`)
+
+---
+
+### Multimodal Outputs
+
+The Gemini 2.0 Flash Experimental model supports both text and inline image outputs.
+
+```python
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+llm = ChatGoogleGenerativeAI(model="models/gemini-2.0-flash-exp-image-generation")
+
+response = llm.invoke(
+    "Generate an image of a cat and say meow",
+    generation_config=dict(response_modalities=["TEXT", "IMAGE"]),
+)
+
+image_base64 = response.content[0].get("image_url").get("url").split(",")[-1]
+meow_text = response.content[1]
+print(meow_text)
+```
+
+---
+
+### Audio Output
+
+```python
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash-preview-tts")
+# example
+response = llm.invoke(
+    "Please say The quick brown fox jumps over the lazy dog",
+    generation_config=dict(response_modalities=["AUDIO"]),
+)
+
+# Raw WAV bytes of the generated audio
+wav_data = response.additional_kwargs.get("audio")
+with open("output.wav", "wb") as f:
+    f.write(wav_data)
+```
+
+---
+
+### Multimodal Outputs in Chains
+
+You can use Gemini models in a LangChain chain:
+
+```python
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_google_genai import ChatGoogleGenerativeAI, Modality
+
+llm = ChatGoogleGenerativeAI(
+    model="models/gemini-2.0-flash-exp-image-generation",
+    response_modalities=[Modality.TEXT, Modality.IMAGE],
+)
+
+prompt = ChatPromptTemplate.from_messages([
+    ("human", "Generate an image of {animal} and tell me the sound it makes.")
+])
+
+chain = {"animal": RunnablePassthrough()} | prompt | llm
+response = chain.invoke("cat")
+```
+
+---
+
+### Thinking Support
+
+Gemini 2.5 Flash Preview supports internal reasoning ("thoughts").
+
+```python
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+llm = ChatGoogleGenerativeAI(
+    model="models/gemini-2.5-flash-preview-04-17",
+    thinking_budget=1024
+)
+
+response = llm.invoke("How many O's are in Google? How did you verify your answer?")
+reasoning_tokens = response.usage_metadata["output_token_details"]["reasoning"]
+
+print("Response:", response.content)
+print("Reasoning tokens used:", reasoning_tokens)
+```
+
+---
+
+## Embeddings
+
+You can use Gemini embeddings in LangChain:
+
+```python
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+
+embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+vector = embeddings.embed_query("hello, world!")
+print(vector)
+```
+
+---
+
+## Semantic Retrieval (RAG)
+
+Use Gemini with RAG to retrieve relevant documents from your knowledge base.
+
+```python
+from langchain_google_genai.vectorstores import GoogleVectorStore
+from langchain_text_splitters import CharacterTextSplitter
+from langchain_community.document_loaders import DirectoryLoader
+
+# Create a corpus (collection of documents)
+corpus_store = GoogleVectorStore.create_corpus(display_name="My Corpus")
+
+# Create a document under that corpus
+document_store = GoogleVectorStore.create_document(
+    corpus_id=corpus_store.corpus_id, display_name="My Document"
+)
+
+# Load and upload documents
+text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
+for file in DirectoryLoader(path="data/").load():
+    chunks = text_splitter.split_documents([file])
+    document_store.add_documents(chunks)
+
+# Query the document corpus
+aqa = corpus_store.as_aqa()
+response = aqa.invoke("What is the meaning of life?")
+
+print("Answer:", response.answer)
+print("Passages:", response.attributed_passages)
+print("Answerable probability:", response.answerable_probability)
+```
+
+---
+
+
+## Resources
+
+* [LangChain Documentation](https://docs.langchain.com/)
+* [Google Generative AI SDK](https://googleapis.github.io/python-genai/)
+* [Gemini Model Documentation](https://ai.google.dev/)
+
+
+
--- a/langchain_google_genai-2.1.5.dist-info/RECORD
+++ b/langchain_google_genai-2.1.6.dist-info/RECORD
@@ -1,16 +1,16 @@
 langchain_google_genai/__init__.py,sha256=IsTvA3UcECLDckt3zWxK6u-n3MEa5KeEQpqsS-Z8shM,2784
 langchain_google_genai/_common.py,sha256=TeAmAwcRPk_dTXiIQQubamHAhuKNb_XsWy7yllkKW4w,5936
 langchain_google_genai/_enums.py,sha256=Zj3BXXLlkm_UybegCi6fLsfFhriJCt_LAJvgatgPWQ0,252
-langchain_google_genai/_function_utils.py,sha256=KO5KGNcPZo17qksQNYBsLRWjms8zOKcNhGRIuSHeyFU,19257
+langchain_google_genai/_function_utils.py,sha256=CkQyxfUVGqHfmZQwf4JtMXBsS9SOzk99Pa2TytnuLbE,21648
 langchain_google_genai/_genai_extension.py,sha256=81a4ly5ZHlqMf37uJfdB8K41qE6J5ujLnbUypIfFf2o,20775
 langchain_google_genai/_image_utils.py,sha256=tPrQyMvVmO8xkuow1SvA91omxUEv9ZUy1EMHNGjMAKY,5202
-langchain_google_genai/chat_models.py,sha256=mgphbM_X-aTus3rPIXATgg2zA3vE9sGP1bLw4z-9SdI,67808
-langchain_google_genai/embeddings.py,sha256=syN-GXcLAeuHEnF8Yqp2AQPD7rKEaR9l29jSLmt9dwM,10468
+langchain_google_genai/chat_models.py,sha256=WUt41wshRAKS1p7NjGCod_q3Vhry7CWIrXTFEPvqc-E,73558
+langchain_google_genai/embeddings.py,sha256=IwM6PBnuweABvYmzjgShOxrVffKbQnnIzPfKBFf8xms,10636
 langchain_google_genai/genai_aqa.py,sha256=qB6h3-BSXqe0YLR3eeVllYzmNKK6ofI6xJLdBahUVZo,4300
 langchain_google_genai/google_vector_store.py,sha256=4wvhIiOmc3Fo046FyafPmT9NBCLek-9bgluvuTfrbpQ,16148
 langchain_google_genai/llms.py,sha256=ASjrEk2T_1hUXVNJlfPB8PKC4PbhPe00H3_UHunMc_Q,5334
 langchain_google_genai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-langchain_google_genai-2.1.5.dist-info/LICENSE,sha256=DppmdYJVSc1jd0aio6ptnMUn5tIHrdAhQ12SclEBfBg,1072
-langchain_google_genai-2.1.5.dist-info/METADATA,sha256=tDwwkxn2yvypZUZVNbDqXWbB9M02G_eHPH1xh_wQBFw,5165
-langchain_google_genai-2.1.5.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-langchain_google_genai-2.1.5.dist-info/RECORD,,
+langchain_google_genai-2.1.6.dist-info/LICENSE,sha256=DppmdYJVSc1jd0aio6ptnMUn5tIHrdAhQ12SclEBfBg,1072
+langchain_google_genai-2.1.6.dist-info/METADATA,sha256=An9rP3RLphq82natJt0lhu4sjG-OKx2g-I1bx4YfN_Q,6995
+langchain_google_genai-2.1.6.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+langchain_google_genai-2.1.6.dist-info/RECORD,,
--- a/langchain_google_genai-2.1.5.dist-info/METADATA
+++ /dev/null
@@ -1,174 +0,0 @@
-Metadata-Version: 2.1
-Name: langchain-google-genai
-Version: 2.1.5
-Summary: An integration package connecting Google's genai package and LangChain
-Home-page: https://github.com/langchain-ai/langchain-google
-License: MIT
-Requires-Python: >=3.9,<4.0
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: filetype (>=1.2.0,<2.0.0)
-Requires-Dist: google-ai-generativelanguage (>=0.6.18,<0.7.0)
-Requires-Dist: langchain-core (>=0.3.62,<0.4.0)
-Requires-Dist: pydantic (>=2,<3)
-Project-URL: Repository, https://github.com/langchain-ai/langchain-google
-Project-URL: Source Code, https://github.com/langchain-ai/langchain-google/tree/main/libs/genai
-Description-Content-Type: text/markdown
-
-# langchain-google-genai
-
-This package contains the LangChain integrations for Gemini through their generative-ai SDK.
-
-## Installation
-
-```bash
-pip install -U langchain-google-genai
-```
-
-
-## Chat Models
-
-This package contains the `ChatGoogleGenerativeAI` class, which is the recommended way to interface with the Google Gemini series of models.
-
-To use, install the requirements, and configure your environment.
-
-```bash
-export GOOGLE_API_KEY=your-api-key
-```
-
-Then initialize
-
-```python
-from langchain_google_genai import ChatGoogleGenerativeAI
-
-llm = ChatGoogleGenerativeAI(model="gemini-pro")
-llm.invoke("Sing a ballad of LangChain.")
-```
-
-#### Multimodal inputs
-
-Gemini vision model supports image inputs when providing a single chat message. Example:
-
-```
-from langchain_core.messages import HumanMessage
-from langchain_google_genai import ChatGoogleGenerativeAI
-
-llm = ChatGoogleGenerativeAI(model="gemini-pro-vision")
-# example
-message = HumanMessage(
-    content=[
-        {
-            "type": "text",
-            "text": "What's in this image?",
-        },  # You can optionally provide text parts
-        {"type": "image_url", "image_url": "https://picsum.photos/seed/picsum/200/300"},
-    ]
-)
-llm.invoke([message])
-```
-
-The value of `image_url` can be any of the following:
-
-- A public image URL
-- An accessible gcs file (e.g., "gcs://path/to/file.png")
-- A base64 encoded image (e.g., `data:image/png;base64,abcd124`)
-
-#### Multimodal outputs
-
-Gemini 2.0 Flash Experimental model supports text output with inline images
-
-```
-from langchain_google_genai import ChatGoogleGenerativeAI
-
-llm = ChatGoogleGenerativeAI(model="models/gemini-2.0-flash-exp-image-generation")
-# example
-response = llm.invoke(
-    "Generate an image of a cat and say meow",
-    generation_config=dict(response_modalities=["TEXT", "IMAGE"]),
-)
-
-# Base64 encoded binary data of the image
-image_base64 = response.content[0].get("image_url").get("url").split(",")[-1]
-meow_str = response.content[1]
-```
-
-#### Multimodal Outputs in Chains
-
-```
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.prompts import ChatPromptTemplate
-
-from langchain_google_genai import ChatGoogleGenerativeAI, Modality
-
-llm = ChatGoogleGenerativeAI(
-    model="models/gemini-2.0-flash-exp-image-generation",
-    response_modalities=[Modality.TEXT, Modality.IMAGE],
-)
-
-prompt = ChatPromptTemplate(
-    [("human", "Generate an image of {animal} and tell me the sound of the animal")]
-)
-chain = {"animal": RunnablePassthrough()} | prompt | llm
-res = chain.invoke("cat")
-```
-
-#### Thinking support
-
-Gemini 2.5 Flash model supports reasoning through their thoughts
-
-```
-from langchain_google_genai import ChatGoogleGenerativeAI
-
-llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash-preview-04-17", thinking_budget=1024)
-
-response = llm.invoke(
-    "How many O's are in Google? Please tell me how you double checked the result"
-)
-
-assert response.usage_metadata["output_token_details"]["reasoning"] > 0
-```
-
-## Embeddings
-
-This package also adds support for google's embeddings models.
-
-```
-from langchain_google_genai import GoogleGenerativeAIEmbeddings
-
-embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
-embeddings.embed_query("hello, world!")
-```
-
-## Semantic Retrieval
-
-Enables retrieval augmented generation (RAG) in your application.
-
-```
-# Create a new store for housing your documents.
-corpus_store = GoogleVectorStore.create_corpus(display_name="My Corpus")
-
-# Create a new document under the above corpus.
-document_store = GoogleVectorStore.create_document(
-    corpus_id=corpus_store.corpus_id, display_name="My Document"
-)
-
-# Upload some texts to the document.
-text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
-for file in DirectoryLoader(path="data/").load():
-    documents = text_splitter.split_documents([file])
-    document_store.add_documents(documents)
-
-# Talk to your entire corpus with possibly many documents.
-aqa = corpus_store.as_aqa()
-answer = aqa.invoke("What is the meaning of life?")
-
-# Read the response along with the attributed passages and answerability.
-print(response.answer)
-print(response.attributed_passages)
-print(response.answerable_probability)
-```
-