langchain-google-genai 2.1.5__py3-none-any.whl → 2.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,6 +30,7 @@ from langchain_core.utils.function_calling import (
  from langchain_core.utils.json_schema import dereference_refs
  from pydantic import BaseModel
  from pydantic.v1 import BaseModel as BaseModelV1
+ from typing_extensions import NotRequired

  logger = logging.getLogger(__name__)
@@ -65,11 +66,15 @@ _GoogleSearchRetrievalLike = Union[
      gapic.GoogleSearchRetrieval,
      Dict[str, Any],
  ]
+ _GoogleSearchLike = Union[gapic.Tool.GoogleSearch, Dict[str, Any]]
+ _CodeExecutionLike = Union[gapic.CodeExecution, Dict[str, Any]]


  class _ToolDict(TypedDict):
      function_declarations: Sequence[_FunctionDeclarationLike]
      google_search_retrieval: Optional[_GoogleSearchRetrievalLike]
+     google_search: NotRequired[_GoogleSearchLike]
+     code_execution: NotRequired[_CodeExecutionLike]


  # Info: This means one tool=Sequence of FunctionDeclaration
@@ -158,6 +163,8 @@ def convert_to_genai_function_declarations(
          for f in [
              "function_declarations",
              "google_search_retrieval",
+             "google_search",
+             "code_execution",
          ]
      ):
          fd = _format_to_gapic_function_declaration(tool)  # type: ignore[arg-type]
@@ -184,6 +191,12 @@ def convert_to_genai_function_declarations(
              gapic_tool.google_search_retrieval = gapic.GoogleSearchRetrieval(
                  tool["google_search_retrieval"]
              )
+             if "google_search" in tool:
+                 gapic_tool.google_search = gapic.Tool.GoogleSearch(
+                     tool["google_search"]
+                 )
+             if "code_execution" in tool:
+                 gapic_tool.code_execution = gapic.CodeExecution(tool["code_execution"])
          else:
              fd = _format_to_gapic_function_declaration(tool)  # type: ignore[arg-type]
              gapic_tool.function_declarations.append(fd)
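To illustrate the new keys, a tool dict carrying `google_search` or `code_execution` is now translated into the corresponding fields of the gapic `Tool`. A minimal sketch using the converter from this hunk; the empty dicts stand in for default tool settings, and (as an assumption, not shown in this diff) such dicts can be supplied wherever the integration accepts tools, e.g. `bind_tools`:

```python
from langchain_google_genai._function_utils import (
    convert_to_genai_function_declarations,
)

# Built-in Gemini tools expressed as plain dicts (new keys in this release).
gapic_tool = convert_to_genai_function_declarations(
    [{"google_search": {}, "code_execution": {}}]
)
print(gapic_tool)  # Tool proto with google_search and code_execution populated
```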
@@ -520,3 +533,60 @@ def safe_import(module_name: str, attribute_name: str = "") -> bool:
          return True
      except ImportError:
          return False
+
+
+ def replace_defs_in_schema(original_schema: dict, defs: Optional[dict] = None) -> dict:
+     """Given an OpenAPI schema with a property '$defs' replaces all occurrences of
+     referenced items in the dictionary.
+
+     Args:
+         original_schema: Schema generated by `BaseModel.model_schema_json`
+         defs: Definitions for recursive calls.
+
+     Returns:
+         Schema with refs replaced.
+     """
+
+     new_defs = defs or original_schema.get("$defs")
+
+     if new_defs is None or not isinstance(new_defs, dict):
+         return original_schema.copy()
+
+     resulting_schema = {}
+
+     for key, value in original_schema.items():
+         if key == "$defs":
+             continue
+
+         if not isinstance(value, dict):
+             resulting_schema[key] = value
+         else:
+             if "$ref" in value:
+                 new_value = value.copy()
+
+                 path = new_value.pop("$ref")
+                 def_key = _get_def_key_from_schema_path(path)
+                 new_item = new_defs.get(def_key)
+
+                 assert isinstance(new_item, dict)
+                 new_value.update(new_item)
+
+                 resulting_schema[key] = replace_defs_in_schema(new_value, defs=new_defs)
+             else:
+                 resulting_schema[key] = replace_defs_in_schema(value, defs=new_defs)
+
+     return resulting_schema
+
+
+ def _get_def_key_from_schema_path(schema_path: str) -> str:
+     error_message = f"Malformed schema reference path {schema_path}"
+
+     if not isinstance(schema_path, str) or not schema_path.startswith("#/$defs/"):
+         raise ValueError(error_message)
+
+     # Schema has to have only one extra level.
+     parts = schema_path.split("/")
+     if len(parts) != 3:
+         raise ValueError(error_message)
+
+     return parts[-1]
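As an illustration of what the new helper does, here is a small made-up schema with a `$ref` into `$defs` and the flattened result; the field names are invented for the example:

```python
from langchain_google_genai._function_utils import replace_defs_in_schema

schema = {
    "$defs": {
        "Address": {"type": "object", "properties": {"city": {"type": "string"}}}
    },
    "type": "object",
    "properties": {"home": {"$ref": "#/$defs/Address"}},
}

# $refs are resolved in place because the Gemini API does not accept them.
print(replace_defs_in_schema(schema))
# {'type': 'object', 'properties': {'home': {'type': 'object',
#  'properties': {'city': {'type': 'string'}}}}}
```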
@@ -2,11 +2,13 @@ from __future__ import annotations
  import asyncio
  import base64
+ import io
  import json
  import logging
  import mimetypes
  import uuid
  import warnings
+ import wave
  from difflib import get_close_matches
  from operator import itemgetter
  from typing import (
@@ -16,6 +18,7 @@ from typing import (
      Dict,
      Iterator,
      List,
+     Literal,
      Mapping,
      Optional,
      Sequence,
@@ -37,7 +40,9 @@ from google.ai.generativelanguage_v1beta.types import (
      Blob,
      Candidate,
      CodeExecution,
+     CodeExecutionResult,
      Content,
+     ExecutableCode,
      FileData,
      FunctionCall,
      FunctionDeclaration,
@@ -67,8 +72,9 @@ from langchain_core.messages import (
      ToolMessage,
      is_data_content_block,
  )
- from langchain_core.messages.ai import UsageMetadata
+ from langchain_core.messages.ai import UsageMetadata, add_usage, subtract_usage
  from langchain_core.messages.tool import invalid_tool_call, tool_call, tool_call_chunk
+ from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
  from langchain_core.output_parsers.base import OutputParserLike
  from langchain_core.output_parsers.openai_tools import (
      JsonOutputKeyToolsParser,
@@ -79,7 +85,11 @@ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResu
  from langchain_core.runnables import Runnable, RunnableConfig, RunnablePassthrough
  from langchain_core.tools import BaseTool
  from langchain_core.utils import get_pydantic_field_names
- from langchain_core.utils.function_calling import convert_to_openai_tool
+ from langchain_core.utils.function_calling import (
+     convert_to_json_schema,
+     convert_to_openai_tool,
+ )
+ from langchain_core.utils.pydantic import is_basemodel_subclass
  from langchain_core.utils.utils import _build_model_kwargs
  from pydantic import (
      BaseModel,
@@ -88,6 +98,7 @@ from pydantic import (
      SecretStr,
      model_validator,
  )
+ from pydantic.v1 import BaseModel as BaseModelV1
  from tenacity import (
      before_sleep_log,
      retry,
@@ -104,12 +115,14 @@ from langchain_google_genai._common import (
      get_client_info,
  )
  from langchain_google_genai._function_utils import (
+     _dict_to_gapic_schema,
      _tool_choice_to_tool_config,
      _ToolChoiceType,
      _ToolConfigDict,
      _ToolDict,
      convert_to_genai_function_declarations,
      is_basemodel_subclass_safe,
+     replace_defs_in_schema,
      tool_to_dict,
  )
  from langchain_google_genai._image_utils import (
@@ -121,6 +134,7 @@ from . import _genai_extension as genaix
  logger = logging.getLogger(__name__)

+ _allowed_params_prediction_service = ["request", "timeout", "metadata", "labels"]

  _FunctionDeclarationType = Union[
      FunctionDeclaration,
@@ -207,7 +221,14 @@ def _chat_with_retry(generation_method: Callable, **kwargs: Any) -> Any:
          except Exception as e:
              raise e

-     return _chat_with_retry(**kwargs)
+     params = (
+         {k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service}
+         if (request := kwargs.get("request"))
+         and hasattr(request, "model")
+         and "gemini" in request.model
+         else kwargs
+     )
+     return _chat_with_retry(**params)


  async def _achat_with_retry(generation_method: Callable, **kwargs: Any) -> Any:
@@ -240,7 +261,14 @@ async def _achat_with_retry(generation_method: Callable, **kwargs: Any) -> Any:
          except Exception as e:
              raise e

-     return await _achat_with_retry(**kwargs)
+     params = (
+         {k: v for k, v in kwargs.items() if k in _allowed_params_prediction_service}
+         if (request := kwargs.get("request"))
+         and hasattr(request, "model")
+         and "gemini" in request.model
+         else kwargs
+     )
+     return await _achat_with_retry(**params)


  def _is_lc_content_block(part: dict) -> bool:
@@ -331,6 +359,37 @@ def _convert_to_parts(
              metadata = VideoMetadata(part["video_metadata"])
              media_part.video_metadata = metadata
              parts.append(media_part)
+         elif part["type"] == "executable_code":
+             if "executable_code" not in part or "language" not in part:
+                 raise ValueError(
+                     "Executable code part must have 'code' and 'language' "
+                     f"keys, got {part}"
+                 )
+             executable_code_part = Part(
+                 executable_code=ExecutableCode(
+                     language=part["language"], code=part["executable_code"]
+                 )
+             )
+             parts.append(executable_code_part)
+         elif part["type"] == "code_execution_result":
+             if "code_execution_result" not in part:
+                 raise ValueError(
+                     "Code execution result part must have "
+                     f"'code_execution_result', got {part}"
+                 )
+             if "outcome" in part:
+                 outcome = part["outcome"]
+             else:
+                 # Backward compatibility
+                 outcome = 1  # Default to success if not specified
+             code_execution_result_part = Part(
+                 code_execution_result=CodeExecutionResult(
+                     output=part["code_execution_result"], outcome=outcome
+                 )
+             )
+             parts.append(code_execution_result_part)
+         elif part["type"] == "thinking":
+             parts.append(Part(text=part["thinking"], thought=True))
          else:
              raise ValueError(
                  f"Unrecognized message part type: {part['type']}. Only text, "
@@ -486,47 +545,54 @@ def _parse_chat_history(
      return system_instruction, messages


+ # Helper function to append content consistently
+ def _append_to_content(
+     current_content: Union[str, List[Any], None], new_item: Any
+ ) -> Union[str, List[Any]]:
+     """Appends a new item to the content, handling different initial content types."""
+     if current_content is None and isinstance(new_item, str):
+         return new_item
+     elif current_content is None:
+         return [new_item]
+     elif isinstance(current_content, str):
+         return [current_content, new_item]
+     elif isinstance(current_content, list):
+         current_content.append(new_item)
+         return current_content
+     else:
+         # This case should ideally not be reached with proper type checking,
+         # but it catches any unexpected types that might slip through.
+         raise TypeError(f"Unexpected content type: {type(current_content)}")
+
+
  def _parse_response_candidate(
      response_candidate: Candidate, streaming: bool = False
  ) -> AIMessage:
      content: Union[None, str, List[Union[str, dict]]] = None
-     additional_kwargs = {}
+     additional_kwargs: Dict[str, Any] = {}
      tool_calls = []
      invalid_tool_calls = []
      tool_call_chunks = []

      for part in response_candidate.content.parts:
+         text: Optional[str] = None
          try:
-             text: Optional[str] = part.text
-             # Remove erroneous newline character if present
-             if not streaming and text is not None:
-                 text = text.rstrip("\n")
+             if hasattr(part, "text") and part.text is not None:
+                 text = part.text
+                 # Remove erroneous newline character if present
+                 if not streaming:
+                     text = text.rstrip("\n")
          except AttributeError:
-             text = None
+             pass

-         if part.thought:
+         if hasattr(part, "thought") and part.thought:
              thinking_message = {
                  "type": "thinking",
                  "thinking": part.text,
              }
-             if not content:
-                 content = [thinking_message]
-             elif isinstance(content, str):
-                 content = [thinking_message, content]
-             elif isinstance(content, list):
-                 content.append(thinking_message)
-             else:
-                 raise Exception("Unexpected content type")
-
-         elif text is not None:
-             if not content:
-                 content = text
-             elif isinstance(content, str) and text:
-                 content = [content, text]
-             elif isinstance(content, list) and text:
-                 content.append(text)
-             elif text:
-                 raise Exception("Unexpected content type")
+             content = _append_to_content(content, thinking_message)
+         elif text is not None and text:
+             content = _append_to_content(content, text)

          if hasattr(part, "executable_code") and part.executable_code is not None:
              if part.executable_code.code and part.executable_code.language:
@@ -535,14 +601,7 @@ def _parse_response_candidate(
                      "executable_code": part.executable_code.code,
                      "language": part.executable_code.language,
                  }
-                 if not content:
-                     content = [code_message]
-                 elif isinstance(content, str):
-                     content = [content, code_message]
-                 elif isinstance(content, list):
-                     content.append(code_message)
-                 else:
-                     raise Exception("Unexpected content type")
+                 content = _append_to_content(content, code_message)

          if (
              hasattr(part, "code_execution_result")
@@ -552,20 +611,25 @@ def _parse_response_candidate(
              execution_result = {
                  "type": "code_execution_result",
                  "code_execution_result": part.code_execution_result.output,
+                 "outcome": part.code_execution_result.outcome,
              }
+             content = _append_to_content(content, execution_result)

-             if not content:
-                 content = [execution_result]
-             elif isinstance(content, str):
-                 content = [content, execution_result]
-             elif isinstance(content, list):
-                 content.append(execution_result)
-             else:
-                 raise Exception("Unexpected content type")
+         if part.inline_data.mime_type.startswith("audio/"):
+             buffer = io.BytesIO()
+
+             with wave.open(buffer, "wb") as wf:
+                 wf.setnchannels(1)
+                 wf.setsampwidth(2)
+                 # TODO: Read Sample Rate from MIME content type.
+                 wf.setframerate(24000)
+                 wf.writeframes(part.inline_data.data)
+
+             additional_kwargs["audio"] = buffer.getvalue()

          if part.inline_data.mime_type.startswith("image/"):
              image_format = part.inline_data.mime_type[6:]
-             message = {
+             image_message = {
                  "type": "image_url",
                  "image_url": {
                      "url": image_bytes_to_b64_string(
@@ -573,15 +637,7 @@ def _parse_response_candidate(
                      )
                  },
              }
-
-             if not content:
-                 content = [message]
-             elif isinstance(content, str) and message:
-                 content = [content, message]
-             elif isinstance(content, list) and message:
-                 content.append(message)
-             elif message:
-                 raise Exception("Unexpected content type")
+             content = _append_to_content(content, image_message)

          if part.function_call:
              function_call = {"name": part.function_call.name}
@@ -660,35 +716,43 @@ def _response_to_result(
      """Converts a PaLM API response into a LangChain ChatResult."""
      llm_output = {"prompt_feedback": proto.Message.to_dict(response.prompt_feedback)}

-     # previous usage metadata needs to be subtracted because gemini api returns
-     # already-accumulated token counts with each chunk
-     prev_input_tokens = prev_usage["input_tokens"] if prev_usage else 0
-     prev_output_tokens = prev_usage["output_tokens"] if prev_usage else 0
-     prev_total_tokens = prev_usage["total_tokens"] if prev_usage else 0
-
      # Get usage metadata
      try:
          input_tokens = response.usage_metadata.prompt_token_count
-         output_tokens = response.usage_metadata.candidates_token_count
-         total_tokens = response.usage_metadata.total_token_count
          thought_tokens = response.usage_metadata.thoughts_token_count
+         output_tokens = response.usage_metadata.candidates_token_count + thought_tokens
+         total_tokens = response.usage_metadata.total_token_count
          cache_read_tokens = response.usage_metadata.cached_content_token_count
          if input_tokens + output_tokens + cache_read_tokens + total_tokens > 0:
              if thought_tokens > 0:
-                 lc_usage = UsageMetadata(
-                     input_tokens=input_tokens - prev_input_tokens,
-                     output_tokens=output_tokens - prev_output_tokens,
-                     total_tokens=total_tokens - prev_total_tokens,
+                 cumulative_usage = UsageMetadata(
+                     input_tokens=input_tokens,
+                     output_tokens=output_tokens,
+                     total_tokens=total_tokens,
                      input_token_details={"cache_read": cache_read_tokens},
                      output_token_details={"reasoning": thought_tokens},
                  )
              else:
-                 lc_usage = UsageMetadata(
-                     input_tokens=input_tokens - prev_input_tokens,
-                     output_tokens=output_tokens - prev_output_tokens,
-                     total_tokens=total_tokens - prev_total_tokens,
+                 cumulative_usage = UsageMetadata(
+                     input_tokens=input_tokens,
+                     output_tokens=output_tokens,
+                     total_tokens=total_tokens,
                      input_token_details={"cache_read": cache_read_tokens},
                  )
+             # previous usage metadata needs to be subtracted because gemini api returns
+             # already-accumulated token counts with each chunk
+             lc_usage = subtract_usage(cumulative_usage, prev_usage)
+             if prev_usage and cumulative_usage["input_tokens"] < prev_usage.get(
+                 "input_tokens", 0
+             ):
+                 # Gemini 1.5 and 2.0 return a lower cumulative count of prompt tokens
+                 # in the final chunk. We take this count to be ground truth because
+                 # it's consistent with the reported total tokens. So we need to
+                 # ensure this chunk compensates (the subtract_usage function floors
+                 # at zero).
+                 lc_usage["input_tokens"] = cumulative_usage[
+                     "input_tokens"
+                 ] - prev_usage.get("input_tokens", 0)
          else:
              lc_usage = None
      except AttributeError:
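For context on the switch above, `add_usage` and `subtract_usage` come from `langchain_core.messages.ai` (see the import hunk earlier) and do element-wise arithmetic on `UsageMetadata` dicts. A rough sketch of the intended bookkeeping, with invented token counts:

```python
from langchain_core.messages.ai import UsageMetadata, add_usage, subtract_usage

cumulative = UsageMetadata(input_tokens=10, output_tokens=25, total_tokens=35)
previous = UsageMetadata(input_tokens=10, output_tokens=15, total_tokens=25)

# The per-chunk delta reported for this streaming chunk.
delta = subtract_usage(cumulative, previous)   # output_tokens == 10

# Accumulating the delta back onto the running total recovers the cumulative count.
running = add_usage(previous, delta)           # equals `cumulative` again
```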
@@ -1074,6 +1138,21 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
      Gemini does not support system messages; any unsupported messages will
      raise an error."""

+     response_mime_type: Optional[str] = None
+     """Optional. Output response mimetype of the generated candidate text. Only
+     supported in Gemini 1.5 and later models. Supported mimetype:
+         * "text/plain": (default) Text output.
+         * "application/json": JSON response in the candidates.
+         * "text/x.enum": Enum in plain text.
+     The model also needs to be prompted to output the appropriate response
+     type, otherwise the behavior is undefined. This is a preview feature.
+     """
+
+     response_schema: Optional[Dict[str, Any]] = None
+     """ Optional. Enforce an schema to the output.
+     The format of the dictionary should follow Open API schema.
+     """
+
      cached_content: Optional[str] = None
      """The name of the cached content used as context to serve the prediction.

@@ -1281,6 +1360,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
          self,
          stop: Optional[List[str]],
          generation_config: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
      ) -> GenerationConfig:
          gen_config = {
              k: v
@@ -1311,6 +1391,24 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
          }
          if generation_config:
              gen_config = {**gen_config, **generation_config}
+
+         response_mime_type = kwargs.get("response_mime_type", self.response_mime_type)
+         if response_mime_type is not None:
+             gen_config["response_mime_type"] = response_mime_type
+
+         response_schema = kwargs.get("response_schema", self.response_schema)
+         if response_schema is not None:
+             allowed_mime_types = ("application/json", "text/x.enum")
+             if response_mime_type not in allowed_mime_types:
+                 error_message = (
+                     "`response_schema` is only supported when "
+                     f"`response_mime_type` is set to one of {allowed_mime_types}"
+                 )
+                 raise ValueError(error_message)
+
+             gapic_response_schema = _dict_to_gapic_schema(response_schema)
+             if gapic_response_schema is not None:
+                 gen_config["response_schema"] = gapic_response_schema
          return GenerationConfig(**gen_config)

      def _generate(
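A short sketch of how the two new fields fit together: `response_schema` requires a JSON-capable `response_mime_type`, so both are set here. The model name and schema are illustrative placeholders, not values taken from this diff:

```python
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    response_mime_type="application/json",
    # An illustrative OpenAPI-style schema; field names are made up.
    response_schema={
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    },
)
print(llm.invoke("Reply with a JSON object containing an 'answer' key.").content)
```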
@@ -1338,6 +1436,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              generation_config=generation_config,
              cached_content=cached_content or self.cached_content,
              tool_choice=tool_choice,
+             **kwargs,
          )
          response: GenerateContentResponse = _chat_with_retry(
              request=request,
@@ -1387,6 +1486,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              generation_config=generation_config,
              cached_content=cached_content or self.cached_content,
              tool_choice=tool_choice,
+             **kwargs,
          )
          response: GenerateContentResponse = await _achat_with_retry(
              request=request,
@@ -1421,6 +1521,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              generation_config=generation_config,
              cached_content=cached_content or self.cached_content,
              tool_choice=tool_choice,
+             **kwargs,
          )
          response: GenerateContentResponse = _chat_with_retry(
              request=request,
@@ -1429,7 +1530,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              metadata=self.default_metadata,
          )

-         prev_usage_metadata: UsageMetadata | None = None
+         prev_usage_metadata: UsageMetadata | None = None  # cumulative usage
          for chunk in response:
              _chat_result = _response_to_result(
                  chunk, stream=True, prev_usage=prev_usage_metadata
@@ -1437,21 +1538,10 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              gen = cast(ChatGenerationChunk, _chat_result.generations[0])
              message = cast(AIMessageChunk, gen.message)

-             curr_usage_metadata: UsageMetadata | dict[str, int] = (
-                 message.usage_metadata or {}
-             )
-
              prev_usage_metadata = (
                  message.usage_metadata
                  if prev_usage_metadata is None
-                 else UsageMetadata(
-                     input_tokens=prev_usage_metadata.get("input_tokens", 0)
-                     + curr_usage_metadata.get("input_tokens", 0),
-                     output_tokens=prev_usage_metadata.get("output_tokens", 0)
-                     + curr_usage_metadata.get("output_tokens", 0),
-                     total_tokens=prev_usage_metadata.get("total_tokens", 0)
-                     + curr_usage_metadata.get("total_tokens", 0),
-                 )
+                 else add_usage(prev_usage_metadata, message.usage_metadata)
              )

              if run_manager:
@@ -1499,8 +1589,9 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              generation_config=generation_config,
              cached_content=cached_content or self.cached_content,
              tool_choice=tool_choice,
+             **kwargs,
          )
-         prev_usage_metadata: UsageMetadata | None = None
+         prev_usage_metadata: UsageMetadata | None = None  # cumulative usage
          async for chunk in await _achat_with_retry(
              request=request,
              generation_method=self.async_client.stream_generate_content,
@@ -1513,21 +1604,10 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              gen = cast(ChatGenerationChunk, _chat_result.generations[0])
              message = cast(AIMessageChunk, gen.message)

-             curr_usage_metadata: UsageMetadata | dict[str, int] = (
-                 message.usage_metadata or {}
-             )
-
              prev_usage_metadata = (
                  message.usage_metadata
                  if prev_usage_metadata is None
-                 else UsageMetadata(
-                     input_tokens=prev_usage_metadata.get("input_tokens", 0)
-                     + curr_usage_metadata.get("input_tokens", 0),
-                     output_tokens=prev_usage_metadata.get("output_tokens", 0)
-                     + curr_usage_metadata.get("output_tokens", 0),
-                     total_tokens=prev_usage_metadata.get("total_tokens", 0)
-                     + curr_usage_metadata.get("total_tokens", 0),
-                 )
+                 else add_usage(prev_usage_metadata, message.usage_metadata)
              )

              if run_manager:
@@ -1546,6 +1626,7 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
          tool_choice: Optional[Union[_ToolChoiceType, bool]] = None,
          generation_config: Optional[Dict[str, Any]] = None,
          cached_content: Optional[str] = None,
+         **kwargs: Any,
      ) -> Tuple[GenerateContentRequest, Dict[str, Any]]:
          if tool_choice and tool_config:
              raise ValueError(
@@ -1617,7 +1698,9 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
              tool_config=formatted_tool_config,
              safety_settings=formatted_safety_settings,
              generation_config=self._prepare_params(
-                 stop, generation_config=generation_config
+                 stop,
+                 generation_config=generation_config,
+                 **kwargs,
              ),
              cached_content=cached_content,
          )
@@ -1645,33 +1728,65 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
      def with_structured_output(
          self,
          schema: Union[Dict, Type[BaseModel]],
+         method: Optional[Literal["function_calling", "json_mode"]] = "function_calling",
          *,
          include_raw: bool = False,
          **kwargs: Any,
      ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
-         _ = kwargs.pop("method", None)
          _ = kwargs.pop("strict", None)
          if kwargs:
              raise ValueError(f"Received unsupported arguments {kwargs}")
-         tool_name = _get_tool_name(schema)  # type: ignore[arg-type]
-         if isinstance(schema, type) and is_basemodel_subclass_safe(schema):
-             parser: OutputParserLike = PydanticToolsParser(
-                 tools=[schema], first_tool_only=True
-             )
-         else:
-             parser = JsonOutputKeyToolsParser(key_name=tool_name, first_tool_only=True)
-         tool_choice = tool_name if self._supports_tool_choice else None
-         try:
-             llm = self.bind_tools(
-                 [schema],
-                 tool_choice=tool_choice,
+
+         parser: OutputParserLike
+
+         if method == "json_mode":
+             if isinstance(schema, type) and is_basemodel_subclass(schema):
+                 if issubclass(schema, BaseModelV1):
+                     schema_json = schema.schema()
+                 else:
+                     schema_json = schema.model_json_schema()
+                 parser = PydanticOutputParser(pydantic_object=schema)
+             else:
+                 if is_typeddict(schema):
+                     schema_json = convert_to_json_schema(schema)
+                 elif isinstance(schema, dict):
+                     schema_json = schema
+                 else:
+                     raise ValueError(f"Unsupported schema type {type(schema)}")
+                 parser = JsonOutputParser()
+
+             # Resolve refs in schema because they are not supported
+             # by the Gemini API.
+             schema_json = replace_defs_in_schema(schema_json)
+
+             llm = self.bind(
+                 response_mime_type="application/json",
+                 response_schema=schema_json,
                  ls_structured_output_format={
-                     "kwargs": {"method": "function_calling"},
-                     "schema": convert_to_openai_tool(schema),
+                     "kwargs": {"method": method},
+                     "schema": schema_json,
                  },
              )
-         except Exception:
-             llm = self.bind_tools([schema], tool_choice=tool_choice)
+         else:
+             tool_name = _get_tool_name(schema)  # type: ignore[arg-type]
+             if isinstance(schema, type) and is_basemodel_subclass_safe(schema):
+                 parser = PydanticToolsParser(tools=[schema], first_tool_only=True)
+             else:
+                 parser = JsonOutputKeyToolsParser(
+                     key_name=tool_name, first_tool_only=True
+                 )
+             tool_choice = tool_name if self._supports_tool_choice else None
+             try:
+                 llm = self.bind_tools(
+                     [schema],
+                     tool_choice=tool_choice,
+                     ls_structured_output_format={
+                         "kwargs": {"method": "function_calling"},
+                         "schema": convert_to_openai_tool(schema),
+                     },
+                 )
+             except Exception:
+                 llm = self.bind_tools([schema], tool_choice=tool_choice)
          if include_raw:
              parser_with_fallback = RunnablePassthrough.assign(
                  parsed=itemgetter("raw") | parser, parsing_error=lambda _: None
@@ -98,6 +98,7 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
              api_key=google_api_key,
              client_info=client_info,
              client_options=self.client_options,
+             transport=self.transport,
          )
          return self
@@ -253,7 +254,9 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
          Returns:
              Embedding for the text.
          """
-         task_type = self.task_type or "RETRIEVAL_QUERY"
+         task_type_to_use = task_type if task_type else self.task_type
+         if task_type_to_use is None:
+             task_type_to_use = "RETRIEVAL_QUERY"  # Default to RETRIEVAL_QUERY
          try:
              request: EmbedContentRequest = self._prepare_request(
                  text=text,
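The change above suggests that a per-call `task_type` now takes precedence over the instance-level setting, falling back to `"RETRIEVAL_QUERY"` when neither is given. A minimal sketch under that assumption (the per-call `task_type` parameter is implied by the hunk, not shown directly in this diff):

```python
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    task_type="RETRIEVAL_DOCUMENT",  # instance-level default
)

# A per-call task_type, if supported as sketched here, overrides the instance setting.
vector = embeddings.embed_query("hello, world!", task_type="RETRIEVAL_QUERY")
```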
@@ -0,0 +1,260 @@
+ Metadata-Version: 2.1
+ Name: langchain-google-genai
+ Version: 2.1.7
+ Summary: An integration package connecting Google's genai package and LangChain
+ Home-page: https://github.com/langchain-ai/langchain-google
+ License: MIT
+ Requires-Python: >=3.9,<4.0
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Dist: filetype (>=1.2.0,<2.0.0)
+ Requires-Dist: google-ai-generativelanguage (>=0.6.18,<0.7.0)
+ Requires-Dist: langchain-core (>=0.3.68,<0.4.0)
+ Requires-Dist: pydantic (>=2,<3)
+ Project-URL: Repository, https://github.com/langchain-ai/langchain-google
+ Project-URL: Source Code, https://github.com/langchain-ai/langchain-google/tree/main/libs/genai
+ Description-Content-Type: text/markdown
+
+ # langchain-google-genai
+
+ **LangChain integration for Google Gemini models using the `generative-ai` SDK**
+
+ This package enables seamless access to Google Gemini's chat, vision, embeddings, and retrieval-augmented generation (RAG) features within the LangChain ecosystem.
+
+ ---
+
+ ## Table of Contents
+
+ - [Overview](#overview)
+ - [Installation](#installation)
+ - [Quickstart](#quickstart)
+ - [Chat Models](#chat-models)
+   - [Multimodal Inputs](#multimodal-inputs)
+   - [Multimodal Outputs](#multimodal-outputs)
+   - [Multimodal Outputs in Chains](#multimodal-outputs-in-chains)
+   - [Thinking Support](#thinking-support)
+ - [Embeddings](#embeddings)
+ - [Semantic Retrieval (RAG)](#semantic-retrieval-rag)
+
+ ---
+
+ ## Overview
+
+ This package provides LangChain support for Google Gemini models (via the official [Google Generative AI SDK](https://googleapis.github.io/python-genai/)). It supports:
+
+ - Text and vision-based chat models
+ - Embeddings for semantic search
+ - Multimodal inputs and outputs
+ - Retrieval-Augmented Generation (RAG)
+ - Thought tracing with reasoning tokens
+
+ ---
+
+ ## Installation
+
+ ```bash
+ pip install -U langchain-google-genai
+ ```
+
+ ---
+
+ ## Quickstart
+
+ Set up your environment variable with your Gemini API key:
+
+ ```bash
+ export GOOGLE_API_KEY=your-api-key
+ ```
+
+ Then use the `ChatGoogleGenerativeAI` interface:
+
+ ```python
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ llm = ChatGoogleGenerativeAI(model="gemini-pro")
+ response = llm.invoke("Sing a ballad of LangChain.")
+ print(response.content)
+ ```
+
+ ---
+
+ ## Chat Models
+
+ The main interface for Gemini chat models is `ChatGoogleGenerativeAI`.
+
+ ### Multimodal Inputs
+
+ Gemini vision models support image inputs in single messages.
+
+ ```python
+ from langchain_core.messages import HumanMessage
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ llm = ChatGoogleGenerativeAI(model="gemini-pro-vision")
+
+ message = HumanMessage(
+     content=[
+         {"type": "text", "text": "What's in this image?"},
+         {"type": "image_url", "image_url": "https://picsum.photos/seed/picsum/200/300"},
+     ]
+ )
+
+ response = llm.invoke([message])
+ print(response.content)
+ ```
+
+ ✅ `image_url` can be:
+
+ * A public image URL
+ * A Google Cloud Storage path (`gcs://...`)
+ * A base64-encoded image (e.g., `data:image/png;base64,...`)
+
+ ---
+
+ ### Multimodal Outputs
+
+ The Gemini 2.0 Flash Experimental model supports both text and inline image outputs.
+
+ ```python
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ llm = ChatGoogleGenerativeAI(model="models/gemini-2.0-flash-exp-image-generation")
+
+ response = llm.invoke(
+     "Generate an image of a cat and say meow",
+     generation_config=dict(response_modalities=["TEXT", "IMAGE"]),
+ )
+
+ image_base64 = response.content[0].get("image_url").get("url").split(",")[-1]
+ meow_text = response.content[1]
+ print(meow_text)
+ ```
+
+ ---
+
+ ### Audio Output
+
+ ```python
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash-preview-tts")
+ # example
+ response = llm.invoke(
+     "Please say The quick brown fox jumps over the lazy dog",
+     generation_config=dict(response_modalities=["AUDIO"]),
+ )
+
+ # Raw WAV bytes of the generated audio
+ wav_data = response.additional_kwargs.get("audio")
+ with open("output.wav", "wb") as f:
+     f.write(wav_data)
+ ```
+
+ ---
+
+ ### Multimodal Outputs in Chains
+
+ You can use Gemini models in a LangChain chain:
+
+ ```python
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_google_genai import ChatGoogleGenerativeAI, Modality
+
+ llm = ChatGoogleGenerativeAI(
+     model="models/gemini-2.0-flash-exp-image-generation",
+     response_modalities=[Modality.TEXT, Modality.IMAGE],
+ )
+
+ prompt = ChatPromptTemplate.from_messages([
+     ("human", "Generate an image of {animal} and tell me the sound it makes.")
+ ])
+
+ chain = {"animal": RunnablePassthrough()} | prompt | llm
+ response = chain.invoke("cat")
+ ```
+
+ ---
+
+ ### Thinking Support
+
+ Gemini 2.5 Flash Preview supports internal reasoning ("thoughts").
+
+ ```python
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ llm = ChatGoogleGenerativeAI(
+     model="models/gemini-2.5-flash-preview-04-17",
+     thinking_budget=1024,
+ )
+
+ response = llm.invoke("How many O's are in Google? How did you verify your answer?")
+ reasoning_tokens = response.usage_metadata["output_token_details"]["reasoning"]
+
+ print("Response:", response.content)
+ print("Reasoning tokens used:", reasoning_tokens)
+ ```
+
+ ---
+
+ ## Embeddings
+
+ You can use Gemini embeddings in LangChain:
+
+ ```python
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
+
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+ vector = embeddings.embed_query("hello, world!")
+ print(vector)
+ ```
+
+ ---
+
+ ## Semantic Retrieval (RAG)
+
+ Use Gemini with RAG to retrieve relevant documents from your knowledge base.
+
+ ```python
+ from langchain_google_genai.vectorstores import GoogleVectorStore
+ from langchain_text_splitters import CharacterTextSplitter
+ from langchain_community.document_loaders import DirectoryLoader
+
+ # Create a corpus (collection of documents)
+ corpus_store = GoogleVectorStore.create_corpus(display_name="My Corpus")
+
+ # Create a document under that corpus
+ document_store = GoogleVectorStore.create_document(
+     corpus_id=corpus_store.corpus_id, display_name="My Document"
+ )
+
+ # Load and upload documents
+ text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
+ for file in DirectoryLoader(path="data/").load():
+     chunks = text_splitter.split_documents([file])
+     document_store.add_documents(chunks)
+
+ # Query the document corpus
+ aqa = corpus_store.as_aqa()
+ response = aqa.invoke("What is the meaning of life?")
+
+ print("Answer:", response.answer)
+ print("Passages:", response.attributed_passages)
+ print("Answerable probability:", response.answerable_probability)
+ ```
+
+ ---
+
+ ## Resources
+
+ * [LangChain Documentation](https://docs.langchain.com/)
+ * [Google Generative AI SDK](https://googleapis.github.io/python-genai/)
+ * [Gemini Model Documentation](https://ai.google.dev/)
@@ -1,16 +1,16 @@
  langchain_google_genai/__init__.py,sha256=IsTvA3UcECLDckt3zWxK6u-n3MEa5KeEQpqsS-Z8shM,2784
  langchain_google_genai/_common.py,sha256=TeAmAwcRPk_dTXiIQQubamHAhuKNb_XsWy7yllkKW4w,5936
  langchain_google_genai/_enums.py,sha256=Zj3BXXLlkm_UybegCi6fLsfFhriJCt_LAJvgatgPWQ0,252
- langchain_google_genai/_function_utils.py,sha256=KO5KGNcPZo17qksQNYBsLRWjms8zOKcNhGRIuSHeyFU,19257
+ langchain_google_genai/_function_utils.py,sha256=CkQyxfUVGqHfmZQwf4JtMXBsS9SOzk99Pa2TytnuLbE,21648
  langchain_google_genai/_genai_extension.py,sha256=81a4ly5ZHlqMf37uJfdB8K41qE6J5ujLnbUypIfFf2o,20775
  langchain_google_genai/_image_utils.py,sha256=tPrQyMvVmO8xkuow1SvA91omxUEv9ZUy1EMHNGjMAKY,5202
- langchain_google_genai/chat_models.py,sha256=mgphbM_X-aTus3rPIXATgg2zA3vE9sGP1bLw4z-9SdI,67808
- langchain_google_genai/embeddings.py,sha256=syN-GXcLAeuHEnF8Yqp2AQPD7rKEaR9l29jSLmt9dwM,10468
+ langchain_google_genai/chat_models.py,sha256=HtvdPkivOMKzHRHYZkwQxDOJBwpqN47Z2dPRDfYnbNo,72934
+ langchain_google_genai/embeddings.py,sha256=IwM6PBnuweABvYmzjgShOxrVffKbQnnIzPfKBFf8xms,10636
  langchain_google_genai/genai_aqa.py,sha256=qB6h3-BSXqe0YLR3eeVllYzmNKK6ofI6xJLdBahUVZo,4300
  langchain_google_genai/google_vector_store.py,sha256=4wvhIiOmc3Fo046FyafPmT9NBCLek-9bgluvuTfrbpQ,16148
  langchain_google_genai/llms.py,sha256=ASjrEk2T_1hUXVNJlfPB8PKC4PbhPe00H3_UHunMc_Q,5334
  langchain_google_genai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- langchain_google_genai-2.1.5.dist-info/LICENSE,sha256=DppmdYJVSc1jd0aio6ptnMUn5tIHrdAhQ12SclEBfBg,1072
- langchain_google_genai-2.1.5.dist-info/METADATA,sha256=tDwwkxn2yvypZUZVNbDqXWbB9M02G_eHPH1xh_wQBFw,5165
- langchain_google_genai-2.1.5.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- langchain_google_genai-2.1.5.dist-info/RECORD,,
+ langchain_google_genai-2.1.7.dist-info/LICENSE,sha256=DppmdYJVSc1jd0aio6ptnMUn5tIHrdAhQ12SclEBfBg,1072
+ langchain_google_genai-2.1.7.dist-info/METADATA,sha256=eoFaqwS54B_f1bM25aItVsFdwFzlToTCpZHkx_1Vqw8,6995
+ langchain_google_genai-2.1.7.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+ langchain_google_genai-2.1.7.dist-info/RECORD,,
@@ -1,174 +0,0 @@
- Metadata-Version: 2.1
- Name: langchain-google-genai
- Version: 2.1.5
- Summary: An integration package connecting Google's genai package and LangChain
- Home-page: https://github.com/langchain-ai/langchain-google
- License: MIT
- Requires-Python: >=3.9,<4.0
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Requires-Dist: filetype (>=1.2.0,<2.0.0)
- Requires-Dist: google-ai-generativelanguage (>=0.6.18,<0.7.0)
- Requires-Dist: langchain-core (>=0.3.62,<0.4.0)
- Requires-Dist: pydantic (>=2,<3)
- Project-URL: Repository, https://github.com/langchain-ai/langchain-google
- Project-URL: Source Code, https://github.com/langchain-ai/langchain-google/tree/main/libs/genai
- Description-Content-Type: text/markdown
-
- # langchain-google-genai
-
- This package contains the LangChain integrations for Gemini through their generative-ai SDK.
-
- ## Installation
-
- ```bash
- pip install -U langchain-google-genai
- ```
-
-
- ## Chat Models
-
- This package contains the `ChatGoogleGenerativeAI` class, which is the recommended way to interface with the Google Gemini series of models.
-
- To use, install the requirements, and configure your environment.
-
- ```bash
- export GOOGLE_API_KEY=your-api-key
- ```
-
- Then initialize
-
- ```python
- from langchain_google_genai import ChatGoogleGenerativeAI
-
- llm = ChatGoogleGenerativeAI(model="gemini-pro")
- llm.invoke("Sing a ballad of LangChain.")
- ```
-
- #### Multimodal inputs
-
- Gemini vision model supports image inputs when providing a single chat message. Example:
-
- ```
- from langchain_core.messages import HumanMessage
- from langchain_google_genai import ChatGoogleGenerativeAI
-
- llm = ChatGoogleGenerativeAI(model="gemini-pro-vision")
- # example
- message = HumanMessage(
-     content=[
-         {
-             "type": "text",
-             "text": "What's in this image?",
-         },  # You can optionally provide text parts
-         {"type": "image_url", "image_url": "https://picsum.photos/seed/picsum/200/300"},
-     ]
- )
- llm.invoke([message])
- ```
-
- The value of `image_url` can be any of the following:
-
- - A public image URL
- - An accessible gcs file (e.g., "gcs://path/to/file.png")
- - A base64 encoded image (e.g., `data:image/png;base64,abcd124`)
-
- #### Multimodal outputs
-
- Gemini 2.0 Flash Experimental model supports text output with inline images
-
- ```
- from langchain_google_genai import ChatGoogleGenerativeAI
-
- llm = ChatGoogleGenerativeAI(model="models/gemini-2.0-flash-exp-image-generation")
- # example
- response = llm.invoke(
-     "Generate an image of a cat and say meow",
-     generation_config=dict(response_modalities=["TEXT", "IMAGE"]),
- )
-
- # Base64 encoded binary data of the image
- image_base64 = response.content[0].get("image_url").get("url").split(",")[-1]
- meow_str = response.content[1]
- ```
-
- #### Multimodal Outputs in Chains
-
- ```
- from langchain_core.runnables import RunnablePassthrough
- from langchain_core.prompts import ChatPromptTemplate
-
- from langchain_google_genai import ChatGoogleGenerativeAI, Modality
-
- llm = ChatGoogleGenerativeAI(
-     model="models/gemini-2.0-flash-exp-image-generation",
-     response_modalities=[Modality.TEXT, Modality.IMAGE],
- )
-
- prompt = ChatPromptTemplate(
-     [("human", "Generate an image of {animal} and tell me the sound of the animal")]
- )
- chain = {"animal": RunnablePassthrough()} | prompt | llm
- res = chain.invoke("cat")
- ```
-
- #### Thinking support
-
- Gemini 2.5 Flash model supports reasoning through their thoughts
-
- ```
- from langchain_google_genai import ChatGoogleGenerativeAI
-
- llm = ChatGoogleGenerativeAI(model="models/gemini-2.5-flash-preview-04-17", thinking_budget=1024)
-
- response = llm.invoke(
-     "How many O's are in Google? Please tell me how you double checked the result"
- )
-
- assert response.usage_metadata["output_token_details"]["reasoning"] > 0
- ```
-
- ## Embeddings
-
- This package also adds support for google's embeddings models.
-
- ```
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
-
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
- embeddings.embed_query("hello, world!")
- ```
-
- ## Semantic Retrieval
-
- Enables retrieval augmented generation (RAG) in your application.
-
- ```
- # Create a new store for housing your documents.
- corpus_store = GoogleVectorStore.create_corpus(display_name="My Corpus")
-
- # Create a new document under the above corpus.
- document_store = GoogleVectorStore.create_document(
-     corpus_id=corpus_store.corpus_id, display_name="My Document"
- )
-
- # Upload some texts to the document.
- text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
- for file in DirectoryLoader(path="data/").load():
-     documents = text_splitter.split_documents([file])
-     document_store.add_documents(documents)
-
- # Talk to your entire corpus with possibly many documents.
- aqa = corpus_store.as_aqa()
- answer = aqa.invoke("What is the meaning of life?")
-
- # Read the response along with the attributed passages and answerability.
- print(response.answer)
- print(response.attributed_passages)
- print(response.answerable_probability)
- ```