langchain-google-genai 1.0.4__tar.gz → 1.0.6__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (16)
  1. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/PKG-INFO +2 -2
  2. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/chat_models.py +69 -9
  3. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/embeddings.py +98 -20
  4. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/pyproject.toml +5 -2
  5. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/LICENSE +0 -0
  6. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/README.md +0 -0
  7. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/__init__.py +0 -0
  8. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/_common.py +0 -0
  9. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/_enums.py +0 -0
  10. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/_function_utils.py +0 -0
  11. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/_genai_extension.py +0 -0
  12. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/_image_utils.py +0 -0
  13. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/genai_aqa.py +0 -0
  14. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/google_vector_store.py +0 -0
  15. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/llms.py +0 -0
  16. {langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/py.typed +0 -0
{langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langchain-google-genai
-Version: 1.0.4
+Version: 1.0.6
 Summary: An integration package connecting Google's genai package and LangChain
 Home-page: https://github.com/langchain-ai/langchain-google
 License: MIT
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Provides-Extra: images
 Requires-Dist: google-generativeai (>=0.5.2,<0.6.0)
-Requires-Dist: langchain-core (>=0.1.45,<0.3)
+Requires-Dist: langchain-core (>=0.2.2,<0.3)
 Requires-Dist: pillow (>=10.1.0,<11.0.0) ; extra == "images"
 Project-URL: Repository, https://github.com/langchain-ai/langchain-google
 Project-URL: Source Code, https://github.com/langchain-ai/langchain-google/tree/main/libs/genai
{langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/chat_models.py

@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import asyncio
 import base64
 import json
 import logging
@@ -50,7 +51,7 @@ from langchain_core.callbacks.manager import (
     CallbackManagerForLLMRun,
 )
 from langchain_core.language_models import LanguageModelInput
-from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
 from langchain_core.messages import (
     AIMessage,
     AIMessageChunk,
@@ -63,6 +64,7 @@ from langchain_core.messages import (
     ToolCallChunk,
     ToolMessage,
 )
+from langchain_core.messages.ai import UsageMetadata
 from langchain_core.output_parsers.openai_tools import parse_tool_calls
 from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
 from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
@@ -525,6 +527,22 @@ def _response_to_result(
     """Converts a PaLM API response into a LangChain ChatResult."""
     llm_output = {"prompt_feedback": proto.Message.to_dict(response.prompt_feedback)}
 
+    # Get usage metadata
+    try:
+        input_tokens = response.usage_metadata.prompt_token_count
+        output_tokens = response.usage_metadata.candidates_token_count
+        total_tokens = response.usage_metadata.total_token_count
+        if input_tokens + output_tokens + total_tokens > 0:
+            lc_usage = UsageMetadata(
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                total_tokens=total_tokens,
+            )
+        else:
+            lc_usage = None
+    except AttributeError:
+        lc_usage = None
+
     generations: List[ChatGeneration] = []
 
     for candidate in response.candidates:
@@ -535,9 +553,11 @@
                 proto.Message.to_dict(safety_rating, use_integers_for_enums=False)
                 for safety_rating in candidate.safety_ratings
             ]
+        message = _parse_response_candidate(candidate, streaming=stream)
+        message.usage_metadata = lc_usage
         generations.append(
             (ChatGenerationChunk if stream else ChatGeneration)(
-                message=_parse_response_candidate(candidate, streaming=stream),
+                message=message,
                 generation_info=generation_info,
             )
         )
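Net effect of the two hunks above: token counts from the API are now attached to each returned message. A minimal sketch of reading them (assumes `GOOGLE_API_KEY` is set in the environment; the model name is illustrative):

```python
# Sketch: reading the usage metadata that 1.0.6 attaches to responses.
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-pro")
response = llm.invoke("Hello!")

# usage_metadata is None when the API reports no token counts
# (the AttributeError / all-zero fallback in the hunk above).
if response.usage_metadata is not None:
    print(response.usage_metadata["input_tokens"])
    print(response.usage_metadata["output_tokens"])
    print(response.usage_metadata["total_tokens"])
```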
@@ -558,6 +578,14 @@
     return ChatResult(generations=generations, llm_output=llm_output)
 
 
+def _is_event_loop_running() -> bool:
+    try:
+        asyncio.get_running_loop()
+        return True
+    except RuntimeError:
+        return False
+
+
 class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
     """`Google Generative AI` Chat models API.
 
@@ -639,13 +667,22 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
             client_options=values.get("client_options"),
             transport=transport,
         )
-        values["async_client"] = genaix.build_generative_async_service(
-            credentials=values.get("credentials"),
-            api_key=google_api_key,
-            client_info=client_info,
-            client_options=values.get("client_options"),
-            transport=transport,
-        )
+
+        # NOTE: genaix.build_generative_async_service requires
+        # a running event loop, which causes an error
+        # when initialized inside a ThreadPoolExecutor.
+        # this check ensures that async client is only initialized
+        # within an asyncio event loop to avoid the error
+        if _is_event_loop_running():
+            values["async_client"] = genaix.build_generative_async_service(
+                credentials=values.get("credentials"),
+                api_key=google_api_key,
+                client_info=client_info,
+                client_options=values.get("client_options"),
+                transport=transport,
+            )
+        else:
+            values["async_client"] = None
 
         return values
 
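The new helper is the standard `asyncio.get_running_loop()` probe; a self-contained sketch of the same check:

```python
import asyncio

def is_event_loop_running() -> bool:
    # get_running_loop() raises RuntimeError outside a running loop,
    # e.g. in plain sync code or inside a ThreadPoolExecutor worker.
    try:
        asyncio.get_running_loop()
        return True
    except RuntimeError:
        return False

print(is_event_loop_running())  # False: no loop in plain sync code

async def main() -> None:
    print(is_event_loop_running())  # True: called inside asyncio.run()

asyncio.run(main())
```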
@@ -660,6 +697,23 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
             "safety_settings": self.safety_settings,
         }
 
+    def _get_ls_params(
+        self, stop: Optional[List[str]] = None, **kwargs: Any
+    ) -> LangSmithParams:
+        """Get standard params for tracing."""
+        params = self._get_invocation_params(stop=stop, **kwargs)
+        ls_params = LangSmithParams(
+            ls_provider="google_genai",
+            ls_model_name=self.model,
+            ls_model_type="chat",
+            ls_temperature=params.get("temperature", self.temperature),
+        )
+        if ls_max_tokens := params.get("max_output_tokens", self.max_output_tokens):
+            ls_params["ls_max_tokens"] = ls_max_tokens
+        if ls_stop := stop or params.get("stop", None):
+            ls_params["ls_stop"] = ls_stop
+        return ls_params
+
     def _prepare_params(
         self,
         stop: Optional[List[str]],
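`LangSmithParams` is a TypedDict of standard tracing fields, so the override above produces a plain mapping. An illustrative value is sketched below; every concrete number and name in it is an assumption for the example, not output from the diff:

```python
# Illustrative shape of what _get_ls_params returns for tracing.
ls_params = {
    "ls_provider": "google_genai",
    "ls_model_type": "chat",
    "ls_model_name": "gemini-pro",  # self.model (assumed)
    "ls_temperature": 0.7,          # invocation params or self.temperature (assumed)
    "ls_max_tokens": 1024,          # only present when max_output_tokens is set
    "ls_stop": ["Observation:"],    # only present when stop sequences are supplied
}
```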
@@ -724,6 +778,12 @@ class ChatGoogleGenerativeAI(_BaseGoogleGenerativeAI, BaseChatModel):
         generation_config: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        if not self.async_client:
+            raise RuntimeError(
+                "Initialize ChatGoogleGenerativeAI with a running event loop "
+                "to use async methods."
+            )
+
         request = self._prepare_request(
             messages,
             stop=stop,
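Because the async client is now only built when a loop is running, callers that use the async methods should construct the model inside the loop; constructing it in sync code and then calling `ainvoke` raises the new `RuntimeError`. A minimal sketch (assumes `GOOGLE_API_KEY` is set; the model name is illustrative):

```python
# Sketch: construct the model inside the running event loop so that
# async_client is initialized and ainvoke works.
import asyncio
from langchain_google_genai import ChatGoogleGenerativeAI

async def main() -> None:
    llm = ChatGoogleGenerativeAI(model="gemini-pro")  # async client built here
    message = await llm.ainvoke("Hello!")
    print(message.content)

asyncio.run(main())
```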
{langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/langchain_google_genai/embeddings.py

@@ -1,3 +1,5 @@
+import re
+import string
 from typing import Any, Dict, List, Optional
 
 # TODO: remove ignore once the google package is published with types
@@ -15,6 +17,9 @@ from langchain_google_genai._common import (
 )
 from langchain_google_genai._genai_extension import build_generative_service
 
+_MAX_TOKENS_PER_BATCH = 20000
+_DEFAULT_BATCH_SIZE = 100
+
 
 class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
     """`Google Generative AI Embeddings`.
@@ -91,6 +96,67 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
         )
         return values
 
+    @staticmethod
+    def _split_by_punctuation(text: str) -> List[str]:
+        """Splits a string by punctuation and whitespace characters."""
+        split_by = string.punctuation + "\t\n "
+        pattern = f"([{split_by}])"
+        # Using re.split to split the text based on the pattern
+        return [segment for segment in re.split(pattern, text) if segment]
+
+    @staticmethod
+    def _prepare_batches(texts: List[str], batch_size: int) -> List[List[str]]:
+        """Splits texts in batches based on current maximum batch size
+        and maximum tokens per request.
+        """
+        text_index = 0
+        texts_len = len(texts)
+        batch_token_len = 0
+        batches: List[List[str]] = []
+        current_batch: List[str] = []
+        if texts_len == 0:
+            return []
+        while text_index < texts_len:
+            current_text = texts[text_index]
+            # Number of tokens per a text is conservatively estimated
+            # as 2 times number of words, punctuation and whitespace characters.
+            # Using `count_tokens` API will make batching too expensive.
+            # Utilizing a tokenizer, would add a dependency that would not
+            # necessarily be reused by the application using this class.
+            current_text_token_cnt = (
+                len(GoogleGenerativeAIEmbeddings._split_by_punctuation(current_text))
+                * 2
+            )
+            end_of_batch = False
+            if current_text_token_cnt > _MAX_TOKENS_PER_BATCH:
+                # Current text is too big even for a single batch.
+                # Such request will fail, but we still make a batch
+                # so that the app can get the error from the API.
+                if len(current_batch) > 0:
+                    # Adding current batch if not empty.
+                    batches.append(current_batch)
+                current_batch = [current_text]
+                text_index += 1
+                end_of_batch = True
+            elif (
+                batch_token_len + current_text_token_cnt > _MAX_TOKENS_PER_BATCH
+                or len(current_batch) == batch_size
+            ):
+                end_of_batch = True
+            else:
+                if text_index == texts_len - 1:
+                    # Last element - even though the batch may be not big,
+                    # we still need to make it.
+                    end_of_batch = True
+                batch_token_len += current_text_token_cnt
+                current_batch.append(current_text)
+                text_index += 1
+            if end_of_batch:
+                batches.append(current_batch)
+                current_batch = []
+                batch_token_len = 0
+        return batches
+
     def _prepare_request(
         self,
         text: str,
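The token estimate is deliberately cheap: split on punctuation and whitespace, then double the segment count. A standalone sketch of the same estimate (the function name is ours, not the library's):

```python
import re
import string

def estimate_tokens(text: str) -> int:
    # Conservative estimate used for batching: two "tokens" per word,
    # punctuation mark, or whitespace character.
    pattern = f"([{string.punctuation}\t\n ])"
    segments = [s for s in re.split(pattern, text) if s]
    return len(segments) * 2

# "Hello, world!" -> ["Hello", ",", " ", "world", "!"] -> 5 segments -> 10
print(estimate_tokens("Hello, world!"))
```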
@@ -112,12 +178,14 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
     def embed_documents(
         self,
         texts: List[str],
+        *,
+        batch_size: int = _DEFAULT_BATCH_SIZE,
         task_type: Optional[str] = None,
         titles: Optional[List[str]] = None,
         output_dimensionality: Optional[int] = None,
     ) -> List[List[float]]:
-        """Embed a list of strings. Vertex AI currently
-        sets a max batch size of 5 strings.
+        """Embed a list of strings. Google Generative AI currently
+        sets a max batch size of 100 strings.
 
         Args:
             texts: List[str] The list of strings to embed.
@@ -127,28 +195,38 @@ class GoogleGenerativeAIEmbeddings(BaseModel, Embeddings):
                 Only applicable when TaskType is RETRIEVAL_DOCUMENT.
             output_dimensionality: Optional reduced dimension for the output embedding.
                 https://ai.google.dev/api/rest/v1/models/batchEmbedContents#EmbedContentRequest
-
         Returns:
             List of embeddings, one for each text.
         """
-        titles = titles if titles else [None] * len(texts)  # type: ignore[list-item]
-        requests = [
-            self._prepare_request(
-                text=text,
-                task_type=task_type,
-                title=title,
-                output_dimensionality=output_dimensionality,
-            )
-            for text, title in zip(texts, titles)
-        ]
+        embeddings: List[List[float]] = []
+        batch_start_index = 0
+        for batch in GoogleGenerativeAIEmbeddings._prepare_batches(texts, batch_size):
+            if titles:
+                titles_batch = titles[
+                    batch_start_index : batch_start_index + len(batch)
+                ]
+                batch_start_index += len(batch)
+            else:
+                titles_batch = [None] * len(batch)  # type: ignore[list-item]
 
-        try:
-            result = self.client.batch_embed_contents(
-                BatchEmbedContentsRequest(requests=requests, model=self.model)
-            )
-        except Exception as e:
-            raise GoogleGenerativeAIError(f"Error embedding content: {e}") from e
-        return [e.values for e in result.embeddings]
+            requests = [
+                self._prepare_request(
+                    text=text,
+                    task_type=task_type,
+                    title=title,
+                    output_dimensionality=output_dimensionality,
+                )
+                for text, title in zip(batch, titles_batch)
+            ]
+
+            try:
+                result = self.client.batch_embed_contents(
+                    BatchEmbedContentsRequest(requests=requests, model=self.model)
+                )
+            except Exception as e:
+                raise GoogleGenerativeAIError(f"Error embedding content: {e}") from e
+            embeddings.extend([list(e.values) for e in result.embeddings])
+        return embeddings
 
     def embed_query(
         self,
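Callers can now pass long lists and optionally tune the batch size; each batch becomes one `batchEmbedContents` request. A hedged usage sketch (assumes `GOOGLE_API_KEY` is set; the model name is illustrative):

```python
# Sketch: batched embedding with the new keyword-only batch_size.
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
texts = [f"document {i}" for i in range(250)]

# 250 short texts with batch_size=100 go out as three requests
# (100 + 100 + 50), subject to the 20,000-token-per-batch cap.
vectors = embedder.embed_documents(texts, batch_size=100)
print(len(vectors), len(vectors[0]))  # 250 embeddings
```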
{langchain_google_genai-1.0.4 → langchain_google_genai-1.0.6}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-google-genai"
-version = "1.0.4"
+version = "1.0.6"
 description = "An integration package connecting Google's genai package and LangChain"
 authors = []
 readme = "README.md"
@@ -12,7 +12,7 @@ license = "MIT"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
-langchain-core = ">=0.1.45,<0.3"
+langchain-core = ">=0.2.2,<0.3"
 google-generativeai = "^0.5.2"
 pillow = { version = "^10.1.0", optional = true }
 
@@ -32,6 +32,9 @@ pytest-asyncio = "^0.21.1"
 numpy = "^1.26.2"
 langchain-core = { git = "https://github.com/langchain-ai/langchain.git", subdirectory = "libs/core" }
 
+[tool.codespell]
+ignore-words-list = "rouge"
+
 [tool.poetry.group.codespell]
 optional = true