gaik-0.2.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gaik/parsers/vision.py ADDED
@@ -0,0 +1,365 @@
+ """Vision-enabled PDF to Markdown parsing utilities.
+
+ This module exposes :class:`VisionParser`, a helper that converts PDF pages to images
+ and sends them to OpenAI's vision-enabled chat completions (including Azure
+ OpenAI deployments).
+
+ Example
+ -------
+ >>> from gaik.parsers.vision import VisionParser, get_openai_config
+ >>> parser = VisionParser(get_openai_config(use_azure=True))
+ >>> markdown_pages = parser.convert_pdf("invoice.pdf")
+ """
+
+ from __future__ import annotations
+
+ import base64
+ import logging
+ import os
+ from collections.abc import Iterable, Sequence
+ from dataclasses import dataclass
+ from io import BytesIO
+
+ try:  # Optional dependency, documented via extra: gaik[vision]
+     from dotenv import load_dotenv as _load_dotenv  # type: ignore
+ except ImportError:  # pragma: no cover - optional dependency
+     _load_dotenv = None
+
+ try:
+     from openai import AzureOpenAI, OpenAI
+ except ImportError as exc:  # pragma: no cover - optional dependency guard
+     raise ImportError(
+         "VisionParser requires the 'openai' package. Install extras with 'pip install gaik[vision]'"
+     ) from exc
+
+ try:
+     from pdf2image import convert_from_path
+ except ImportError as exc:  # pragma: no cover - optional dependency guard
+     raise ImportError(
+         "VisionParser requires the 'pdf2image' package. Install extras with "
+         "'pip install gaik[vision]'"
+     ) from exc
+
+ try:
+     from PIL import Image
+ except ImportError as exc:  # pragma: no cover - optional dependency guard
+     raise ImportError(
+         "VisionParser requires the 'Pillow' package. Install extras with 'pip install gaik[vision]'"
+     ) from exc
+
+ __all__ = ["OpenAIConfig", "VisionParser", "get_openai_config"]
+
+ logger = logging.getLogger(__name__)
+
+
+ def _load_env() -> None:
+     """Load environment variables from ``.env`` if python-dotenv is available."""
+
+     if _load_dotenv is not None:
+         _load_dotenv()
+
+
+ def _first_env(*keys: str) -> str | None:
+     """Return the first environment variable value that is set."""
+
+     for key in keys:
+         value = os.getenv(key)
+         if value:
+             return value
+     return None
+
+
+ @dataclass
+ class OpenAIConfig:
+     """Configuration for OpenAI or Azure OpenAI vision requests."""
+
+     model: str
+     use_azure: bool = True
+     api_key: str | None = None
+     azure_endpoint: str | None = None
+     azure_audio_endpoint: str | None = None
+     api_version: str | None = None
+
+     def azure_base_endpoint(self) -> str | None:
+         """Return the sanitized Azure endpoint without deployment path."""
+
+         if not self.azure_endpoint:
+             return None
+
+         endpoint = self.azure_endpoint
+         # Azure SDK expects the base endpoint, not deployment-specific.
+         if "/openai/deployments/" in endpoint:
+             endpoint = endpoint.split("/openai/deployments/")[0]
+         return endpoint.rstrip("?&")
+
+
+ def get_openai_config(use_azure: bool = True) -> OpenAIConfig:
+     """Build a default :class:`OpenAIConfig` from environment variables.
+
+     Parameters
+     ----------
+     use_azure:
+         Prefer Azure OpenAI environment variables when ``True``. When ``False``,
+         fall back to standard OpenAI API credentials.
+     """
+
+     _load_env()
+
+     if use_azure:
+         api_key = _first_env("AZURE_API_KEY", "AZURE_OPENAI_API_KEY")
+         endpoint = _first_env("AZURE_ENDPOINT", "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_BASE")
+         # Literal defaults are applied as ``or`` fallbacks; _first_env only accepts env-var names.
+         api_version = (
+             _first_env("AZURE_API_VERSION", "AZURE_OPENAI_API_VERSION") or "2024-12-01-preview"
+         )
+         model = (
+             _first_env("AZURE_DEPLOYMENT", "AZURE_OPENAI_DEPLOYMENT", "AZURE_OPENAI_MODEL")
+             or "gpt-4.1"
+         )
+         return OpenAIConfig(
+             use_azure=True,
+             api_key=api_key,
+             azure_endpoint=endpoint,
+             api_version=api_version,
+             model=model,
+         )
+
+     api_key = _first_env("OPENAI_API_KEY")
+     model = _first_env("OPENAI_MODEL") or "gpt-4o-2024-11-20"
+     return OpenAIConfig(
+         use_azure=False,
+         api_key=api_key,
+         model=model,
+     )
+
+
+ class VisionParser:
+     """Convert PDFs to Markdown using OpenAI vision models."""
+
+     def __init__(
+         self,
+         openai_config: OpenAIConfig,
+         *,
+         custom_prompt: str | None = None,
+         poppler_path: str | None = None,
+         use_context: bool = True,
+         max_tokens: int = 16_000,
+         temperature: float = 0.0,
+     ) -> None:
+         self.config = openai_config
+         self.custom_prompt = custom_prompt or self._default_prompt()
+         self.poppler_path = poppler_path
+         self.use_context = use_context
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+         self._client = self._initialize_client()
+
+     # ---------------------------------------------------------------------
+     # Public API
+     # ---------------------------------------------------------------------
+     def convert_pdf(self, pdf_path: str, *, dpi: int = 200, clean_output: bool = True) -> list[str]:
+         """Convert a PDF into Markdown pages.
+
+         Parameters
+         ----------
+         pdf_path:
+             Absolute or relative path to the PDF.
+         dpi:
+             Rendering resolution for the PDF to image conversion (default ``200``).
+         clean_output:
+             When ``True`` merge and clean multi-page output via a post-processing
+             LLM call.
+         """
+
+         images = self._pdf_to_images(pdf_path, dpi=dpi)
+         markdown_pages: list[str] = []
+
+         for index, image in enumerate(images, start=1):
+             context = markdown_pages[-1] if (markdown_pages and self.use_context) else None
+             markdown = self._parse_image(image, page=index, previous_context=context)
+             markdown_pages.append(markdown)
+
+         if clean_output and len(markdown_pages) > 1:
+             return [self._clean_markdown(markdown_pages)]
+         return markdown_pages
+
+     def save_markdown(
+         self,
+         markdown_pages: Sequence[str],
+         output_path: str,
+         *,
+         separator: str = "\n\n---\n\n",
+     ) -> None:
+         """Persist Markdown pages to disk."""
+
+         if len(markdown_pages) == 1:
+             payload = markdown_pages[0]
+         else:
+             payload = separator.join(markdown_pages)
+
+         os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
+         with open(output_path, "w", encoding="utf-8") as handle:
+             handle.write(payload)
+         logger.info("Markdown saved to %s", output_path)
+
+     # ------------------------------------------------------------------
+     # Internal helpers
+     # ------------------------------------------------------------------
+     def _initialize_client(self) -> AzureOpenAI | OpenAI:
+         config = self.config
+
+         if not config.api_key:
+             raise ValueError(
+                 "OpenAI API key is required. Provide it in OpenAIConfig or via env vars."
+             )
+
+         if config.use_azure:
+             endpoint = config.azure_base_endpoint()
+             if not endpoint:
+                 raise ValueError(
+                     "Azure endpoint is required when use_azure=True. Set 'azure_endpoint' "
+                     "in OpenAIConfig"
+                 )
+
+             if not config.api_version:
+                 raise ValueError("Azure API version is required when use_azure=True.")
+
+             logger.debug("Initializing Azure OpenAI client for endpoint %s", endpoint)
+             return AzureOpenAI(
+                 api_key=config.api_key,
+                 api_version=config.api_version,
+                 azure_endpoint=endpoint,
+             )
+
+         logger.debug("Initializing standard OpenAI client")
+         return OpenAI(api_key=config.api_key)
+
+     def _pdf_to_images(self, pdf_path: str, *, dpi: int) -> Iterable[Image.Image]:
+         logger.info("Converting PDF %s to images at %s DPI", pdf_path, dpi)
+         images = convert_from_path(pdf_path, dpi=dpi, poppler_path=self.poppler_path)
+         logger.debug("Converted %s pages", len(images))
+         return images
+
+     def _image_to_base64(self, image: Image.Image) -> str:
+         buffer = BytesIO()
+         image.save(buffer, format="PNG")
+         return base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+     def _parse_image(
+         self,
+         image: Image.Image,
+         *,
+         page: int,
+         previous_context: str | None,
+     ) -> str:
+         logger.info("Parsing page %s", page)
+
+         payload = [
+             {
+                 "type": "text",
+                 "text": self._build_prompt(previous_context),
+             },
+             {
+                 "type": "image_url",
+                 "image_url": {"url": f"data:image/png;base64,{self._image_to_base64(image)}"},
+             },
+         ]
+
+         response = self._client.chat.completions.create(
+             model=self.config.model,
+             messages=[{"role": "user", "content": payload}],
+             max_tokens=self.max_tokens,
+             temperature=self.temperature,
+         )
+
+         content = response.choices[0].message.content
+         if content is None:
+             raise RuntimeError("Vision model returned empty content")
+         return content
+
+     def _clean_markdown(self, markdown_pages: Sequence[str]) -> str:
+         logger.info("Cleaning and merging markdown output")
+
+         combined = "\n\n---PAGE_BREAK---\n\n".join(markdown_pages)
+         cleanup_prompt = self._cleanup_prompt().format(markdown=combined)
+
+         response = self._client.chat.completions.create(
+             model=self.config.model,
+             messages=[{"role": "user", "content": cleanup_prompt}],
+             max_tokens=self.max_tokens,
+             temperature=self.temperature,
+         )
+
+         content = response.choices[0].message.content
+         if not content:
+             raise RuntimeError("Cleanup LLM returned empty output")
+
+         trimmed = content.strip()
+         if trimmed.startswith("```"):
+             # Strip a wrapping code fence; also drop the language tag that strip("`") leaves behind.
+             trimmed = trimmed.strip("`").strip()
+             if trimmed.lower().startswith("markdown"):
+                 trimmed = trimmed[len("markdown") :].lstrip()
+         return trimmed
+
+     def _build_prompt(self, previous_context: str | None) -> str:
+         if not (previous_context and self.use_context):
+             return self.custom_prompt
+
+         tail = previous_context[-500:]
+         return (
+             f"{self.custom_prompt}\n\n"
+             "CONTEXT FROM PREVIOUS PAGE:\n"
+             "The previous page ended with the following content (last 500 characters):\n"
+             "```\n"
+             f"{tail}\n"
+             "```\n\n"
+             "If this page continues a table or section from the previous page, "
+             "continue it seamlessly without repeating headers."
+         )
+
+     @staticmethod
+     def _default_prompt() -> str:
+         return (
+             "Please convert this document page to markdown format with the following "
+             "requirements:\n\n"
+             "1. Preserve ALL content exactly as it appears\n"
+             "2. Maintain the document structure and hierarchy\n"
+             "3. For tables:\n"
+             "   - Use proper markdown table syntax with | separators\n"
+             "   - If this page continues a table from the previous page, continue the table "
+             "seamlessly\n"
+             "   - Do NOT repeat table headers unless they appear on this page\n"
+             "   - Preserve multi-row cells by repeating content or using appropriate formatting\n"
+             "   - Maintain column alignment\n"
+             "   - Keep all headers and data intact\n"
+             "   - For item descriptions or notes within table cells, keep them in the same row\n"
+             "4. Preserve formatting like bold, italic, lists, etc.\n"
+             "5. For images or charts, describe them briefly in [Image: description] format\n"
+             "6. Maintain the reading order and layout flow\n"
+             "7. Keep numbers, dates, and special characters exactly as shown\n\n"
+             "Return ONLY the markdown content, no explanations."
+         )
+
+     @staticmethod
+     def _cleanup_prompt() -> str:
+         return (
+             "You are a document processing expert. Clean up and merge this multi-page markdown "
+             "document.\n\n"
+             "TASKS:\n"
+             "1. **Remove artifacts**: Delete any empty table rows or hallucinated content "
+             "(rows with only pipe separators and no data)\n"
+             "2. **Merge broken tables**: When a table continues across pages (separated by "
+             "---PAGE_BREAK---):\n"
+             "   - Keep only ONE table header\n"
+             "   - Merge all data rows into a single continuous table\n"
+             "   - Remove page break markers within tables\n"
+             "3. **Handle incomplete rows**: If a table row is split across pages, merge it into a "
+             "complete row\n"
+             "4. **Preserve all real content**: Keep all actual data, headers, footers, and text\n"
+             "5. **Clean up formatting**: Ensure proper markdown syntax throughout\n"
+             "6. **Do NOT hallucinate**: Only output what you see in the input\n\n"
+             "INPUT MARKDOWN:\n"
+             "```markdown\n"
+             "{markdown}\n"
+             "```\n\n"
+             "OUTPUT: Return ONLY the cleaned, merged markdown. No explanations, no code block "
+             "wrappers."
+         )
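
A minimal end-to-end sketch of the API added above, assuming Azure OpenAI credentials (e.g. AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT) are set in the environment, Poppler is installed for pdf2image, and the file paths are illustrative:

    from gaik.parsers.vision import VisionParser, get_openai_config

    # Build a config from environment variables (.env is loaded when python-dotenv is installed)
    config = get_openai_config(use_azure=True)
    parser = VisionParser(config, use_context=True)

    # With clean_output=True, a multi-page PDF comes back as a single merged Markdown document
    pages = parser.convert_pdf("invoice.pdf", dpi=200, clean_output=True)
    parser.save_markdown(pages, "output/invoice.md")
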
gaik/providers/__init__.py ADDED
@@ -0,0 +1,63 @@
+ """LLM provider registry for GAIK toolkit.
+
+ This module provides a unified interface for working with different LLM providers.
+ All GAIK modules (extract, summarize, etc.) use this shared provider infrastructure.
+
+ Available providers:
+ - openai: OpenAI GPT models (default: gpt-4.1)
+ - anthropic: Anthropic Claude models (default: claude-sonnet-4-5-20250929)
+ - azure: Azure OpenAI models (default: gpt-4.1)
+ - google: Google Gemini models (default: gemini-2.5-flash)
+
+ Example:
+     >>> from gaik.providers import get_provider
+     >>> provider = get_provider("openai")
+     >>> model = provider.create_chat_model(model="gpt-4o")
+ """
+
+ from .anthropic import AnthropicProvider
+ from .azure import AzureProvider
+ from .base import LLMProvider
+ from .google import GoogleProvider
+ from .openai import OpenAIProvider
+
+ # Provider registry
+ PROVIDERS: dict[str, LLMProvider] = {
+     "openai": OpenAIProvider(),
+     "anthropic": AnthropicProvider(),
+     "azure": AzureProvider(),
+     "google": GoogleProvider(),
+ }
+
+
+ def get_provider(name: str) -> LLMProvider:
+     """Get provider instance by name.
+
+     Args:
+         name: Provider name (e.g., "openai", "anthropic", "azure", "google")
+
+     Returns:
+         LLMProvider: Provider instance
+
+     Raises:
+         ValueError: If provider name is not recognized
+
+     Example:
+         >>> provider = get_provider("anthropic")
+         >>> model = provider.create_chat_model()
+     """
+     if name not in PROVIDERS:
+         available = ", ".join(PROVIDERS.keys())
+         raise ValueError(f"Unknown provider: '{name}'. Available providers: {available}")
+     return PROVIDERS[name]
+
+
+ __all__ = [
+     "LLMProvider",
+     "OpenAIProvider",
+     "AnthropicProvider",
+     "AzureProvider",
+     "GoogleProvider",
+     "PROVIDERS",
+     "get_provider",
+ ]
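
A short usage sketch of the registry above; the provider names and the create_chat_model signature come from the code shown, the prompt text is illustrative, and invocation uses the standard LangChain chat-model interface:

    from gaik.providers import get_provider

    provider = get_provider("anthropic")  # unknown names raise ValueError
    model = provider.create_chat_model()  # falls back to the provider's default_model

    # The result is a LangChain BaseChatModel, so the usual invoke() call applies
    response = model.invoke("Summarize this diff in one sentence.")
    print(response.content)
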
gaik/providers/anthropic.py ADDED
@@ -0,0 +1,54 @@
+ """Anthropic provider implementation."""
+
+ from typing import Any
+
+ from langchain_anthropic import ChatAnthropic
+ from langchain_core.language_models import BaseChatModel
+
+ from .base import LLMProvider, _build_model_kwargs
+
+
+ class AnthropicProvider(LLMProvider):
+     """Anthropic LLM provider using LangChain's ChatAnthropic.
+
+     Supports Claude models including Claude 3.5 Sonnet, Claude 3 Opus, and others.
+     Requires ANTHROPIC_API_KEY environment variable or api_key parameter.
+     """
+
+     @property
+     def default_model(self) -> str:
+         """Return Anthropic's default model.
+
+         Returns:
+             str: "claude-sonnet-4-5-20250929" (Claude Sonnet 4.5)
+         """
+         return "claude-sonnet-4-5-20250929"
+
+     def create_chat_model(
+         self,
+         model: str | None = None,
+         api_key: str | None = None,
+         **kwargs: Any,
+     ) -> BaseChatModel:
+         """Create Anthropic chat model instance.
+
+         Args:
+             model: Model name (e.g., "claude-sonnet-4-5-20250929", "claude-3-5-sonnet-20241022").
+                 Defaults to "claude-sonnet-4-5-20250929".
+             api_key: Anthropic API key. If None, uses ANTHROPIC_API_KEY environment variable.
+             **kwargs: Additional parameters passed to ChatAnthropic (e.g., temperature, max_tokens).
+
+         Returns:
+             ChatAnthropic: Configured Anthropic chat model
+
+         Example:
+             >>> provider = AnthropicProvider()
+             >>> model = provider.create_chat_model(
+             ...     model="claude-3-5-sonnet-20241022",
+             ...     temperature=0.7
+             ... )
+         """
+         model_kwargs = _build_model_kwargs(
+             model=model or self.default_model, api_key=api_key, **kwargs
+         )
+         return ChatAnthropic(**model_kwargs)
gaik/providers/azure.py ADDED
@@ -0,0 +1,62 @@
+ """Azure OpenAI provider implementation."""
+
+ from typing import Any
+
+ from langchain_core.language_models import BaseChatModel
+ from langchain_openai import AzureChatOpenAI
+
+ from .base import LLMProvider, _build_model_kwargs
+
+
+ class AzureProvider(LLMProvider):
+     """Azure OpenAI LLM provider using LangChain's AzureChatOpenAI.
+
+     Supports OpenAI models deployed on Azure. Requires:
+     - AZURE_OPENAI_API_KEY environment variable or api_key parameter
+     - AZURE_OPENAI_ENDPOINT environment variable or azure_endpoint parameter
+     - azure_deployment parameter (deployment name in Azure)
+     """
+
+     @property
+     def default_model(self) -> str:
+         """Return Azure OpenAI's default model.
+
+         Returns:
+             str: "gpt-4.1"
+         """
+         return "gpt-4.1"
+
+     def create_chat_model(
+         self,
+         model: str | None = None,
+         api_key: str | None = None,
+         **kwargs: Any,
+     ) -> BaseChatModel:
+         """Create Azure OpenAI chat model instance.
+
+         Args:
+             model: Model name (e.g., "gpt-4.1", "gpt-4o", "gpt-35-turbo").
+                 Defaults to "gpt-4.1".
+             api_key: Azure OpenAI API key. If None, uses AZURE_OPENAI_API_KEY environment variable.
+             **kwargs: Additional parameters passed to AzureChatOpenAI:
+                 - azure_endpoint (str): Azure OpenAI endpoint URL
+                 - azure_deployment (str): Deployment name in Azure
+                 - api_version (str): API version (default: "2024-02-01")
+                 - temperature, max_tokens, etc.
+
+         Returns:
+             AzureChatOpenAI: Configured Azure OpenAI chat model
+
+         Example:
+             >>> provider = AzureProvider()
+             >>> model = provider.create_chat_model(
+             ...     model="gpt-4o",
+             ...     azure_endpoint="https://your-resource.openai.azure.com/",
+             ...     azure_deployment="gpt-4o-deployment",
+             ...     api_key="your-api-key"
+             ... )
+         """
+         model_kwargs = _build_model_kwargs(
+             model=model or self.default_model, api_key=api_key, **kwargs
+         )
+         return AzureChatOpenAI(**model_kwargs)
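
Because _build_model_kwargs omits api_key when it is None, the Azure provider can also be configured purely from the environment. A sketch under the assumption that AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT are exported; the deployment name is illustrative:

    from gaik.providers import get_provider

    provider = get_provider("azure")
    model = provider.create_chat_model(
        azure_deployment="gpt-4.1",  # deployment name in Azure (illustrative)
        api_version="2024-02-01",
    )
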
gaik/providers/base.py ADDED
@@ -0,0 +1,67 @@
+ """Abstract base class for LLM providers."""
+
+ from abc import ABC, abstractmethod
+ from typing import Any
+
+ from langchain_core.language_models import BaseChatModel
+
+
+ def _build_model_kwargs(
+     model: str,
+     api_key: str | None = None,
+     **kwargs: Any,
+ ) -> dict[str, Any]:
+     """Build kwargs dict for LangChain model initialization.
+
+     Only includes api_key if explicitly provided (not None), allowing
+     LangChain models to fall back to environment variables.
+
+     Args:
+         model: Model name to use
+         api_key: Optional API key. If None, not included in kwargs.
+         **kwargs: Additional model parameters
+
+     Returns:
+         dict: Keyword arguments for model initialization
+     """
+     model_kwargs = {"model": model, **kwargs}
+     if api_key is not None:
+         model_kwargs["api_key"] = api_key
+     return model_kwargs
+
+
+ class LLMProvider(ABC):
+     """Abstract base class for LLM providers.
+
+     All provider implementations must inherit from this class and implement
+     the required methods. This ensures consistent interface across all providers.
+     """
+
+     @property
+     @abstractmethod
+     def default_model(self) -> str:
+         """Return the default model name for this provider.
+
+         Returns:
+             str: The default model identifier
+         """
+         pass
+
+     @abstractmethod
+     def create_chat_model(
+         self,
+         model: str | None = None,
+         api_key: str | None = None,
+         **kwargs: Any,
+     ) -> BaseChatModel:
+         """Create a LangChain chat model instance.
+
+         Args:
+             model: Model name. If None, uses the provider's default model.
+             api_key: API key for authentication. If None, uses environment variable.
+             **kwargs: Additional provider-specific parameters.
+
+         Returns:
+             BaseChatModel: Configured LangChain chat model instance
+         """
+         pass
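
To illustrate the contract defined above, a hypothetical provider (not part of the package) only needs the two abstract members. The LocalProvider name, the placeholder model, and the OpenAI-compatible base_url are assumptions for this sketch:

    from typing import Any

    from langchain_core.language_models import BaseChatModel
    from langchain_openai import ChatOpenAI

    from gaik.providers.base import LLMProvider, _build_model_kwargs


    class LocalProvider(LLMProvider):
        """Hypothetical provider for an OpenAI-compatible local server."""

        @property
        def default_model(self) -> str:
            return "llama-3.1-8b-instruct"  # placeholder model name

        def create_chat_model(
            self,
            model: str | None = None,
            api_key: str | None = None,
            **kwargs: Any,
        ) -> BaseChatModel:
            model_kwargs = _build_model_kwargs(
                model=model or self.default_model, api_key=api_key, **kwargs
            )
            # base_url is passed through kwargs, e.g. base_url="http://localhost:8000/v1"
            return ChatOpenAI(**model_kwargs)

Registering such a provider is a plain dict assignment, e.g. PROVIDERS["local"] = LocalProvider().
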
gaik/providers/google.py ADDED
@@ -0,0 +1,52 @@
+ """Google provider implementation."""
+
+ from typing import Any
+
+ from langchain_core.language_models import BaseChatModel
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ from .base import LLMProvider, _build_model_kwargs
+
+
+ class GoogleProvider(LLMProvider):
+     """Google LLM provider using LangChain's ChatGoogleGenerativeAI.
+
+     Supports Google's Gemini models including Gemini 2.5 and Gemini 1.5.
+     Requires GOOGLE_API_KEY environment variable or api_key parameter.
+     """
+
+     @property
+     def default_model(self) -> str:
+         """Return Google's default model.
+
+         Returns:
+             str: "gemini-2.5-flash" (Gemini 2.5 Flash)
+         """
+         return "gemini-2.5-flash"
+
+     def create_chat_model(
+         self,
+         model: str | None = None,
+         api_key: str | None = None,
+         **kwargs: Any,
+     ) -> BaseChatModel:
+         """Create Google chat model instance.
+
+         Args:
+             model: Model name (e.g., "gemini-2.5-flash", "gemini-2.5-pro").
+                 Defaults to "gemini-2.5-flash".
+             api_key: Google API key. If None, uses GOOGLE_API_KEY environment variable.
+             **kwargs: Additional parameters passed to ChatGoogleGenerativeAI
+                 (e.g., temperature, max_tokens).
+
+         Returns:
+             ChatGoogleGenerativeAI: Configured Google chat model
+
+         Example:
+             >>> provider = GoogleProvider()
+             >>> model = provider.create_chat_model(model="gemini-2.5-flash", temperature=0.7)
+         """
+         model_kwargs = _build_model_kwargs(
+             model=model or self.default_model, api_key=api_key, **kwargs
+         )
+         return ChatGoogleGenerativeAI(**model_kwargs)