crewplus 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crewplus might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: crewplus
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Base services for CrewPlus AI applications
5
5
  Author-Email: Tim Liu <tim@opsmateai.com>
6
6
  License: MIT
@@ -12,6 +12,9 @@ Requires-Python: <4.0,>=3.11
12
12
  Requires-Dist: langchain==0.3.25
13
13
  Requires-Dist: langchain-openai==0.3.24
14
14
  Requires-Dist: google-genai==1.21.1
15
+ Requires-Dist: mkdocs<2.0.0,>=1.6.1
16
+ Requires-Dist: mkdocs-material<10.0.0,>=9.6.14
17
+ Requires-Dist: mkdocstrings-python<2.0.0,>=1.16.12
15
18
  Description-Content-Type: text/markdown
16
19
 
17
20
  # CrewPlus
@@ -44,6 +47,12 @@ CrewPlus is designed as a modular and extensible ecosystem of packages. This all
44
47
  - **Vector DB Services:** Abstractions for working with popular vector stores for retrieval-augmented generation (RAG).
45
48
  - **Centralized Configuration:** Manage application settings and secrets from a single source of truth (`core/config.py`).
46
49
 
50
+ ## Documentation
51
+
52
+ For detailed guides and API references, please see the `docs/` folder.
53
+
54
+ - **[GeminiChatModel Documentation](./docs/GeminiChatModel.md)**: A comprehensive guide to using the `GeminiChatModel` for text, image, and video understanding.
55
+
47
56
  ## Installation
48
57
 
49
58
  To install the core `crewplus` package, run the following command:
@@ -85,9 +94,6 @@ crewplus-base/ # GitHub repo name
85
94
  │ └── __init__.py
86
95
  │ └── gemini_chat_model.py
87
96
  │ └── model_load_balancer.py
88
- │ └── vdb_service.py
89
- │ └── ...
90
- │ └── vectorstores/
91
97
  │ └── ...
92
98
  │ └── core/
93
99
  │ └── __init__.py
@@ -95,6 +101,8 @@ crewplus-base/ # GitHub repo name
95
101
  │ └── ...
96
102
  ├── tests/
97
103
  │ └── ...
104
+ ├── docs/
105
+ │ └── ...
98
106
  └── notebooks/
99
107
  └── ...
100
108
 
@@ -28,6 +28,12 @@ CrewPlus is designed as a modular and extensible ecosystem of packages. This all
28
28
  - **Vector DB Services:** Abstractions for working with popular vector stores for retrieval-augmented generation (RAG).
29
29
  - **Centralized Configuration:** Manage application settings and secrets from a single source of truth (`core/config.py`).
30
30
 
31
+ ## Documentation
32
+
33
+ For detailed guides and API references, please see the `docs/` folder.
34
+
35
+ - **[GeminiChatModel Documentation](./docs/GeminiChatModel.md)**: A comprehensive guide to using the `GeminiChatModel` for text, image, and video understanding.
36
+
31
37
  ## Installation
32
38
 
33
39
  To install the core `crewplus` package, run the following command:
@@ -69,9 +75,6 @@ crewplus-base/ # GitHub repo name
69
75
  │ └── __init__.py
70
76
  │ └── gemini_chat_model.py
71
77
  │ └── model_load_balancer.py
72
- │ └── vdb_service.py
73
- │ └── ...
74
- │ └── vectorstores/
75
78
  │ └── ...
76
79
  │ └── core/
77
80
  │ └── __init__.py
@@ -79,6 +82,8 @@ crewplus-base/ # GitHub repo name
79
82
  │ └── ...
80
83
  ├── tests/
81
84
  │ └── ...
85
+ ├── docs/
86
+ │ └── ...
82
87
  └── notebooks/
83
88
  └── ...
84
89
 
@@ -0,0 +1,590 @@
1
+ import os
2
+ import asyncio
3
+ import logging
4
+ from typing import Any, Dict, Iterator, List, Optional, AsyncIterator, Union, Tuple
5
+ from google import genai
6
+ from google.genai import types
7
+ import base64
8
+ import requests
9
+ from langchain_core.language_models import BaseChatModel
10
+ from langchain_core.messages import (
11
+ AIMessage,
12
+ AIMessageChunk,
13
+ BaseMessage,
14
+ HumanMessage,
15
+ SystemMessage,
16
+ )
17
+ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
18
+ from langchain_core.callbacks import (
19
+ CallbackManagerForLLMRun,
20
+ AsyncCallbackManagerForLLMRun
21
+ )
22
+ from pydantic import Field, SecretStr
23
+ from langchain_core.utils import convert_to_secret_str
24
+
25
+ class GeminiChatModel(BaseChatModel):
26
+ """Custom chat model for Google Gemini, supporting text, image, and video.
27
+
28
+ This model provides a robust interface to Google's Gemini Pro and Flash models,
29
+ handling various data formats for multimodal inputs while maintaining compatibility
30
+ with the LangChain ecosystem.
31
+
32
+ It supports standard invocation, streaming, and asynchronous operations.
33
+ API keys can be provided directly or loaded from the `GOOGLE_API_KEY`
34
+ environment variable.
35
+
36
+ Attributes:
37
+ model_name (str): The Google model name to use (e.g., "gemini-1.5-flash").
38
+ google_api_key (Optional[SecretStr]): Your Google API key.
39
+ temperature (Optional[float]): The sampling temperature for generation.
40
+ max_tokens (Optional[int]): The maximum number of tokens to generate.
41
+ top_p (Optional[float]): The top-p (nucleus) sampling parameter.
42
+ top_k (Optional[int]): The top-k sampling parameter.
43
+ logger (Optional[logging.Logger]): An optional logger instance.
44
+
45
+ Example:
46
+ .. code-block:: python
47
+
48
+ from crewplus.services import GeminiChatModel
49
+ from langchain_core.messages import HumanMessage
50
+ import base64
51
+ import logging
52
+
53
+ # Initialize the model with optional logger
54
+ logger = logging.getLogger("my_app.gemini")
55
+ model = GeminiChatModel(model_name="gemini-2.0-flash", logger=logger)
56
+
57
+ # --- Text-only usage ---
58
+ response = model.invoke("Hello, how are you?")
59
+ print("Text response:", response.content)
60
+
61
+ # --- Image processing with base64 data URI ---
62
+ # Replace with a path to your image
63
+ image_path = "path/to/your/image.jpg"
64
+ try:
65
+ with open(image_path, "rb") as image_file:
66
+ encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
67
+
68
+ image_message = HumanMessage(
69
+ content=[
70
+ {"type": "text", "text": "What is in this image?"},
71
+ {
72
+ "type": "image_url",
73
+ "image_url": {
74
+ "url": f"data:image/jpeg;base64,{encoded_string}"
75
+ }
76
+ },
77
+ ]
78
+ )
79
+ image_response = model.invoke([image_message])
80
+ print("Image response (base64):", image_response.content)
81
+ except FileNotFoundError:
82
+ print(f"Image file not found at {image_path}, skipping base64 example.")
83
+
84
+
85
+ # --- Image processing with URL ---
86
+ url_message = HumanMessage(
87
+ content=[
88
+ {"type": "text", "text": "Describe this image:"},
89
+ {
90
+ "type": "image_url",
91
+ "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
92
+ },
93
+ ]
94
+ )
95
+ url_response = model.invoke([url_message])
96
+ print("Image response (URL):", url_response.content)
97
+
98
+ # --- Video processing with file path (>=20MB) ---
99
+ video_path = "path/to/your/video.mp4"
100
+ from google import genai
+ client = genai.Client()  # picks up the API key from the environment
+ video_file = client.files.upload(file=video_path)
101
+
102
+ try:
103
+ video_message = HumanMessage(
104
+ content=[
105
+ {"type": "text", "text": "Summarize this video."},
106
+ {"type": "video_file", "file": video_file},
107
+ ]
108
+ )
109
+ video_response = model.invoke([video_message])
110
+ print("Video response (file path):", video_response.content)
111
+ except Exception as e:
112
+ print(f"Video processing with file path failed: {e}")
113
+
114
+ # --- Video processing with raw bytes (<20MB) ---
115
+ video_path = "path/to/your/video.mp4"
116
+ try:
117
+ with open(video_path, "rb") as video_file:
118
+ video_bytes = video_file.read()
119
+
120
+ video_message = HumanMessage(
121
+ content=[
122
+ {"type": "text", "text": "What is happening in this video?"},
123
+ {
124
+ "type": "video_file",
125
+ "data": video_bytes,
126
+ "mime_type": "video/mp4"
127
+ },
128
+ ]
129
+ )
130
+ video_response = model.invoke([video_message])
131
+ print("Video response (bytes):", video_response.content)
132
+ except FileNotFoundError:
133
+ print(f"Video file not found at {video_path}, skipping bytes example.")
134
+ except Exception as e:
135
+ print(f"Video processing with bytes failed: {e}")
136
+
137
+ # --- Streaming usage (works with text, images, and video) ---
138
+ print("Streaming response:")
139
+ for chunk in model.stream([url_message]):
140
+ print(chunk.content, end="", flush=True)
141
+ """
142
+
143
+ # Model configuration
144
+ model_name: str = Field(default="gemini-2.0-flash", description="The Google model name to use")
145
+ google_api_key: Optional[SecretStr] = Field(default=None, description="Google API key")
146
+ temperature: Optional[float] = Field(default=0.7, description="Sampling temperature")
147
+ max_tokens: Optional[int] = Field(default=None, description="Maximum tokens to generate")
148
+ top_p: Optional[float] = Field(default=None, description="Top-p sampling parameter")
149
+ top_k: Optional[int] = Field(default=None, description="Top-k sampling parameter")
150
+ logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance")
151
+
152
+ # Internal client
153
+ _client: Optional[genai.Client] = None
154
+
155
def __init__(self, **kwargs):
    """Validate fields, set up logging, resolve the API key and build the client.

    Args:
        **kwargs: Field values, forwarded unchanged to the parent initializer.

    Raises:
        ValueError: If no API key was passed and ``GOOGLE_API_KEY`` is unset
            or empty.
    """
    super().__init__(**kwargs)

    # Fall back to a logger named after the concrete class when the caller
    # did not supply one.
    # NOTE(review): the nesting of the handler/level setup below was
    # reconstructed from a listing that lost its indentation -- confirm.
    if self.logger is None:
        owner = self.__class__
        self.logger = logging.getLogger(f"{owner.__module__}.{owner.__name__}")
        if not self.logger.handlers:
            self.logger.addHandler(logging.StreamHandler())
            self.logger.setLevel(logging.INFO)

    # The environment is consulted only when no key was given explicitly.
    if self.google_api_key is None:
        env_key = os.getenv("GOOGLE_API_KEY")
        if env_key:
            self.google_api_key = convert_to_secret_str(env_key)

    # Without a key there is nothing to build a client from: log and abort.
    if not self.google_api_key:
        error_msg = "Google API key is required. Set GOOGLE_API_KEY environment variable or pass google_api_key parameter."
        self.logger.error(error_msg)
        raise ValueError(error_msg)

    self._client = genai.Client(
        api_key=self.google_api_key.get_secret_value()
    )
    self.logger.info(f"Initialized GeminiChatModel with model: {self.model_name}")
181
+
182
+ @property
183
+ def _llm_type(self) -> str:
184
+ """Return identifier for the model type."""
185
+ return "custom_google_genai"
186
+
187
+ @property
188
+ def _identifying_params(self) -> Dict[str, Any]:
189
+ """Return a dictionary of identifying parameters for tracing."""
190
+ return {
191
+ "model_name": self.model_name,
192
+ "temperature": self.temperature,
193
+ "max_tokens": self.max_tokens,
194
+ "top_p": self.top_p,
195
+ "top_k": self.top_k,
196
+ }
197
+
198
def _convert_messages(self, messages: List[BaseMessage]) -> Union[types.ContentListUnion, types.ContentListUnionDict]:
    """Convert LangChain messages into the ``contents`` argument for GenAI.

    System messages are dropped here; they are sent separately through the
    generation config.

    Args:
        messages: The LangChain messages for this call.

    Returns:
        - A plain string when the only chat message is a ``HumanMessage``
          with string content.
        - A list of raw parts when the only chat message is a
          ``HumanMessage`` with list content.
        - Otherwise a list of ``types.Content`` objects (or the single
          ``types.Content`` when only one was produced).
    """
    self.logger.debug(f"Converting {len(messages)} messages.")

    # Filter out system messages (handled in generation_config)
    chat_messages = [msg for msg in messages if not isinstance(msg, SystemMessage)]

    # Case 1: a single HumanMessage -- the common single-prompt path.
    if len(chat_messages) == 1 and isinstance(chat_messages[0], HumanMessage):
        content = chat_messages[0].content
        if isinstance(content, str):
            return content
        return list(self._parse_message_content(content, is_simple=True))

    # Case 2: multi-turn history -- every message becomes a Content object.
    self.logger.debug("Handling as a multi-turn chat conversation.")
    genai_contents: List[types.Content] = []
    for msg in chat_messages:
        role = "model" if isinstance(msg, AIMessage) else "user"
        parts = []

        for part in self._parse_message_content(msg.content, is_simple=False):
            if isinstance(part, types.Part):
                parts.append(part)
            elif isinstance(part, types.File):
                # Content.parts holds Part objects, so an uploaded File is
                # referenced through a file_data Part instead of being
                # appended as-is.
                parts.append(
                    types.Part(
                        file_data=types.FileData(
                            mime_type=part.mime_type,
                            file_uri=part.uri,
                        )
                    )
                )
            else:
                self.logger.warning(f"Unexpected part type: {type(part)}")

        # Messages that produced no usable parts are skipped entirely.
        if parts:
            genai_contents.append(types.Content(parts=parts, role=role))

    # If there's only one Content object, return it directly instead of a list
    if len(genai_contents) == 1:
        return genai_contents[0]

    return genai_contents
245
+
246
def _create_image_part(self, image_info: Dict[str, Any]) -> Union[types.Part, types.File]:
    """Create a GenAI Part or File from one image specification.

    Sources are checked in this order:
    - ``"path"``: the file is uploaded through the client and the upload
      result is returned.
    - ``"data"``: inline bytes (base64-decoded first when ``source_type``
      is ``"base64"``); requires ``"mime_type"``.
    - ``"image_url"`` / ``"url"``: a string, or a dict holding ``"url"``.
      ``data:`` URIs are decoded locally; anything else is downloaded.

    Args:
        image_info: Dictionary describing the image.

    Returns:
        A ``types.Part`` with inline bytes, or the uploaded file object.

    Raises:
        ValueError: If no usable source is present, if inline data comes
            without a MIME type, or if a ``data:`` URI has no payload.
        requests.RequestException: If downloading the image fails or
            times out.
    """
    # Upper bound (seconds) for the download so a stalled server cannot
    # block generation indefinitely.
    fetch_timeout_seconds = 30

    self.logger.debug(f"Creating image part from info: {list(image_info.keys())}")

    if "path" in image_info:
        return self._client.files.upload(file=image_info["path"])

    if "data" in image_info:
        data = image_info["data"]
        mime_type = image_info.get("mime_type")
        if not mime_type:
            raise ValueError("'mime_type' is required when providing image data.")
        if image_info.get("source_type") == "base64":
            data = base64.b64decode(data)
        return types.Part.from_bytes(data=data, mime_type=mime_type)

    url = image_info.get("image_url", image_info.get("url"))
    if isinstance(url, dict):
        url = url.get("url")

    if not url:
        raise ValueError(f"Invalid image info, requires 'path', 'data', or 'url'. Received: {image_info}")

    if url.startswith("data:"):
        header, separator, encoded = url.partition(",")
        if not separator:
            raise ValueError("Invalid data URI for image: missing ',' before the payload.")
        # "data:image/png;base64" -> "image/png"
        mime_type = header.split(":", 1)[-1].split(";", 1)[0]
        image_data = base64.b64decode(encoded)
        return types.Part.from_bytes(data=image_data, mime_type=mime_type)

    response = requests.get(url, timeout=fetch_timeout_seconds)
    response.raise_for_status()
    # Keep only the media type; drop parameters such as "; charset=binary".
    content_type = response.headers.get("Content-Type", "image/jpeg")
    mime_type = content_type.split(";", 1)[0].strip() or "image/jpeg"
    return types.Part.from_bytes(data=response.content, mime_type=mime_type)
276
+
277
def _create_video_part(self, video_info: Dict[str, Any]) -> Union[types.Part, types.File]:
    """Build the GenAI representation of one video specification.

    Recognised shapes, checked in this order:
    - File object: {"type": "video_file", "file": file_object}
    - File path: {"type": "video_file", "path": "/path/to/video.mp4"}
    - Raw bytes: {"type": "video_file", "data": video_bytes, "mime_type": "video/mp4"}
    - URL/URI: {"type": "video_file", "url": "https://example.com/video.mp4"}
    - URL with offset: {"type": "video_file", "url": "...", "start_offset": "12s", "end_offset": "50s"}

    Args:
        video_info: Dictionary containing video information.

    Returns:
        The given/uploaded file object, or a ``types.Part`` carrying inline
        bytes or a file URI (with video metadata when an offset is set).

    Raises:
        ValueError: If ``"file"`` is not a ``types.File``, inline data has
            no MIME type or exceeds 20MB, or no source key is present.
        Whatever the client's upload call raises for a ``"path"`` source.
    """
    self.logger.debug(f"Creating video part from info: {list(video_info.keys())}")

    # Pre-uploaded file objects pass through untouched after a type check.
    if "file" in video_info:
        if not isinstance(video_info["file"], types.File):
            raise ValueError(f"The 'file' key must contain a google.genai.File object, but got {type(video_info['file'])}")
        return video_info["file"]

    # Local paths are uploaded through the client.
    if "path" in video_info:
        self.logger.debug(f"Uploading video file from path: {video_info['path']}")
        uploaded_file = self._client.files.upload(file=video_info["path"])
        self.logger.debug(f"Uploaded video file: {uploaded_file}")
        return uploaded_file

    # Raw bytes travel inline and are therefore size-limited.
    if "data" in video_info:
        data = video_info["data"]
        if not video_info.get("mime_type"):
            raise ValueError("'mime_type' is required when providing video data.")
        max_size = 20 * 1024 * 1024  # 20MB
        if len(data) > max_size:
            raise ValueError(f"Video data size ({len(data)} bytes) exceeds 20MB limit for inline data.")
        inline_blob = types.Blob(data=data, mime_type=video_info.get("mime_type"))
        return types.Part(inline_data=inline_blob)

    # Everything else must be addressed by URL/URI.
    url = video_info.get("url")
    if not url:
        raise ValueError(f"Invalid video info, requires 'path', 'data', 'url', or 'file'. Received: {video_info}")

    mime_type = video_info.get("mime_type", "video/mp4")
    start_offset = video_info.get("start_offset")
    end_offset = video_info.get("end_offset")

    self.logger.debug(f"Video offsets: {start_offset} to {end_offset}.")

    part_kwargs = {"file_data": types.FileData(file_uri=url, mime_type=mime_type)}
    if start_offset or end_offset:
        # Clip information is attached only when at least one bound is set.
        part_kwargs["video_metadata"] = types.VideoMetadata(
            start_offset=start_offset, end_offset=end_offset
        )
    return types.Part(**part_kwargs)
347
+
348
def _parse_message_content(
    self, content: Union[str, List[Union[str, Dict]]], *, is_simple: bool = True
) -> Iterator[Union[str, types.Part, types.File]]:
    """Parse LangChain message content and yield parts for Google GenAI.

    Args:
        content: The message content: a string, or a list whose items are
            strings, ``types.File`` objects or typed dicts.
        is_simple: If True, yields raw objects where possible (str, File).
            If False, every yielded item is a ``types.Part``: strings are
            wrapped as text parts and ``types.File`` objects -- including
            those returned by the image/video helpers -- are wrapped as
            ``file_data`` parts.

    Supported dict formats:
    - Text: {"type": "text", "text": "content"}
    - Image: {"type": "image_url", "image_url": "url"} or
      {"type": "image_url", "image_url": {"url": "url"}}
    - Video: {"type": "video_file", ...} or {"type": "video", ...}

    A part that fails to convert is logged and skipped so that one bad
    part does not discard the rest of the message. Unsupported content or
    part types are logged and ignored.
    """

    def as_output(item):
        """Return *item* unchanged in simple mode, else coerce it to a Part."""
        if is_simple:
            return item
        if isinstance(item, str):
            return types.Part(text=item)
        if isinstance(item, types.File):
            return types.Part(
                file_data=types.FileData(
                    mime_type=item.mime_type,
                    file_uri=item.uri,
                )
            )
        return item

    if isinstance(content, str):
        yield as_output(content)
        return

    if not isinstance(content, list):
        self.logger.warning(f"Unsupported content format: {type(content)}")
        return

    for i, part_spec in enumerate(content):
        try:
            if isinstance(part_spec, (str, types.File)):
                yield as_output(part_spec)
                continue

            if not isinstance(part_spec, dict):
                self.logger.warning(f"Skipping non-dict part in content list: {type(part_spec)}")
                continue

            part_type = part_spec.get("type", "").lower()

            if part_type == "text":
                # Empty or missing text is dropped silently.
                if text_content := part_spec.get("text"):
                    yield as_output(text_content)
            elif part_type in ("image", "image_url"):
                # The helper may return an uploaded File; as_output wraps
                # it when Parts are required.
                yield as_output(self._create_image_part(part_spec))
            elif part_type in ("video", "video_file"):
                yield as_output(self._create_video_part(part_spec))
            else:
                self.logger.debug(f"Part with unknown type '{part_type}' was ignored at index {i}.")
        except Exception as e:
            # Deliberate best-effort: log the failing part and keep going.
            self.logger.error(f"Failed to process message part at index {i}: {part_spec}. Error: {e}", exc_info=True)
407
+
408
def _prepare_generation_config(
    self, messages: List[BaseMessage], stop: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Build the generation config, including the system instruction.

    Args:
        messages: All messages for the call; only ``SystemMessage``
            entries with non-empty content are read here.
        stop: Optional stop sequences.

    Returns:
        A dict with ``temperature``, ``max_output_tokens``, ``top_p`` and
        ``top_k`` from the model fields, plus ``stop_sequences`` and
        ``system_instruction`` when present. Entries whose value is
        ``None`` are omitted.
    """
    config = {
        "temperature": self.temperature,
        "max_output_tokens": self.max_tokens,
        "top_p": self.top_p,
        "top_k": self.top_k,
    }
    if stop:
        config["stop_sequences"] = stop

    # Message content may be a plain string or a list of strings / typed
    # dicts; collect the text of both forms so the join below never sees a
    # non-string item.
    system_prompts: List[str] = []
    for msg in messages:
        if not isinstance(msg, SystemMessage) or not msg.content:
            continue
        if isinstance(msg.content, str):
            system_prompts.append(msg.content)
            continue
        for block in msg.content:
            if isinstance(block, str):
                text = block
            elif isinstance(block, dict) and block.get("type") == "text":
                text = block.get("text")
            else:
                text = None
            if text:
                system_prompts.append(text)

    if system_prompts:
        config["system_instruction"] = "\n\n".join(system_prompts)

    # Filter out None values before returning
    return {k: v for k, v in config.items() if v is not None}
430
+
431
def _trim_for_logging(self, contents: Any) -> Any:
    """Produce a log-friendly view of a request payload.

    Strings pass through unchanged, ``types.Content`` objects become a
    ``{"role", "parts"}`` dict with each part trimmed, lists are processed
    item by item, and any other value is returned as-is.
    """
    if isinstance(contents, str):
        return contents

    if isinstance(contents, types.Content):
        trimmed_parts = []
        for part in contents.parts:
            trimmed_parts.append(self._trim_part(part))
        return {"role": contents.role, "parts": trimmed_parts}

    if not isinstance(contents, list):
        return contents

    return [self._trim_for_logging(item) for item in contents]
446
+
447
def _trim_part(self, part: types.Part) -> dict:
    """Summarise one part for logging, reporting inline data by size only.

    Returns:
        A dict with any of ``"text"``, ``"inline_data"`` (MIME type and
        byte count) and ``"file_data"`` (MIME type and URI), one entry per
        truthy attribute of *part*.
    """
    summary = {}

    text = part.text
    if text:
        summary["text"] = text

    blob = part.inline_data
    if blob:
        # Log how much data there is rather than the data itself.
        summary["inline_data"] = {
            "mime_type": blob.mime_type,
            "data_size": f"{len(blob.data)} bytes"
        }

    file_ref = part.file_data
    if file_ref:
        summary["file_data"] = {
            "mime_type": file_ref.mime_type,
            "file_uri": file_ref.file_uri
        }

    return summary
463
+
464
def _generate(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> ChatResult:
    """Generate a chat response from a list of messages.

    Args:
        messages: Conversation to send; system messages become the system
            instruction, the rest become the request contents.
        stop: Optional stop sequences.
        run_manager: Accepted for interface compatibility; not used here.
        **kwargs: Forwarded to the client's ``generate_content`` call.

    Returns:
        A ``ChatResult`` with one generation whose metadata carries the
        model name and finish reason.

    Raises:
        ValueError: If the request or response handling fails; the
            original exception is attached as the cause.
    """
    self.logger.info(f"Generating response for {len(messages)} messages.")

    contents = self._convert_messages(messages)
    config = self._prepare_generation_config(messages, stop)

    try:
        response = self._client.models.generate_content(
            model=self.model_name,
            contents=contents,
            config=config,
            **kwargs,
        )

        # The response may carry no text; message content must not be None.
        generated_text = response.text or ""

        finish_reason = None
        if response.candidates:
            reason = response.candidates[0].finish_reason
            # finish_reason can be absent; when present, report its name.
            finish_reason = getattr(reason, "name", reason)

        message = AIMessage(
            content=generated_text,
            response_metadata={"model_name": self.model_name, "finish_reason": finish_reason},
        )
        return ChatResult(generations=[ChatGeneration(message=message)])

    except Exception as e:
        self.logger.error(f"Error generating content with Google GenAI: {e}", exc_info=True)
        raise ValueError(f"Error during generation: {e}") from e
497
+
498
async def _agenerate(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> ChatResult:
    """Asynchronously generate a chat response.

    Args:
        messages: Conversation to send; system messages become the system
            instruction, the rest become the request contents.
        stop: Optional stop sequences.
        run_manager: Accepted for interface compatibility; not used here.
        **kwargs: Forwarded to the client's async ``generate_content`` call.

    Returns:
        A ``ChatResult`` with one generation whose metadata carries the
        model name and finish reason.

    Raises:
        ValueError: If the request or response handling fails; the
            original exception is attached as the cause.
    """
    self.logger.info(f"Async generating response for {len(messages)} messages.")

    contents = self._convert_messages(messages)
    config = self._prepare_generation_config(messages, stop)

    try:
        # The async API lives under the client's `aio` namespace and
        # mirrors the sync `models.generate_content` used by `_generate`.
        response = await self._client.aio.models.generate_content(
            model=self.model_name,
            contents=contents,
            config=config,
            **kwargs,
        )

        # The response may carry no text; message content must not be None.
        generated_text = response.text or ""

        finish_reason = None
        if response.candidates:
            reason = response.candidates[0].finish_reason
            # finish_reason can be absent; when present, report its name.
            finish_reason = getattr(reason, "name", reason)

        message = AIMessage(
            content=generated_text,
            response_metadata={"model_name": self.model_name, "finish_reason": finish_reason},
        )
        return ChatResult(generations=[ChatGeneration(message=message)])

    except Exception as e:
        self.logger.error(f"Error during async generation: {e}", exc_info=True)
        raise ValueError(f"Error during async generation: {e}") from e
531
+
532
def _stream(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
    """Stream the chat response as it is produced.

    Yields one ``ChatGenerationChunk`` per streamed response that carries
    text, notifying ``run_manager`` (when given) before each yield.

    Raises:
        ValueError: If the request or the iteration fails.
    """
    self.logger.info(f"Streaming response for {len(messages)} messages.")

    request_contents = self._convert_messages(messages)
    request_config = self._prepare_generation_config(messages, stop)

    try:
        response_stream = self._client.models.generate_content_stream(
            model=self.model_name,
            contents=request_contents,
            config=request_config,
            **kwargs,
        )
        for piece in response_stream:
            token = piece.text
            if not token:
                # Responses without text produce no chunk.
                continue
            chunk = ChatGenerationChunk(message=AIMessageChunk(content=token))
            if run_manager:
                run_manager.on_llm_new_token(token, chunk=chunk)
            yield chunk
    except Exception as e:
        self.logger.error(f"Error streaming content: {e}", exc_info=True)
        raise ValueError(f"Error during streaming: {e}")
561
+
562
async def _astream(
    self,
    messages: List[BaseMessage],
    stop: Optional[List[str]] = None,
    run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
    """Asynchronously stream the chat response.

    Yields one ``ChatGenerationChunk`` per streamed response that carries
    text, notifying ``run_manager`` (when given) before each yield.

    Args:
        messages: Conversation to send; system messages become the system
            instruction, the rest become the request contents.
        stop: Optional stop sequences.
        run_manager: Optional async callback manager for new tokens.
        **kwargs: Forwarded to the client's async streaming call.

    Raises:
        ValueError: If the request or the iteration fails; the original
            exception is attached as the cause.
    """
    self.logger.info(f"Async streaming response for {len(messages)} messages.")

    contents = self._convert_messages(messages)
    config = self._prepare_generation_config(messages, stop)

    try:
        # Async counterpart of `models.generate_content_stream`: awaiting
        # the call on the `aio` namespace returns the async iterator.
        stream = await self._client.aio.models.generate_content_stream(
            model=self.model_name,
            contents=contents,
            config=config,
            **kwargs,
        )
        async for chunk_response in stream:
            if text_content := chunk_response.text:
                chunk = ChatGenerationChunk(message=AIMessageChunk(content=text_content))
                if run_manager:
                    await run_manager.on_llm_new_token(text_content, chunk=chunk)
                yield chunk
    except Exception as e:
        self.logger.error(f"Error during async streaming: {e}", exc_info=True)
        raise ValueError(f"Error during async streaming: {e}") from e
@@ -0,0 +1,184 @@
1
+ import json
2
+ import random
3
+ import logging
4
+ from typing import Dict, List, Optional, Union
5
+ from collections import defaultdict
6
+ from langchain_openai import AzureChatOpenAI, ChatOpenAI, AzureOpenAIEmbeddings
7
+ from .gemini_chat_model import GeminiChatModel
8
+
9
+
10
class ModelLoadBalancer:
    """Registry of configured model clients with simple selection strategies.

    The configuration (a JSON file or an equivalent dict) must contain a
    ``"models"`` list. Every entry needs ``"id"``, ``"provider"`` and
    ``"type"``; the remaining required keys depend on the provider (see
    ``_instantiate_model``). Call ``load_config()`` once before
    ``get_model()``.
    """

    def __init__(self,
                 config_path: Optional[str] = "config/models_config.json",
                 config_data: Optional[Dict] = None,
                 logger: Optional[logging.Logger] = None):
        """
        Initializes the ModelLoadBalancer.

        Args:
            config_path: Path to the JSON configuration file.
            config_data: A dictionary containing the model configuration.
                When non-empty it takes precedence over ``config_path``.
            logger: An optional logger instance. If not provided, a default one is created.

        Raises:
            ValueError: If neither config_path nor config_data is provided.
        """
        if not config_path and not config_data:
            raise ValueError("Either 'config_path' or 'config_data' must be provided.")

        self.config_path = config_path
        self.config_data = config_data
        self.logger = logger or logging.getLogger(__name__)
        self.models_config: List[Dict] = []
        self.models: Dict[int, Union[AzureChatOpenAI, ChatOpenAI, AzureOpenAIEmbeddings, GeminiChatModel]] = {}
        self._initialize_state()
        self._config_loaded = False  # Flag to check if config is loaded

    def load_config(self):
        """Load and validate model configurations, then instantiate every model.

        State is replaced only after the whole configuration has been
        validated and every model instantiated, so a failed load leaves no
        partially populated registry behind.

        Raises:
            RuntimeError: If the file is missing or not valid JSON, the
                configuration is malformed, a required field is missing,
                or a provider is unsupported. The original exception is
                attached as the cause.
        """
        self.logger.debug("Model balancer: loading configuration.")
        try:
            if self.config_data:
                config = self.config_data
            elif self.config_path:
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    config = json.load(f)
            else:
                # This case is handled in __init__, but as a safeguard:
                raise RuntimeError("No configuration source provided (path or data).")

            if not isinstance(config, dict) or not isinstance(config.get('models'), list):
                raise ValueError("Configuration must contain a 'models' list.")

            models_config = config['models']
            for model in models_config:
                if not isinstance(model, dict) or any(
                    key not in model for key in ('id', 'provider', 'type')
                ):
                    self.logger.error("Model config must contain 'id', 'provider', and 'type' fields.")
                    raise ValueError("Model config must contain 'id', 'provider', and 'type' fields.")

            # Build into a local dict first; commit only on full success.
            models = {
                model_config['id']: self._instantiate_model(model_config)
                for model_config in models_config
            }
        except KeyError as e:
            # A provider-specific field (e.g. 'api_key') is missing.
            self._config_loaded = False
            self.logger.error(f"Failed to load model configuration: missing field {e}", exc_info=True)
            raise RuntimeError(f"Failed to load model configuration: missing field {e}") from e
        except (FileNotFoundError, json.JSONDecodeError, ValueError) as e:
            self._config_loaded = False
            self.logger.error(f"Failed to load model configuration: {e}", exc_info=True)
            raise RuntimeError(f"Failed to load model configuration: {e}") from e

        self.models_config = models_config
        self.models = models
        self._config_loaded = True
        self.logger.debug("Model balancer: configuration loaded successfully.")

    def get_model(self, provider: Optional[str] = None, model_type: Optional[str] = None,
                  deployment_name: Optional[str] = None):
        """
        Get a model instance.

        Can fetch a model in two ways:
        1. By its specific `deployment_name` (checked first).
        2. By `provider` and `model_type`, which will select a model using round-robin.

        Args:
            provider: The model provider (e.g., 'azure-openai', 'google-genai').
            model_type: The type of model (e.g., 'inference', 'embedding').
            deployment_name: The unique name for the model deployment.

        Returns:
            An instantiated language model object.

        Raises:
            RuntimeError: If the model configuration has not been loaded.
            ValueError: If the requested model cannot be found or if parameters are insufficient.
        """
        if not self._config_loaded:
            self.logger.error("Model configuration not loaded")
            raise RuntimeError("Model configuration not loaded")

        if deployment_name:
            for model_config in self.models_config:
                if model_config.get('deployment_name') == deployment_name:
                    return self.models[model_config['id']]
            self.logger.error(f"No model found for deployment name: {deployment_name}")
            raise ValueError(f"No model found for deployment name: {deployment_name}")

        if provider and model_type:
            candidates = [
                model for model in self.models_config
                if model.get('provider') == provider and model.get('type') == model_type
            ]
            if not candidates:
                self.logger.error(f"No models found for provider '{provider}' and type '{model_type}'")
                raise ValueError(f"No models found for provider '{provider}' and type '{model_type}'")

            selected_model_config = self._round_robin_selection(candidates)
            return self.models[selected_model_config['id']]

        raise ValueError("Either 'deployment_name' or both 'provider' and 'model_type' must be provided.")

    def _instantiate_model(self, model_config: Dict):
        """Instantiate and return a model object based on the model configuration.

        Required keys per provider:
        - ``azure-openai``: deployment_name, api_version, api_base, api_key
        - ``openai``: api_key
        - ``azure-openai-embeddings``: deployment_name, api_version, api_base, api_key
        - ``google-genai``: api_key, deployment_name (used as the model name)

        Raises:
            KeyError: If a required key is missing.
            ValueError: If the provider is not supported.
        """
        provider = model_config['provider']
        self.logger.debug(f"Model balancer: instantiating {provider} -- {model_config.get('deployment_name')}")

        if provider == 'azure-openai':
            kwargs = {
                'azure_deployment': model_config['deployment_name'],
                'openai_api_version': model_config['api_version'],
                'azure_endpoint': model_config['api_base'],
                'openai_api_key': model_config['api_key']
            }
            if 'temperature' in model_config:
                kwargs['temperature'] = model_config['temperature']
            # Streaming is switched off for this specific deployment name.
            if model_config.get('deployment_name') == 'o1-mini':
                kwargs['disable_streaming'] = True
            return AzureChatOpenAI(**kwargs)
        elif provider == 'openai':
            # NOTE(review): no model name is passed here, so the client's
            # own default model is used -- confirm this is intended.
            kwargs = {
                'openai_api_key': model_config['api_key']
            }
            if 'temperature' in model_config:
                kwargs['temperature'] = model_config['temperature']
            return ChatOpenAI(**kwargs)
        elif provider == 'azure-openai-embeddings':
            return AzureOpenAIEmbeddings(
                azure_deployment=model_config['deployment_name'],
                openai_api_version=model_config['api_version'],
                api_key=model_config['api_key'],
                azure_endpoint=model_config['api_base'],
                chunk_size=16, request_timeout=60, max_retries=2
            )
        elif provider == 'google-genai':
            kwargs = {
                'google_api_key': model_config['api_key'],
                'model_name': model_config['deployment_name']  # Map deployment_name to model_name
            }
            if 'temperature' in model_config:
                kwargs['temperature'] = model_config['temperature']
            if 'max_tokens' in model_config:
                kwargs['max_tokens'] = model_config['max_tokens']
            return GeminiChatModel(**kwargs)
        else:
            self.logger.error(f"Unsupported provider: {provider}")
            raise ValueError(f"Unsupported provider: {provider}")

    def _initialize_state(self):
        """Reset the selection bookkeeping (usage counts and rotation cursors)."""
        self.active_models = []
        self.usage_counter = defaultdict(int)
        self.current_indices = {}

    def _round_robin_selection(self, candidates: list) -> Dict:
        """Return the next candidate in rotation and count its use.

        The rotation cursor is keyed by the tuple of candidate ids, so
        repeated calls with an equal candidate set advance through it even
        though callers build a fresh list each time.

        Raises:
            ValueError: If ``candidates`` is empty.
        """
        if not candidates:
            raise ValueError("No candidates to select from.")

        group_key = tuple(model['id'] for model in candidates)
        idx = self.current_indices.get(group_key, 0) % len(candidates)
        model = candidates[idx]
        self.current_indices[group_key] = (idx + 1) % len(candidates)
        self.usage_counter[model['id']] += 1

        return model

    def _least_used_selection(self, candidates: list) -> Dict:
        """Return a candidate with the lowest usage count (ties broken randomly).

        Usage is tracked under each config's ``'id'``, the same key
        ``_round_robin_selection`` increments.

        Raises:
            ValueError: If ``candidates`` is empty.
        """
        if not candidates:
            raise ValueError("No candidates to select from.")

        min_usage = min(self.usage_counter[m['id']] for m in candidates)
        least_used = [m for m in candidates if self.usage_counter[m['id']] == min_usage]
        model = random.choice(least_used)
        self.usage_counter[model['id']] += 1
        return model
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "crewplus"
9
- version = "0.1.1"
9
+ version = "0.1.3"
10
10
  description = "Base services for CrewPlus AI applications"
11
11
  authors = [
12
12
  { name = "Tim Liu", email = "tim@opsmateai.com" },
@@ -17,6 +17,9 @@ dependencies = [
17
17
  "langchain==0.3.25",
18
18
  "langchain-openai==0.3.24",
19
19
  "google-genai==1.21.1",
20
+ "mkdocs (>=1.6.1,<2.0.0)",
21
+ "mkdocs-material (>=9.6.14,<10.0.0)",
22
+ "mkdocstrings-python (>=1.16.12,<2.0.0)",
20
23
  ]
21
24
 
22
25
  [project.license]
@@ -0,0 +1,77 @@
1
+ {
2
+ "models": [
3
+ {
4
+ "id": 1,
5
+ "provider": "azure-openai",
6
+ "type": "inference",
7
+ "deployment_name": "gpt-o3mini-eastus2-RPM25",
8
+ "api_version": "2024-12-01-preview",
9
+ "api_base": "https://crewplus-eastus2.openai.azure.com",
10
+ "api_key": "c67cb5d0d8ae4aef81d7f42aeae274b6"
11
+ },
12
+ {
13
+ "id": 2,
14
+ "provider": "azure-openai",
15
+ "type": "ingestion",
16
+ "deployment_name": "gpt-4o",
17
+ "api_version": "2025-01-01-preview",
18
+ "api_base": "https://crewplus-eastus2.openai.azure.com",
19
+ "api_key": "c67cb5d0d8ae4aef81d7f42aeae274b6",
20
+ "temperature": 0.0
21
+ },
22
+ {
23
+ "id": 3,
24
+ "provider": "azure-openai",
25
+ "type": "inference",
26
+ "deployment_name": "gpt-4.1",
27
+ "api_version": "2025-01-01-preview",
28
+ "api_base": "https://crewplus-eastus2.openai.azure.com",
29
+ "api_key": "c67cb5d0d8ae4aef81d7f42aeae274b6",
30
+ "temperature": 0.0
31
+ },
32
+ {
33
+ "id": 4,
34
+ "provider": "azure-openai",
35
+ "type": "ingestion",
36
+ "deployment_name": "cpai-gpt4o-westus",
37
+ "api_version": "2025-01-01-preview",
38
+ "api_base": "https://crewplus-westus.openai.azure.com",
39
+ "api_key": "b93bc4d2ef8e4298bd8390002922d084",
40
+ "temperature": 0.0
41
+ },
42
+ {
43
+ "id": 5,
44
+ "provider": "azure-openai-embeddings",
45
+ "type": "embedding",
46
+ "deployment_name": "cpai-text-embedding-ada-002-westus",
47
+ "api_version": "2024-02-01",
48
+ "api_base": "https://crewplus-westus.openai.azure.com",
49
+ "api_key": "b93bc4d2ef8e4298bd8390002922d084"
50
+ },
51
+ {
52
+ "id": 6,
53
+ "provider": "azure-openai-embeddings",
54
+ "type": "embedding",
55
+ "deployment_name": "cpai-text-embedding-3-large-eastus2",
56
+ "api_version": "1",
57
+ "api_base": "https://crewplus-eastus2.openai.azure.com",
58
+ "api_key": "c67cb5d0d8ae4aef81d7f42aeae274b6"
59
+ },
60
+ {
61
+ "id": 7,
62
+ "provider": "google-genai",
63
+ "type": "inference",
64
+ "deployment_name": "gemini-2.5-flash",
65
+ "api_key": "AIzaSyDkZbcGcV7SB6OyN4XkK_sF2mzO2E-nKQk",
66
+ "temperature": 0.0
67
+ },
68
+ {
69
+ "id": 8,
70
+ "provider": "google-genai",
71
+ "type": "ingestion",
72
+ "deployment_name": "gemini-2.5-pro",
73
+ "api_key": "AIzaSyDkZbcGcV7SB6OyN4XkK_sF2mzO2E-nKQk",
74
+ "temperature": 0.0
75
+ }
76
+ ]
77
+ }
@@ -1,365 +0,0 @@
1
- import os
2
- import asyncio
3
- from typing import Any, Dict, Iterator, List, Optional, AsyncIterator
4
- from google import genai
5
- from langchain_core.language_models import BaseChatModel
6
- from langchain_core.messages import (
7
- AIMessage,
8
- AIMessageChunk,
9
- BaseMessage,
10
- HumanMessage,
11
- SystemMessage,
12
- )
13
- from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
14
- from langchain_core.callbacks import (
15
- CallbackManagerForLLMRun,
16
- AsyncCallbackManagerForLLMRun
17
- )
18
- from pydantic import Field, SecretStr
19
- from langchain_core.utils import convert_to_secret_str
20
-
21
class GeminiChatModel(BaseChatModel):
    """Custom chat model using Google's genai client package directly with real streaming support.

    This implementation provides direct access to Google's genai features
    while being compatible with LangChain's BaseChatModel interface.

    Example:
        ```python
        model = GeminiChatModel(
            model_name="gemini-2.0-flash",
            google_api_key="your-api-key",
            temperature=0.7
        )

        # Basic usage
        response = model.invoke("Hello, how are you?")
        print(response.content)

        # Streaming usage
        for chunk in model.stream("Tell me a story"):
            print(chunk.content, end="")

        # Async usage
        async def test_async():
            response = await model.ainvoke("Hello!")
            print(response.content)

            async for chunk in model.astream("Tell me a story"):
                print(chunk.content, end="")
        ```
    """

    # Model configuration
    model_name: str = Field(default="gemini-2.0-flash", description="The Google model name to use")
    google_api_key: Optional[SecretStr] = Field(default=None, description="Google API key")
    temperature: Optional[float] = Field(default=0.7, description="Sampling temperature")
    max_tokens: Optional[int] = Field(default=None, description="Maximum tokens to generate")
    top_p: Optional[float] = Field(default=None, description="Top-p sampling parameter")
    top_k: Optional[int] = Field(default=None, description="Top-k sampling parameter")

    # Internal client
    _client: Optional[genai.Client] = None

    def __init__(self, **kwargs):
        """Validate the fields and create the underlying ``genai.Client``.

        Raises:
            ValueError: If no API key was passed and ``GOOGLE_API_KEY`` is
                not set in the environment.
        """
        super().__init__(**kwargs)

        # Get API key from environment if not provided
        if self.google_api_key is None:
            api_key = os.getenv("GOOGLE_API_KEY")
            if api_key:
                self.google_api_key = convert_to_secret_str(api_key)

        # Initialize the Google GenAI client
        if self.google_api_key:
            self._client = genai.Client(
                api_key=self.google_api_key.get_secret_value()
            )
        else:
            raise ValueError("Google API key is required. Set GOOGLE_API_KEY environment variable or pass google_api_key parameter.")

    @property
    def _llm_type(self) -> str:
        """Return identifier for the model type."""
        return "custom_google_genai"

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters for tracing."""
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "top_p": self.top_p,
            "top_k": self.top_k,
        }

    def _convert_messages_to_genai_format(self, messages: List[BaseMessage]) -> str:
        """Flatten LangChain messages into a single prompt string.

        System messages become ``Instructions: ...`` lines, human messages
        ``Human: ...`` and AI messages ``Assistant: ...``; any other message
        type is rendered as a human turn. Parts are joined with blank lines
        and the prompt always ends with an open ``Assistant:`` turn.
        """
        prompt_parts = []

        for message in messages:
            if isinstance(message, SystemMessage):
                # Convert system message to instruction format
                prompt_parts.append(f"Instructions: {message.content}")
            elif isinstance(message, HumanMessage):
                prompt_parts.append(f"Human: {message.content}")
            elif isinstance(message, AIMessage):
                prompt_parts.append(f"Assistant: {message.content}")
            else:
                # Default to human format for unknown message types
                prompt_parts.append(f"Human: {str(message.content)}")

        # Make sure the last turn before the assistant cue is a human one
        if not prompt_parts or not prompt_parts[-1].startswith("Human:"):
            prompt_parts.append("Human: Please respond to the above.")

        prompt_parts.append("Assistant:")

        return "\n\n".join(prompt_parts)

    def _prepare_generation_config(self, stop: Optional[List[str]] = None) -> Dict[str, Any]:
        """Build the generation config dict, including only the settings that are set."""
        generation_config = {}
        if self.temperature is not None:
            generation_config["temperature"] = self.temperature
        if self.max_tokens is not None:
            generation_config["max_output_tokens"] = self.max_tokens
        if self.top_p is not None:
            generation_config["top_p"] = self.top_p
        if self.top_k is not None:
            generation_config["top_k"] = self.top_k
        if stop:
            generation_config["stop_sequences"] = stop
        return generation_config

    def _build_request(self, messages: List[BaseMessage], stop: Optional[List[str]]) -> Dict[str, Any]:
        """Return the keyword arguments shared by all genai generate calls."""
        generation_config = self._prepare_generation_config(stop)
        return {
            "model": self.model_name,
            "contents": self._convert_messages_to_genai_format(messages),
            # An empty config is sent as None rather than as an empty dict.
            "config": generation_config if generation_config else None,
        }

    def _to_chat_result(self, response: Any) -> ChatResult:
        """Wrap a non-streaming genai response in a single-generation ChatResult."""
        generated_text = response.text if hasattr(response, 'text') else str(response)
        message = AIMessage(
            content=generated_text,
            response_metadata={
                "model_name": self.model_name,
                "finish_reason": getattr(response, 'finish_reason', None),
            }
        )
        return ChatResult(generations=[ChatGeneration(message=message)])

    def _to_chunk(self, chunk_response: Any) -> Optional[ChatGenerationChunk]:
        """Convert one streamed genai response into a chunk, or None if it has no text."""
        content = getattr(chunk_response, 'text', None)
        if not content:
            return None
        return ChatGenerationChunk(
            message=AIMessageChunk(
                content=content,
                response_metadata={
                    "model_name": self.model_name,
                    "finish_reason": getattr(chunk_response, 'finish_reason', None),
                }
            )
        )

    @staticmethod
    def _simulated_chunks(generated_text: str) -> Iterator[ChatGenerationChunk]:
        """Yield a complete text word by word to imitate streaming.

        Every word after the first is prefixed with a single space, so the
        original whitespace layout of the text is not preserved.
        """
        for i, word in enumerate(generated_text.split()):
            content = f" {word}" if i > 0 else word
            yield ChatGenerationChunk(message=AIMessageChunk(content=content))

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Generate a response using Google's genai client.

        Raises:
            ValueError: If the client call or the response conversion fails;
                the original exception is attached as the cause.
        """
        request = self._build_request(messages, stop)

        try:
            response = self._client.models.generate_content(**request)
            return self._to_chat_result(response)
        except Exception as e:
            raise ValueError(f"Error generating content with Google GenAI: {str(e)}") from e

    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Async generate a response by running the blocking client call in an executor.

        Raises:
            ValueError: If the client call or the response conversion fails;
                the original exception is attached as the cause.
        """
        request = self._build_request(messages, stop)

        try:
            # We are inside a coroutine, so a running loop is guaranteed.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: self._client.models.generate_content(**request)
            )
            return self._to_chat_result(response)
        except Exception as e:
            raise ValueError(f"Error generating content with Google GenAI: {str(e)}") from e

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream the output using Google's genai client with real streaming.

        If streaming fails before anything was emitted, a non-streaming
        request is made instead and its text is yielded word by word.

        Raises:
            ValueError: If streaming fails after chunks were already
                emitted, or if both streaming and the fallback fail.
        """
        request = self._build_request(messages, stop)
        emitted = False

        try:
            for chunk_response in self._client.models.generate_content_stream(**request):
                chunk = self._to_chunk(chunk_response)
                if chunk is None:
                    continue
                emitted = True
                yield chunk

                # Trigger callback for new token
                if run_manager:
                    run_manager.on_llm_new_token(chunk.message.content, chunk=chunk)

        except Exception as e:
            if emitted:
                # Regenerating now would repeat text the consumer already
                # received, so surface the failure instead.
                raise ValueError(f"Error streaming content with Google GenAI: {str(e)}") from e

            # Fallback to non-streaming if streaming fails
            try:
                response = self._client.models.generate_content(**request)
                generated_text = response.text if hasattr(response, 'text') else str(response)

                for chunk in self._simulated_chunks(generated_text):
                    yield chunk

                    if run_manager:
                        run_manager.on_llm_new_token(chunk.message.content, chunk=chunk)

            except Exception as fallback_e:
                raise ValueError(f"Error streaming content with Google GenAI: {str(e)}. Fallback also failed: {str(fallback_e)}") from fallback_e

    async def _astream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        """Async stream the output using Google's genai client.

        The synchronous genai stream is advanced one item at a time in an
        executor so the event loop is not blocked while waiting for data.
        If streaming fails before anything was emitted, ``_agenerate`` is
        used instead and its text is yielded word by word.

        Raises:
            ValueError: If streaming fails after chunks were already
                emitted, or if both streaming and the fallback fail.
        """
        request = self._build_request(messages, stop)
        emitted = False

        try:
            loop = asyncio.get_running_loop()

            stream = await loop.run_in_executor(
                None,
                lambda: iter(self._client.models.generate_content_stream(**request))
            )

            # StopIteration cannot cross an executor future, so exhaustion is
            # signalled through a sentinel default passed to next().
            exhausted = object()
            while True:
                chunk_response = await loop.run_in_executor(None, next, stream, exhausted)
                if chunk_response is exhausted:
                    break

                chunk = self._to_chunk(chunk_response)
                if chunk is None:
                    continue
                emitted = True
                yield chunk

                # Trigger callback for new token
                if run_manager:
                    await run_manager.on_llm_new_token(chunk.message.content, chunk=chunk)

        except Exception as e:
            if emitted:
                # Regenerating now would repeat text the consumer already
                # received, so surface the failure instead.
                raise ValueError(f"Error async streaming content with Google GenAI: {str(e)}") from e

            # Fallback to async generate and simulate streaming
            try:
                result = await self._agenerate(messages, stop, run_manager, **kwargs)
                generated_text = result.generations[0].message.content

                for chunk in self._simulated_chunks(generated_text):
                    yield chunk

                    if run_manager:
                        await run_manager.on_llm_new_token(chunk.message.content, chunk=chunk)

            except Exception as fallback_e:
                raise ValueError(f"Error async streaming content with Google GenAI: {str(e)}. Fallback also failed: {str(fallback_e)}") from fallback_e
File without changes
File without changes