crewplus 0.2.15__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- crewplus/services/__init__.py +9 -2
- crewplus/services/azure_chat_model.py +201 -0
- crewplus/services/gemini_chat_model.py +302 -22
- crewplus/services/model_load_balancer.py +4 -3
- {crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/METADATA +2 -1
- {crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/RECORD +9 -8
- {crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/WHEEL +0 -0
- {crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/entry_points.txt +0 -0
- {crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/licenses/LICENSE +0 -0
crewplus/services/__init__.py
CHANGED
@@ -1,6 +1,13 @@
 from .gemini_chat_model import GeminiChatModel
 from .init_services import init_load_balancer, get_model_balancer
 from .model_load_balancer import ModelLoadBalancer
+from .azure_chat_model import TracedAzureChatOpenAI
 
-
-
+__all__ = [
+    "GeminiChatModel",
+    "init_load_balancer",
+    "get_model_balancer",
+    "ModelLoadBalancer",
+    "init_services",
+    "TracedAzureChatOpenAI"
+]
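With the new export in place, the traced Azure model is importable from the package namespace alongside the existing services. A minimal usage sketch (the deployment name and API version are placeholders, and the standard Azure OpenAI environment variables are assumed to be set):

    from crewplus.services import TracedAzureChatOpenAI

    # Placeholder Azure settings; AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY
    # are assumed to be set in the environment.
    model = TracedAzureChatOpenAI(
        azure_deployment="your-deployment",
        api_version="2024-05-01-preview",
    )
    print(model.invoke("ping").content)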
crewplus/services/azure_chat_model.py
ADDED
@@ -0,0 +1,201 @@
+import os
+import logging
+from typing import Any, Optional
+
+from langchain_openai.chat_models.azure import AzureChatOpenAI
+from pydantic import Field
+
+# Langfuse imports with graceful fallback
+try:
+    from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
+    LANGFUSE_AVAILABLE = True
+except ImportError:
+    LANGFUSE_AVAILABLE = False
+    LangfuseCallbackHandler = None
+
+class TracedAzureChatOpenAI(AzureChatOpenAI):
+    """
+    Wrapper for AzureChatOpenAI that integrates with Langfuse for tracing.
+
+    This class automatically handles Langfuse callback integration, making it easier
+    to trace and debug your interactions with the Azure OpenAI service.
+
+    **Langfuse Integration:**
+    Langfuse tracing is automatically enabled when environment variables are set:
+    - LANGFUSE_PUBLIC_KEY: Your Langfuse public key
+    - LANGFUSE_SECRET_KEY: Your Langfuse secret key
+    - LANGFUSE_HOST: Langfuse host URL (optional, defaults to https://cloud.langfuse.com)
+
+    You can also configure it explicitly or disable it. Session and user tracking
+    can be set per call via metadata in the `config` argument.
+
+    Attributes:
+        logger (Optional[logging.Logger]): An optional logger instance.
+        enable_langfuse (Optional[bool]): Enable/disable Langfuse tracing (auto-detect if None).
+
+    Example:
+        .. code-block:: python
+
+            # Set Langfuse environment variables (optional)
+            import os
+            os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
+            os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
+
+            from crewplus.services.azure_chat_model import TracedAzureChatOpenAI
+            from langchain_core.messages import HumanMessage
+
+            # Initialize the model
+            model = TracedAzureChatOpenAI(
+                azure_deployment="your-deployment",
+                api_version="2024-05-01-preview",
+            )
+
+            # --- Text-only usage (automatically traced if env vars set) ---
+            response = model.invoke("Hello, how are you?")
+            print("Text response:", response.content)
+
+            # --- Langfuse tracing with session/user tracking ---
+            response = model.invoke(
+                "What is AI?",
+                config={
+                    "metadata": {
+                        "langfuse_session_id": "chat-session-123",
+                        "langfuse_user_id": "user-456"
+                    }
+                }
+            )
+
+            # --- Disable Langfuse for specific calls ---
+            response = model.invoke(
+                "Hello without tracing",
+                config={"metadata": {"langfuse_disabled": True}}
+            )
+
+            # --- Asynchronous Streaming Usage ---
+            import asyncio
+            from langchain_core.messages import HumanMessage
+
+            async def main():
+                messages = [HumanMessage(content="Tell me a short story about a brave robot.")]
+                print("\nAsync Streaming response:")
+                async for chunk in model.astream(messages):
+                    print(chunk.content, end="", flush=True)
+                print()
+
+            # In a real application, you would run this with:
+            # asyncio.run(main())
+    """
+    logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance", exclude=True)
+    enable_langfuse: Optional[bool] = Field(default=None, description="Enable Langfuse tracing (auto-detect if None)")
+
+    langfuse_handler: Optional[LangfuseCallbackHandler] = Field(default=None, exclude=True)
+
+    def __init__(self, **kwargs: Any):
+        super().__init__(**kwargs)
+
+        # Initialize logger
+        if self.logger is None:
+            self.logger = logging.getLogger(f"{self.__class__.__module__}.{self.__class__.__name__}")
+            if not self.logger.handlers:
+                self.logger.addHandler(logging.StreamHandler())
+                self.logger.setLevel(logging.INFO)
+
+        # Initialize Langfuse handler
+        self._initialize_langfuse()
+
+    def _initialize_langfuse(self):
+        """Initialize Langfuse handler if enabled and available."""
+        if not LANGFUSE_AVAILABLE:
+            if self.enable_langfuse is True:
+                self.logger.warning("Langfuse is not installed. Install with: pip install langfuse")
+            return
+
+        # Auto-detect if Langfuse should be enabled
+        if self.enable_langfuse is None:
+            langfuse_env_vars = ["LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
+            self.enable_langfuse = any(os.getenv(var) for var in langfuse_env_vars)
+
+        if not self.enable_langfuse:
+            return
+
+        try:
+            self.langfuse_handler = LangfuseCallbackHandler()
+            self.logger.info(f"Langfuse tracing enabled for TracedAzureChatOpenAI with deployment: {self.deployment_name}")
+        except Exception as e:
+            self.logger.warning(f"Failed to initialize Langfuse: {e}")
+            self.langfuse_handler = None
+
+    def invoke(self, input, config=None, **kwargs):
+        """Override invoke to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        if self.langfuse_handler:
+            # Do not trace if disabled via metadata
+            if config.get("metadata", {}).get("langfuse_disabled"):
+                return super().invoke(input, config=config, **kwargs)
+
+            callbacks = config.get("callbacks", [])
+            has_langfuse = any(isinstance(callback, LangfuseCallbackHandler) for callback in callbacks)
+
+            if not has_langfuse:
+                callbacks = callbacks + [self.langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return super().invoke(input, config=config, **kwargs)
+
+    async def ainvoke(self, input, config=None, **kwargs):
+        """Override ainvoke to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        if self.langfuse_handler:
+            # Do not trace if disabled via metadata
+            if config.get("metadata", {}).get("langfuse_disabled"):
+                return await super().ainvoke(input, config=config, **kwargs)
+
+            callbacks = config.get("callbacks", [])
+            has_langfuse = any(isinstance(callback, LangfuseCallbackHandler) for callback in callbacks)
+
+            if not has_langfuse:
+                callbacks = callbacks + [self.langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return await super().ainvoke(input, config=config, **kwargs)
+
+    def stream(self, input, config=None, **kwargs):
+        """Override stream to add Langfuse callback and request usage metadata."""
+        if config is None:
+            config = {}
+
+        # Add stream_options to get usage data for Langfuse
+        stream_options = kwargs.get("stream_options", {})
+        stream_options["include_usage"] = True
+        kwargs["stream_options"] = stream_options
+
+        # Add Langfuse callback if enabled and not already present
+        if self.langfuse_handler and not config.get("metadata", {}).get("langfuse_disabled"):
+            callbacks = config.get("callbacks", [])
+            if not any(isinstance(c, LangfuseCallbackHandler) for c in callbacks):
+                config["callbacks"] = callbacks + [self.langfuse_handler]
+
+        yield from super().stream(input, config=config, **kwargs)
+
+    async def astream(self, input, config=None, **kwargs):
+        """Override astream to add Langfuse callback and request usage metadata."""
+        if config is None:
+            config = {}
+
+        # Add stream_options to get usage data for Langfuse
+        stream_options = kwargs.get("stream_options", {})
+        stream_options["include_usage"] = True
+        kwargs["stream_options"] = stream_options
+
+        # Add Langfuse callback if enabled and not already present
+        if self.langfuse_handler and not config.get("metadata", {}).get("langfuse_disabled"):
+            callbacks = config.get("callbacks", [])
+            if not any(isinstance(c, LangfuseCallbackHandler) for c in callbacks):
+                config["callbacks"] = callbacks + [self.langfuse_handler]
+
+        async for chunk in super().astream(input, config=config, **kwargs):
+            yield chunk
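All four overrides above share one step: append the shared Langfuse handler to whatever callbacks the caller supplied, unless a handler of the same type is already present. A standalone sketch of that merge step (the function name is illustrative, not part of the package):

    def merge_callbacks(config: dict, handler) -> dict:
        # Return a copy of config whose callbacks include handler exactly once.
        callbacks = config.get("callbacks", [])
        if not any(isinstance(c, type(handler)) for c in callbacks):
            callbacks = callbacks + [handler]
        return {**config, "callbacks": callbacks}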
crewplus/services/gemini_chat_model.py
CHANGED
@@ -22,6 +22,14 @@ from langchain_core.callbacks import (
 from pydantic import Field, SecretStr
 from langchain_core.utils import convert_to_secret_str
 
+# Langfuse imports with graceful fallback
+try:
+    from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
+    LANGFUSE_AVAILABLE = True
+except ImportError:
+    LANGFUSE_AVAILABLE = False
+    LangfuseCallbackHandler = None
+
 class GeminiChatModel(BaseChatModel):
     """Custom chat model for Google Gemini, supporting text, image, and video.
 
@@ -33,6 +41,15 @@ class GeminiChatModel(BaseChatModel):
     API keys can be provided directly or loaded from the `GOOGLE_API_KEY`
     environment variable.
 
+    **Langfuse Integration:**
+    Langfuse tracing is automatically enabled when environment variables are set:
+    - LANGFUSE_PUBLIC_KEY: Your Langfuse public key
+    - LANGFUSE_SECRET_KEY: Your Langfuse secret key
+    - LANGFUSE_HOST: Langfuse host URL (optional, defaults to https://cloud.langfuse.com)
+
+    You can also configure it explicitly or disable it. Session and user tracking
+    can be set per call via metadata.
+
     Attributes:
         model_name (str): The Google model name to use (e.g., "gemini-1.5-flash").
         google_api_key (Optional[SecretStr]): Your Google API key.
@@ -41,10 +58,18 @@ class GeminiChatModel(BaseChatModel):
         top_p (Optional[float]): The top-p (nucleus) sampling parameter.
         top_k (Optional[int]): The top-k sampling parameter.
         logger (Optional[logging.Logger]): An optional logger instance.
+        enable_langfuse (Optional[bool]): Enable/disable Langfuse tracing (auto-detect if None).
 
     Example:
         .. code-block:: python
 
+            # Set Langfuse environment variables (optional)
+            import os
+            os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
+            os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
+            os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"  # EU region or self-hosted
+            # os.environ["LANGFUSE_HOST"] = "https://us.cloud.langfuse.com"  # US region
+
             from crewplus.services import GeminiChatModel
             from langchain_core.messages import HumanMessage
             import base64
@@ -54,10 +79,21 @@ class GeminiChatModel(BaseChatModel):
             logger = logging.getLogger("my_app.gemini")
             model = GeminiChatModel(model_name="gemini-2.0-flash", logger=logger)
 
-            # --- Text-only usage ---
+            # --- Text-only usage (automatically traced if env vars set) ---
             response = model.invoke("Hello, how are you?")
             print("Text response:", response.content)
 
+            # --- Langfuse tracing with session/user tracking ---
+            response = model.invoke(
+                "What is AI?",
+                config={
+                    "metadata": {
+                        "langfuse_session_id": "chat-session-123",
+                        "langfuse_user_id": "user-456"
+                    }
+                }
+            )
+
             # --- Image processing with base64 data URI ---
             # Replace with a path to your image
             image_path = "path/to/your/image.jpg"
@@ -138,6 +174,23 @@ class GeminiChatModel(BaseChatModel):
             print("Streaming response:")
             for chunk in model.stream([url_message]):
                 print(chunk.content, end="", flush=True)
+
+            # --- Traditional Langfuse callback approach still works ---
+            from langfuse.langchain import CallbackHandler
+            langfuse_handler = CallbackHandler(
+                session_id="session-123",
+                user_id="user-456"
+            )
+            response = model.invoke(
+                "Hello with manual callback",
+                config={"callbacks": [langfuse_handler]}
+            )
+
+            # --- Disable Langfuse for specific calls ---
+            response = model.invoke(
+                "Hello without tracing",
+                config={"metadata": {"langfuse_disabled": True}}
+            )
     """
 
     # Model configuration
@@ -149,8 +202,12 @@ class GeminiChatModel(BaseChatModel):
     top_k: Optional[int] = Field(default=None, description="Top-k sampling parameter")
     logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance")
 
-    #
+    # Langfuse configuration
+    enable_langfuse: Optional[bool] = Field(default=None, description="Enable Langfuse tracing (auto-detect if None)")
+
+    # Internal clients
     _client: Optional[genai.Client] = None
+    _langfuse_handler: Optional[LangfuseCallbackHandler] = None
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -178,6 +235,135 @@ class GeminiChatModel(BaseChatModel):
             error_msg = "Google API key is required. Set GOOGLE_API_KEY environment variable or pass google_api_key parameter."
             self.logger.error(error_msg)
             raise ValueError(error_msg)
+
+        # Initialize Langfuse handler
+        self._initialize_langfuse()
+
+    def _initialize_langfuse(self):
+        """Initialize Langfuse handler if enabled and available."""
+        if not LANGFUSE_AVAILABLE:
+            if self.enable_langfuse is True:
+                self.logger.warning("Langfuse is not installed. Install with: pip install langfuse")
+            return
+
+        # Auto-detect if Langfuse should be enabled
+        if self.enable_langfuse is None:
+            # Check if Langfuse environment variables are set
+            langfuse_env_vars = ["LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
+            self.enable_langfuse = any(os.getenv(var) for var in langfuse_env_vars)
+
+        if not self.enable_langfuse:
+            return
+
+        try:
+            # Initialize Langfuse handler with minimal config
+            # Session/user tracking will be handled per call via metadata
+            self._langfuse_handler = LangfuseCallbackHandler()
+            self.logger.info("Langfuse tracing enabled for GeminiChatModel")
+
+        except Exception as e:
+            self.logger.warning(f"Failed to initialize Langfuse: {e}")
+            self._langfuse_handler = None
+
+    def _should_add_langfuse_callback(self, run_manager: Optional[CallbackManagerForLLMRun] = None) -> bool:
+        """Check if Langfuse callback should be added."""
+        if not self._langfuse_handler:
+            return False
+
+        # Check if Langfuse is already in the callback manager
+        if run_manager and hasattr(run_manager, 'handlers'):
+            has_langfuse = any(
+                isinstance(handler, LangfuseCallbackHandler)
+                for handler in run_manager.handlers
+            )
+            if has_langfuse:
+                return False
+
+        return True
+
+    def invoke(self, input, config=None, **kwargs):
+        """Override invoke to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return super().invoke(input, config=config, **kwargs)
+
+    async def ainvoke(self, input, config=None, **kwargs):
+        """Override ainvoke to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return await super().ainvoke(input, config=config, **kwargs)
+
+    def stream(self, input, config=None, **kwargs):
+        """Override stream to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return super().stream(input, config=config, **kwargs)
+
+    async def astream(self, input, config=None, **kwargs):
+        """Override astream to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return super().astream(input, config=config, **kwargs)
 
     @property
     def _llm_type(self) -> str:
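Because the handler is created in the constructor, tracing can also be switched off wholesale at construction time: with enable_langfuse=False, _initialize_langfuse returns before creating a handler and the overrides above become plain pass-throughs. A small sketch:

    # No Langfuse handler is created, so the invoke/stream overrides
    # simply delegate to the parent class.
    model = GeminiChatModel(
        model_name="gemini-2.0-flash",
        enable_langfuse=False,
    )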
@@ -461,6 +647,66 @@ class GeminiChatModel(BaseChatModel):
         }
         return part_dict
 
+    def _extract_usage_metadata(self, response) -> Optional[Any]:
+        """Extracts the raw usage_metadata object from a Google GenAI response."""
+        if hasattr(response, 'usage_metadata') and response.usage_metadata:
+            self.logger.debug(f"[_extract_usage_metadata] Found usage_metadata: {response.usage_metadata}")
+            return response.usage_metadata
+        return None
+
+    def _create_chat_generation_chunk(self, chunk_response) -> ChatGenerationChunk:
+        """Creates a ChatGenerationChunk for streaming."""
+        # For streaming, we do not include usage metadata in individual chunks
+        # to prevent merge conflicts. The final, aggregated response will contain
+        # the full usage details for callbacks like Langfuse.
+        return ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=chunk_response.text,
+                response_metadata={"model_name": self.model_name},
+            ),
+            generation_info=None,
+        )
+
+    def _create_chat_result_with_usage(self, response) -> ChatResult:
+        """Creates a ChatResult with usage metadata for Langfuse tracking."""
+        generated_text = response.text
+        finish_reason = response.candidates[0].finish_reason.name if response.candidates else None
+
+        # Extract usage metadata for token tracking
+        usage_metadata = self._extract_usage_metadata(response)
+        usage_dict = usage_metadata.dict() if usage_metadata and hasattr(usage_metadata, "dict") else {}
+
+        # Create AIMessage with usage information in response_metadata
+        message = AIMessage(
+            content=generated_text,
+            response_metadata={
+                "model_name": self.model_name,
+                "finish_reason": finish_reason,
+                **usage_dict
+            }
+        )
+
+        # For non-streaming, we include the usage dict in generation_info.
+        # This is another field that callback handlers like Langfuse might inspect.
+        generation = ChatGeneration(
+            message=message,
+            generation_info=usage_dict if usage_dict else None
+        )
+
+        # We also construct the llm_output dictionary in the format expected
+        # by LangChain callback handlers, with a specific "token_usage" key.
+        chat_result = ChatResult(
+            generations=[generation],
+            llm_output={
+                "token_usage": usage_dict,
+                "model_name": self.model_name
+            } if usage_dict else {
+                "model_name": self.model_name
+            }
+        )
+
+        return chat_result
+
     def _generate(
         self,
         messages: List[BaseMessage],
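For orientation, the llm_output that _create_chat_result_with_usage hands to callback handlers would look roughly like the sketch below. The keys inside token_usage come from Google's usage_metadata object; the values shown are purely illustrative:

    llm_output = {
        "token_usage": {
            "prompt_token_count": 12,        # illustrative values only
            "candidates_token_count": 48,
            "total_token_count": 60,
        },
        "model_name": "gemini-2.0-flash",
    }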
@@ -471,6 +717,8 @@ class GeminiChatModel(BaseChatModel):
         """Generates a chat response from a list of messages."""
         self.logger.info(f"Generating response for {len(messages)} messages.")
 
+        # Remove the problematic add_handler call - callbacks are now handled in invoke methods
+
         contents = self._convert_messages(messages)
         config = self._prepare_generation_config(messages, stop)
 
@@ -482,14 +730,7 @@ class GeminiChatModel(BaseChatModel):
                 **kwargs,
             )
 
-
-            finish_reason = response.candidates[0].finish_reason.name if response.candidates else None
-
-            message = AIMessage(
-                content=generated_text,
-                response_metadata={"model_name": self.model_name, "finish_reason": finish_reason},
-            )
-            return ChatResult(generations=[ChatGeneration(message=message)])
+            return self._create_chat_result_with_usage(response)
 
         except Exception as e:
             self.logger.error(f"Error generating content with Google GenAI: {e}", exc_info=True)
@@ -516,14 +757,7 @@ class GeminiChatModel(BaseChatModel):
                 **kwargs,
             )
 
-
-            finish_reason = response.candidates[0].finish_reason.name if response.candidates else None
-
-            message = AIMessage(
-                content=generated_text,
-                response_metadata={"model_name": self.model_name, "finish_reason": finish_reason},
-            )
-            return ChatResult(generations=[ChatGeneration(message=message)])
+            return self._create_chat_result_with_usage(response)
 
         except Exception as e:
             self.logger.error(f"Error during async generation: {e}", exc_info=True)
@@ -536,7 +770,7 @@ class GeminiChatModel(BaseChatModel):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
-        """Streams the chat response."""
+        """Streams the chat response and properly handles final usage metadata."""
         self.logger.info(f"Streaming response for {len(messages)} messages.")
 
         contents = self._convert_messages(messages)
@@ -549,12 +783,35 @@ class GeminiChatModel(BaseChatModel):
                 config=config,
                 **kwargs,
             )
+
+            final_usage_metadata = None
             for chunk_response in stream:
+                # The usage metadata is on the chunk response itself. We update
+                # our variable on each chunk that has it to ensure we get the
+                # final, cumulative count at the end of the stream.
+                if chunk_response.usage_metadata:
+                    final_usage_metadata = self._extract_usage_metadata(chunk_response)
+
                 if text_content := chunk_response.text:
-                    chunk =
+                    chunk = self._create_chat_generation_chunk(chunk_response)
                     if run_manager:
                         run_manager.on_llm_new_token(text_content, chunk=chunk)
                     yield chunk
+
+            # After the stream is exhausted, we yield a final, empty chunk
+            # containing the full usage details. LangChain merges this into the
+            # final result, making it available to callback handlers.
+            if final_usage_metadata:
+                usage_dict = final_usage_metadata.dict() if hasattr(final_usage_metadata, "dict") else {}
+                final_generation_info = {
+                    "token_usage": usage_dict,
+                    "model_name": self.model_name
+                }
+                yield ChatGenerationChunk(
+                    message=AIMessageChunk(content=""),
+                    generation_info=final_generation_info
+                )
+
         except Exception as e:
             self.logger.error(f"Error streaming content: {e}", exc_info=True)
             raise ValueError(f"Error during streaming: {e}")
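The trailing empty chunk relies on langchain_core merging successive ChatGenerationChunk objects, concatenating content and combining generation_info. A self-contained sketch of that merge behavior (token counts are illustrative):

    from langchain_core.messages import AIMessageChunk
    from langchain_core.outputs import ChatGenerationChunk

    a = ChatGenerationChunk(message=AIMessageChunk(content="Hello, "))
    b = ChatGenerationChunk(message=AIMessageChunk(content="world"))
    tail = ChatGenerationChunk(
        message=AIMessageChunk(content=""),
        generation_info={"token_usage": {"total_token_count": 7}},
    )
    merged = a + b + tail
    print(merged.text)             # "Hello, world"
    print(merged.generation_info)  # carries the token_usage dict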
@@ -566,7 +823,7 @@ class GeminiChatModel(BaseChatModel):
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
-        """Asynchronously streams the chat response."""
+        """Asynchronously streams the chat response and properly handles final usage metadata."""
         self.logger.info(f"Async streaming response for {len(messages)} messages.")
 
         contents = self._convert_messages(messages)
@@ -579,12 +836,35 @@ class GeminiChatModel(BaseChatModel):
                 config=config,
                 **kwargs,
             )
+
+            final_usage_metadata = None
             async for chunk_response in stream:
+                # The usage metadata is on the chunk response itself. We update
+                # our variable on each chunk that has it to ensure we get the
+                # final, cumulative count at the end of the stream.
+                if chunk_response.usage_metadata:
+                    final_usage_metadata = self._extract_usage_metadata(chunk_response)
+
                 if text_content := chunk_response.text:
-                    chunk =
+                    chunk = self._create_chat_generation_chunk(chunk_response)
                     if run_manager:
                         await run_manager.on_llm_new_token(text_content, chunk=chunk)
                     yield chunk
+
+            # After the stream is exhausted, we yield a final, empty chunk
+            # containing the full usage details. LangChain merges this into the
+            # final result, making it available to callback handlers.
+            if final_usage_metadata:
+                usage_dict = final_usage_metadata.dict() if hasattr(final_usage_metadata, "dict") else {}
+                final_generation_info = {
+                    "token_usage": usage_dict,
+                    "model_name": self.model_name
+                }
+                yield ChatGenerationChunk(
+                    message=AIMessageChunk(content=""),
+                    generation_info=final_generation_info
+                )
+
         except Exception as e:
             self.logger.error(f"Error during async streaming: {e}", exc_info=True)
             raise ValueError(f"Error during async streaming: {e}")
crewplus/services/model_load_balancer.py
CHANGED
@@ -3,8 +3,9 @@ import random
 import logging
 from typing import Dict, List, Optional, Union
 from collections import defaultdict
-from langchain_openai import
+from langchain_openai import ChatOpenAI, AzureOpenAIEmbeddings
 from .gemini_chat_model import GeminiChatModel
+from .azure_chat_model import TracedAzureChatOpenAI
 
 
 class ModelLoadBalancer:
@@ -30,7 +31,7 @@ class ModelLoadBalancer:
         self.config_data = config_data
         self.logger = logger or logging.getLogger(__name__)
         self.models_config: List[Dict] = []
-        self.models: Dict[int, Union[
+        self.models: Dict[int, Union[TracedAzureChatOpenAI, ChatOpenAI, AzureOpenAIEmbeddings, GeminiChatModel]] = {}
         self._initialize_state()
         self._config_loaded = False  # Flag to check if config is loaded
 
@@ -131,7 +132,7 @@ class ModelLoadBalancer:
                 kwargs['temperature'] = model_config['temperature']
             if model_config.get('deployment_name') == 'o1-mini':
                 kwargs['disable_streaming'] = True
-            return
+            return TracedAzureChatOpenAI(**kwargs)
         elif provider == 'openai':
             kwargs = {
                 'openai_api_key': model_config['api_key']
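For context, a hypothetical 'azure' entry that would take the branch above could look like the following; the key names are inferred from the surrounding factory code, and the full configuration schema is not shown in this diff:

    model_config = {
        "provider": "azure",           # routes to TracedAzureChatOpenAI
        "api_key": "<azure-api-key>",  # placeholder
        "deployment_name": "gpt-4o",   # hypothetical deployment name
        "temperature": 0.2,
    }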
{crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: crewplus
-Version: 0.2.15
+Version: 0.2.21
 Summary: Base services for CrewPlus AI applications
 Author-Email: Tim Liu <tim@opsmateai.com>
 License: MIT
@@ -16,6 +16,7 @@ Requires-Dist: mkdocs<2.0.0,>=1.6.1
 Requires-Dist: mkdocs-material<10.0.0,>=9.6.14
 Requires-Dist: mkdocstrings-python<2.0.0,>=1.16.12
 Requires-Dist: langchain-milvus<0.3.0,>=0.2.1
+Requires-Dist: langfuse<4.0.0,>=3.1.3
 Description-Content-Type: text/markdown
 
 # CrewPlus
{crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/RECORD
CHANGED
@@ -1,12 +1,13 @@
-crewplus-0.2.
-crewplus-0.2.
-crewplus-0.2.
-crewplus-0.2.
+crewplus-0.2.21.dist-info/METADATA,sha256=_cUtSY9ZifWXNKbJ7j3zQjP0gqKY5RPvExND8z-XOB0,5125
+crewplus-0.2.21.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+crewplus-0.2.21.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+crewplus-0.2.21.dist-info/licenses/LICENSE,sha256=2_NHSHRTKB_cTcT_GXgcenOCtIZku8j343mOgAguTfc,1087
 crewplus/__init__.py,sha256=m46HkZL1Y4toD619NL47Sn2Qe084WFFSFD7e6VoYKZc,284
-crewplus/services/__init__.py,sha256=
-crewplus/services/
+crewplus/services/__init__.py,sha256=zUM4ZwUfGMBDx-j7Wehf_KC5yYXPTK8BK_oeO5veIXQ,398
+crewplus/services/azure_chat_model.py,sha256=xPuIsQpLV5Y3Ntwe3eqvquhBjh35g65VlF22AWJdEcU,8648
+crewplus/services/gemini_chat_model.py,sha256=HMDt7TKlLpQ43ZPxY9omG64EGFkP846BXT_SfyBeM0I,38415
 crewplus/services/init_services.py,sha256=U91zoMNJlOEKyldarNnATjeZDT2V-0CrXPAwI64hZkw,758
-crewplus/services/model_load_balancer.py,sha256=
+crewplus/services/model_load_balancer.py,sha256=6JvmqmHz52KmVdBqF8nt45rjwymUCiF6fqSDFZcQuJ0,8791
 crewplus/utils/__init__.py,sha256=2Gk1n5srFJQnFfBuYTxktdtKOVZyNrFcNaZKhXk35Pw,142
 crewplus/utils/schema_action.py,sha256=GDaBoVFQD1rXqrLVSMTfXYW1xcUu7eDcHsn57XBSnIg,422
 crewplus/utils/schema_document_updater.py,sha256=frvffxn2vbi71fHFPoGb9hq7gH2azmmdq17p-Fumnvg,7322
@@ -18,4 +19,4 @@ docs/GeminiChatModel.md,sha256=_IQyup3ofAa2HxfSurO1GYUEezTHYYt5Q1khYNVThGM,8040
 docs/ModelLoadBalancer.md,sha256=aGHES1dcXPz4c7Y8kB5-vsCNJjriH2SWmjBkSGoYKiI,4398
 docs/VDBService.md,sha256=Dw286Rrf_fsi13jyD3Bo4Sy7nZ_G7tYm7d8MZ2j9hxk,9375
 docs/index.md,sha256=3tlc15uR8lzFNM5WjdoZLw0Y9o1P1gwgbEnOdIBspqc,1643
-crewplus-0.2.
+crewplus-0.2.21.dist-info/RECORD,,
{crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/WHEEL
File without changes

{crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/entry_points.txt
File without changes

{crewplus-0.2.15.dist-info → crewplus-0.2.21.dist-info}/licenses/LICENSE
File without changes