crewplus 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of crewplus might be problematic.

crewplus/services/azure_chat_model.py CHANGED
@@ -4,34 +4,29 @@ from typing import Any, Optional

 from langchain_openai.chat_models.azure import AzureChatOpenAI
 from pydantic import Field
-
-# Langfuse imports with graceful fallback
-try:
-    from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
-    LANGFUSE_AVAILABLE = True
-except ImportError:
-    LANGFUSE_AVAILABLE = False
-    LangfuseCallbackHandler = None
+from .tracing_manager import TracingManager, TracingContext

 class TracedAzureChatOpenAI(AzureChatOpenAI):
     """
-    Wrapper for AzureChatOpenAI that integrates with Langfuse for tracing.
+    Wrapper for AzureChatOpenAI that integrates with tracing services like Langfuse.

-    This class automatically handles Langfuse callback integration, making it easier
+    This class automatically handles callback integration, making it easier
     to trace and debug your interactions with the Azure OpenAI service.

-    **Langfuse Integration:**
-    Langfuse tracing is automatically enabled when environment variables are set:
+    **Tracing Integration (e.g., Langfuse):**
+    Tracing is automatically enabled when the respective environment variables are set.
+    For Langfuse:
     - LANGFUSE_PUBLIC_KEY: Your Langfuse public key
     - LANGFUSE_SECRET_KEY: Your Langfuse secret key
     - LANGFUSE_HOST: Langfuse host URL (optional, defaults to https://cloud.langfuse.com)

-    You can also configure it explicitly or disable it. Session and user tracking
-    can be set per call via metadata in the `config` argument.
+    You can explicitly control this with the `enable_tracing` parameter or disable
+    it for specific calls by adding `{"metadata": {"tracing_disabled": True}}`
+    to the `config` argument.

     Attributes:
         logger (Optional[logging.Logger]): An optional logger instance.
-        enable_langfuse (Optional[bool]): Enable/disable Langfuse tracing (auto-detect if None).
+        enable_tracing (Optional[bool]): Enable/disable tracing (auto-detect if None).

     Example:
         .. code-block:: python
@@ -54,21 +49,21 @@ class TracedAzureChatOpenAI(AzureChatOpenAI):
             response = model.invoke("Hello, how are you?")
             print("Text response:", response.content)

-            # --- Langfuse tracing with session/user tracking ---
+            # --- Tracing with session/user tracking (for Langfuse) ---
             response = model.invoke(
                 "What is AI?",
                 config={
                     "metadata": {
                         "langfuse_session_id": "chat-session-123",
-                        "langfuse_user_id": "user-456"
+                        "user_id": "user-456"
                     }
                 }
             )

-            # --- Disable Langfuse for specific calls ---
+            # --- Disable tracing for a specific call ---
             response = model.invoke(
                 "Hello without tracing",
-                config={"metadata": {"langfuse_disabled": True}}
+                config={"metadata": {"tracing_disabled": True}}
             )

             # --- Asynchronous Streaming Usage ---
@@ -86,9 +81,9 @@ class TracedAzureChatOpenAI(AzureChatOpenAI):
             # asyncio.run(main())
     """
     logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance", exclude=True)
-    enable_langfuse: Optional[bool] = Field(default=None, description="Enable Langfuse tracing (auto-detect if None)")
+    enable_tracing: Optional[bool] = Field(default=None, description="Enable tracing (auto-detect if None)")

-    langfuse_handler: Optional[LangfuseCallbackHandler] = Field(default=None, exclude=True)
+    _tracing_manager: Optional[TracingManager] = None

     def __init__(self, **kwargs: Any):
         super().__init__(**kwargs)
@@ -100,102 +95,35 @@ class TracedAzureChatOpenAI(AzureChatOpenAI):
             self.logger.addHandler(logging.StreamHandler())
             self.logger.setLevel(logging.INFO)

-        # Initialize Langfuse handler
-        self._initialize_langfuse()
-
-    def _initialize_langfuse(self):
-        """Initialize Langfuse handler if enabled and available."""
-        if not LANGFUSE_AVAILABLE:
-            if self.enable_langfuse is True:
-                self.logger.warning("Langfuse is not installed. Install with: pip install langfuse")
-            return
-
-        # Auto-detect if Langfuse should be enabled
-        if self.enable_langfuse is None:
-            langfuse_env_vars = ["LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
-            self.enable_langfuse = any(os.getenv(var) for var in langfuse_env_vars)
-
-        if not self.enable_langfuse:
-            return
-
-        try:
-            self.langfuse_handler = LangfuseCallbackHandler()
-            self.logger.info(f"Langfuse tracing enabled for TracedAzureChatOpenAI with deployment: {self.deployment_name}")
-        except Exception as e:
-            self.logger.warning(f"Failed to initialize Langfuse: {e}")
-            self.langfuse_handler = None
+        self._tracing_manager = TracingManager(self)

-    def invoke(self, input, config=None, **kwargs):
-        """Override invoke to add Langfuse callback automatically."""
-        if config is None:
-            config = {}
-
-        if self.langfuse_handler:
-            # Do not trace if disabled via metadata
-            if config.get("metadata", {}).get("langfuse_disabled"):
-                return super().invoke(input, config=config, **kwargs)
+    def get_model_identifier(self) -> str:
+        """Return a string identifying this model for tracing and logging."""
+        return f"{self.__class__.__name__} (deployment='{self.deployment_name}')"

-        callbacks = config.get("callbacks", [])
-        has_langfuse = any(isinstance(callback, LangfuseCallbackHandler) for callback in callbacks)
-
-        if not has_langfuse:
-            callbacks = callbacks + [self.langfuse_handler]
-            config = {**config, "callbacks": callbacks}
-
+    def invoke(self, input, config=None, **kwargs):
+        config = self._tracing_manager.add_callbacks_to_config(config)
         return super().invoke(input, config=config, **kwargs)

     async def ainvoke(self, input, config=None, **kwargs):
-        """Override ainvoke to add Langfuse callback automatically."""
-        if config is None:
-            config = {}
-
-        if self.langfuse_handler:
-            # Do not trace if disabled via metadata
-            if config.get("metadata", {}).get("langfuse_disabled"):
-                return await super().ainvoke(input, config=config, **kwargs)
-
-        callbacks = config.get("callbacks", [])
-        has_langfuse = any(isinstance(callback, LangfuseCallbackHandler) for callback in callbacks)
-
-        if not has_langfuse:
-            callbacks = callbacks + [self.langfuse_handler]
-            config = {**config, "callbacks": callbacks}
-
+        config = self._tracing_manager.add_callbacks_to_config(config)
         return await super().ainvoke(input, config=config, **kwargs)

     def stream(self, input, config=None, **kwargs):
-        """Override stream to add Langfuse callback and request usage metadata."""
-        if config is None:
-            config = {}
-
         # Add stream_options to get usage data for Langfuse
         stream_options = kwargs.get("stream_options", {})
         stream_options["include_usage"] = True
         kwargs["stream_options"] = stream_options
-
-        # Add Langfuse callback if enabled and not already present
-        if self.langfuse_handler and not config.get("metadata", {}).get("langfuse_disabled"):
-            callbacks = config.get("callbacks", [])
-            if not any(isinstance(c, LangfuseCallbackHandler) for c in callbacks):
-                config["callbacks"] = callbacks + [self.langfuse_handler]

+        config = self._tracing_manager.add_callbacks_to_config(config)
         yield from super().stream(input, config=config, **kwargs)

     async def astream(self, input, config=None, **kwargs):
-        """Override astream to add Langfuse callback and request usage metadata."""
-        if config is None:
-            config = {}
-
         # Add stream_options to get usage data for Langfuse
         stream_options = kwargs.get("stream_options", {})
         stream_options["include_usage"] = True
         kwargs["stream_options"] = stream_options

-        # Add Langfuse callback if enabled and not already present
-        if self.langfuse_handler and not config.get("metadata", {}).get("langfuse_disabled"):
-            callbacks = config.get("callbacks", [])
-            if not any(isinstance(c, LangfuseCallbackHandler) for c in callbacks):
-                config["callbacks"] = callbacks + [self.langfuse_handler]
-
+        config = self._tracing_manager.add_callbacks_to_config(config)
         async for chunk in super().astream(input, config=config, **kwargs):
             yield chunk
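
The net effect of this refactor is that all per-call Langfuse plumbing leaves the model class: every override now delegates to `TracingManager.add_callbacks_to_config`, and `stream`/`astream` additionally force `stream_options={"include_usage": True}` so the provider reports token usage on the stream. A minimal consumption sketch, assuming Azure credentials in the environment, a placeholder deployment name, and a recent langchain-core that exposes `usage_metadata` on message chunks:

```python
from crewplus.services.azure_chat_model import TracedAzureChatOpenAI

# Hypothetical deployment name; not taken from this diff.
model = TracedAzureChatOpenAI(azure_deployment="gpt-4o")

usage = None
for chunk in model.stream("Summarize tracing in one sentence."):
    print(chunk.content, end="", flush=True)
    # Because include_usage is forced on, the final chunk should carry usage.
    if getattr(chunk, "usage_metadata", None):
        usage = chunk.usage_metadata
print("\nToken usage:", usage)
```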
crewplus/services/gemini_chat_model.py CHANGED
@@ -21,14 +21,7 @@ from langchain_core.callbacks import (
 )
 from pydantic import Field, SecretStr
 from langchain_core.utils import convert_to_secret_str
-
-# Langfuse imports with graceful fallback
-try:
-    from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
-    LANGFUSE_AVAILABLE = True
-except ImportError:
-    LANGFUSE_AVAILABLE = False
-    LangfuseCallbackHandler = None
+from .tracing_manager import TracingManager, TracingContext

 class GeminiChatModel(BaseChatModel):
     """Custom chat model for Google Gemini, supporting text, image, and video.
@@ -41,8 +34,9 @@ class GeminiChatModel(BaseChatModel):
     API keys can be provided directly or loaded from the `GOOGLE_API_KEY`
     environment variable.

-    **Langfuse Integration:**
-    Langfuse tracing is automatically enabled when environment variables are set:
+    **Tracing Integration:**
+    Tracing (e.g., with Langfuse) is automatically enabled when the respective
+    environment variables are set. For Langfuse:
     - LANGFUSE_PUBLIC_KEY: Your Langfuse public key
     - LANGFUSE_SECRET_KEY: Your Langfuse secret key
     - LANGFUSE_HOST: Langfuse host URL (optional, defaults to https://cloud.langfuse.com)
@@ -58,7 +52,7 @@ class GeminiChatModel(BaseChatModel):
         top_p (Optional[float]): The top-p (nucleus) sampling parameter.
         top_k (Optional[int]): The top-k sampling parameter.
         logger (Optional[logging.Logger]): An optional logger instance.
-        enable_langfuse (Optional[bool]): Enable/disable Langfuse tracing (auto-detect if None).
+        enable_tracing (Optional[bool]): Enable/disable all tracing (auto-detect if None).

     Example:
         .. code-block:: python
@@ -83,7 +77,7 @@ class GeminiChatModel(BaseChatModel):
             response = model.invoke("Hello, how are you?")
             print("Text response:", response.content)

-            # --- Langfuse tracing with session/user tracking ---
+            # --- Tracing with session/user tracking (for Langfuse) ---
             response = model.invoke(
                 "What is AI?",
                 config={
@@ -189,25 +183,25 @@ class GeminiChatModel(BaseChatModel):
             # --- Disable Langfuse for specific calls ---
             response = model.invoke(
                 "Hello without tracing",
-                config={"metadata": {"langfuse_disabled": True}}
+                config={"metadata": {"tracing_disabled": True}}
             )
     """

     # Model configuration
-    model_name: str = Field(default="gemini-2.0-flash", description="The Google model name to use")
+    model_name: str = Field(default="gemini-2.5-flash", description="The Google model name to use")
     google_api_key: Optional[SecretStr] = Field(default=None, description="Google API key")
     temperature: Optional[float] = Field(default=0.7, description="Sampling temperature")
     max_tokens: Optional[int] = Field(default=None, description="Maximum tokens to generate")
     top_p: Optional[float] = Field(default=None, description="Top-p sampling parameter")
     top_k: Optional[int] = Field(default=None, description="Top-k sampling parameter")
-    logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance")

-    # Langfuse configuration
-    enable_langfuse: Optional[bool] = Field(default=None, description="Enable Langfuse tracing (auto-detect if None)")
+    # Configuration for tracing and logging
+    logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance", exclude=True)
+    enable_tracing: Optional[bool] = Field(default=None, description="Enable tracing (auto-detect if None)")

-    # Internal clients
+    # Internal clients and managers
     _client: Optional[genai.Client] = None
-    _langfuse_handler: Optional[LangfuseCallbackHandler] = None
+    _tracing_manager: Optional[TracingManager] = None

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -215,7 +209,7 @@ class GeminiChatModel(BaseChatModel):
         # Initialize logger
         if self.logger is None:
             self.logger = logging.getLogger(f"{self.__class__.__module__}.{self.__class__.__name__}")
-            if not self.logger.handlers:  # and not getattr(self.logger, 'propagate', True):
+            if not self.logger.handlers:
                 self.logger.addHandler(logging.StreamHandler())
                 self.logger.setLevel(logging.INFO)

@@ -227,143 +221,40 @@ class GeminiChatModel(BaseChatModel):

         # Initialize the Google GenAI client
         if self.google_api_key:
-            self._client = genai.Client(
-                api_key=self.google_api_key.get_secret_value()
-            )
+            self._client = genai.Client(api_key=self.google_api_key.get_secret_value())
             self.logger.info(f"Initialized GeminiChatModel with model: {self.model_name}")
         else:
             error_msg = "Google API key is required. Set GOOGLE_API_KEY environment variable or pass google_api_key parameter."
             self.logger.error(error_msg)
             raise ValueError(error_msg)

-        # Initialize Langfuse handler
-        self._initialize_langfuse()
-
-    def _initialize_langfuse(self):
-        """Initialize Langfuse handler if enabled and available."""
-        if not LANGFUSE_AVAILABLE:
-            if self.enable_langfuse is True:
-                self.logger.warning("Langfuse is not installed. Install with: pip install langfuse")
-            return
-
-        # Auto-detect if Langfuse should be enabled
-        if self.enable_langfuse is None:
-            # Check if Langfuse environment variables are set
-            langfuse_env_vars = ["LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
-            self.enable_langfuse = any(os.getenv(var) for var in langfuse_env_vars)
-
-        if not self.enable_langfuse:
-            return
-
-        try:
-            # Initialize Langfuse handler with minimal config
-            # Session/user tracking will be handled per call via metadata
-            self._langfuse_handler = LangfuseCallbackHandler()
-            self.logger.info("Langfuse tracing enabled for GeminiChatModel")
-
-        except Exception as e:
-            self.logger.warning(f"Failed to initialize Langfuse: {e}")
-            self._langfuse_handler = None
-
-    def _should_add_langfuse_callback(self, run_manager: Optional[CallbackManagerForLLMRun] = None) -> bool:
-        """Check if Langfuse callback should be added."""
-        if not self._langfuse_handler:
-            return False
-
-        # Check if Langfuse is already in the callback manager
-        if run_manager and hasattr(run_manager, 'handlers'):
-            has_langfuse = any(
-                isinstance(handler, LangfuseCallbackHandler)
-                for handler in run_manager.handlers
-            )
-            if has_langfuse:
-                return False
-
-        return True
+        self._tracing_manager = TracingManager(self)
+
+    def get_model_identifier(self) -> str:
+        """Return a string identifying this model for tracing and logging."""
+        return f"{self.__class__.__name__} (model='{self.model_name}')"

     def invoke(self, input, config=None, **kwargs):
-        """Override invoke to add Langfuse callback automatically."""
-        if config is None:
-            config = {}
-
-        # Add Langfuse callback if enabled and not already present
-        if self._langfuse_handler:
-            callbacks = config.get("callbacks", [])
-
-            # Check if Langfuse callback is already present
-            has_langfuse = any(
-                isinstance(callback, LangfuseCallbackHandler)
-                for callback in callbacks
-            )
-
-            if not has_langfuse:
-                callbacks = callbacks + [self._langfuse_handler]
-                config = {**config, "callbacks": callbacks}
-
+        """Override invoke to add tracing callbacks automatically."""
+        config = self._tracing_manager.add_callbacks_to_config(config)
         return super().invoke(input, config=config, **kwargs)

     async def ainvoke(self, input, config=None, **kwargs):
-        """Override ainvoke to add Langfuse callback automatically."""
-        if config is None:
-            config = {}
-
-        # Add Langfuse callback if enabled and not already present
-        if self._langfuse_handler:
-            callbacks = config.get("callbacks", [])
-
-            # Check if Langfuse callback is already present
-            has_langfuse = any(
-                isinstance(callback, LangfuseCallbackHandler)
-                for callback in callbacks
-            )
-
-            if not has_langfuse:
-                callbacks = callbacks + [self._langfuse_handler]
-                config = {**config, "callbacks": callbacks}
-
+        """Override ainvoke to add tracing callbacks automatically."""
+        config = self._tracing_manager.add_callbacks_to_config(config)
         return await super().ainvoke(input, config=config, **kwargs)

     def stream(self, input, config=None, **kwargs):
-        """Override stream to add Langfuse callback automatically."""
-        if config is None:
-            config = {}
-
-        # Add Langfuse callback if enabled and not already present
-        if self._langfuse_handler:
-            callbacks = config.get("callbacks", [])
-
-            # Check if Langfuse callback is already present
-            has_langfuse = any(
-                isinstance(callback, LangfuseCallbackHandler)
-                for callback in callbacks
-            )
-
-            if not has_langfuse:
-                callbacks = callbacks + [self._langfuse_handler]
-                config = {**config, "callbacks": callbacks}
-
+        """Override stream to add tracing callbacks automatically."""
+        config = self._tracing_manager.add_callbacks_to_config(config)
         return super().stream(input, config=config, **kwargs)

     async def astream(self, input, config=None, **kwargs):
-        """Override astream to add Langfuse callback automatically."""
-        if config is None:
-            config = {}
-
-        # Add Langfuse callback if enabled and not already present
-        if self._langfuse_handler:
-            callbacks = config.get("callbacks", [])
-
-            # Check if Langfuse callback is already present
-            has_langfuse = any(
-                isinstance(callback, LangfuseCallbackHandler)
-                for callback in callbacks
-            )
-
-            if not has_langfuse:
-                callbacks = callbacks + [self._langfuse_handler]
-                config = {**config, "callbacks": callbacks}
-
-        return super().astream(input, config=config, **kwargs)
+        """Override astream to add tracing callbacks automatically."""
+        config = self._tracing_manager.add_callbacks_to_config(config)
+        # This override must itself be an async generator, so iterate the
+        # parent async generator and re-yield its chunks.
+        async for chunk in super().astream(input, config=config, **kwargs):
+            yield chunk

     @property
     def _llm_type(self) -> str:
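
One easy-to-miss fix in the hunk above: the previous `astream` ended with `return super().astream(...)` inside an `async def`, which makes the method a plain coroutine that returns an async generator, so `async for chunk in model.astream(...)` fails unless the caller awaits first. Re-yielding turns the override itself into an async generator. A self-contained illustration, with names invented for the demo:

```python
import asyncio

async def numbers():
    """A plain async generator."""
    for i in range(3):
        yield i

async def broken():
    # A coroutine that *returns* an async generator: `async for x in broken()`
    # raises TypeError because a coroutine object has no __aiter__.
    return numbers()

async def fixed():
    # Re-yielding makes this function itself an async generator.
    async for i in numbers():
        yield i

async def main():
    async for i in fixed():
        print(i)  # prints 0, 1, 2

asyncio.run(main())
```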
@@ -647,6 +538,59 @@ class GeminiChatModel(BaseChatModel):
             }
         return part_dict

+    def _map_usage_metadata(self, usage_metadata: Any) -> Optional[dict]:
+        """
+        Maps Google's rich usage metadata to LangChain's expected format,
+        including detailed breakdowns by modality.
+        """
+        if not usage_metadata:
+            return None
+
+        # --- Basic Token Counts ---
+        input_tokens = getattr(usage_metadata, "prompt_token_count", 0)
+        output_tokens = getattr(usage_metadata, "candidates_token_count", 0)
+        thoughts_tokens = getattr(usage_metadata, "thoughts_token_count", 0)
+        total_tokens = getattr(usage_metadata, "total_token_count", 0)
+
+        # In some cases, total_tokens is not provided, so we calculate it
+        if total_tokens == 0 and (input_tokens > 0 or output_tokens > 0):
+            total_tokens = input_tokens + output_tokens
+
+        # --- Detailed Token Counts (The Fix) ---
+        input_details = {}
+        # The `prompt_tokens_details` is a list of ModalityTokenCount objects.
+        # We convert it to a dictionary.
+        if prompt_details_list := getattr(usage_metadata, "prompt_tokens_details", None):
+            for detail in prompt_details_list:
+                # Convert enum e.g., <MediaModality.TEXT: 'TEXT'> to "text"
+                modality_key = detail.modality.name.lower()
+                input_details[modality_key] = detail.token_count
+
+        # Add cached tokens to input details if present
+        # if cached_tokens := getattr(usage_metadata, "cached_content_token_count", 0):
+        #     input_details["cached_content"] = cached_tokens
+
+        output_details = {}
+        # The `candidates_tokens_details` is also a list, so we convert it.
+        if candidate_details_list := getattr(usage_metadata, "candidates_tokens_details", None):
+            for detail in candidate_details_list:
+                modality_key = detail.modality.name.lower()
+                output_details[modality_key] = detail.token_count
+
+        # --- Construct the final dictionary ---
+        final_metadata = {
+            "input_tokens": input_tokens,
+            "output_tokens": output_tokens,
+            "thoughts_tokens": thoughts_tokens,
+            "total_tokens": total_tokens,
+        }
+        if input_details:
+            final_metadata["input_token_details"] = input_details
+        if output_details:
+            final_metadata["output_token_details"] = output_details
+
+        return final_metadata
+
     def _extract_usage_metadata(self, response) -> Optional[Any]:
         """Extracts the raw usage_metadata object from a Google GenAI response."""
         if hasattr(response, 'usage_metadata') and response.usage_metadata:
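
For orientation, the mapper above should produce a dictionary in LangChain's `UsageMetadata` shape, with the detail keys derived by lower-casing Google's modality enum names. The values below are invented for illustration, not captured from a real call:

```python
# Hypothetical result of _map_usage_metadata for a text-plus-image prompt.
expected = {
    "input_tokens": 263,
    "output_tokens": 50,
    "thoughts_tokens": 0,
    "total_tokens": 313,
    "input_token_details": {"text": 7, "image": 256},
    "output_token_details": {"text": 50},
}
```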
@@ -672,11 +616,10 @@ class GeminiChatModel(BaseChatModel):
         generated_text = response.text
         finish_reason = response.candidates[0].finish_reason.name if response.candidates else None

-        # Extract usage metadata for token tracking
+        # Use the new mapping function here for invoke calls
         usage_metadata = self._extract_usage_metadata(response)
-        usage_dict = usage_metadata.dict() if usage_metadata and hasattr(usage_metadata, "dict") else {}
+        usage_dict = self._map_usage_metadata(usage_metadata) or {}

-        # Create AIMessage with usage information in response_metadata
         message = AIMessage(
             content=generated_text,
             response_metadata={
@@ -686,11 +629,9 @@ class GeminiChatModel(BaseChatModel):
             }
         )

-        # For non-streaming, we include the usage dict in generation_info.
-        # This is another field that callback handlers like Langfuse might inspect.
         generation = ChatGeneration(
             message=message,
-            generation_info=usage_dict if usage_dict else None
+            generation_info={"token_usage": usage_dict} if usage_dict else None
         )

         # We also construct the llm_output dictionary in the format expected
@@ -786,31 +727,19 @@ class GeminiChatModel(BaseChatModel):

             final_usage_metadata = None
             for chunk_response in stream:
-                # The usage metadata is on the chunk response itself. We update
-                # our variable on each chunk that has it to ensure we get the
-                # final, cumulative count at the end of the stream.
                 if chunk_response.usage_metadata:
                     final_usage_metadata = self._extract_usage_metadata(chunk_response)

-                if text_content := chunk_response.text:
-                    chunk = self._create_chat_generation_chunk(chunk_response)
-                    if run_manager:
-                        run_manager.on_llm_new_token(text_content, chunk=chunk)
-                    yield chunk
+                if chunk_response.text:
+                    yield self._create_chat_generation_chunk(chunk_response)

-            # After the stream is exhausted, we yield a final, empty chunk
-            # containing the full usage details. LangChain merges this into the
-            # final result, making it available to callback handlers.
+            # **FIX:** Yield a final chunk with the mapped usage data
             if final_usage_metadata:
-                usage_dict = final_usage_metadata.dict() if hasattr(final_usage_metadata, "dict") else {}
-                final_generation_info = {
-                    "token_usage": usage_dict,
-                    "model_name": self.model_name
-                }
-                yield ChatGenerationChunk(
-                    message=AIMessageChunk(content=""),
-                    generation_info=final_generation_info
-                )
+                lc_usage_metadata = self._map_usage_metadata(final_usage_metadata)
+                if lc_usage_metadata:
+                    yield ChatGenerationChunk(
+                        message=AIMessageChunk(content="", usage_metadata=lc_usage_metadata)
+                    )

         except Exception as e:
             self.logger.error(f"Error streaming content: {e}", exc_info=True)
@@ -839,31 +768,19 @@ class GeminiChatModel(BaseChatModel):

             final_usage_metadata = None
             async for chunk_response in stream:
-                # The usage metadata is on the chunk response itself. We update
-                # our variable on each chunk that has it to ensure we get the
-                # final, cumulative count at the end of the stream.
                 if chunk_response.usage_metadata:
                     final_usage_metadata = self._extract_usage_metadata(chunk_response)

-                if text_content := chunk_response.text:
-                    chunk = self._create_chat_generation_chunk(chunk_response)
-                    if run_manager:
-                        await run_manager.on_llm_new_token(text_content, chunk=chunk)
-                    yield chunk
+                if chunk_response.text:
+                    yield self._create_chat_generation_chunk(chunk_response)

-            # After the stream is exhausted, we yield a final, empty chunk
-            # containing the full usage details. LangChain merges this into the
-            # final result, making it available to callback handlers.
+            # **FIX:** Yield a final chunk with the mapped usage data
             if final_usage_metadata:
-                usage_dict = final_usage_metadata.dict() if hasattr(final_usage_metadata, "dict") else {}
-                final_generation_info = {
-                    "token_usage": usage_dict,
-                    "model_name": self.model_name
-                }
-                yield ChatGenerationChunk(
-                    message=AIMessageChunk(content=""),
-                    generation_info=final_generation_info
-                )
+                lc_usage_metadata = self._map_usage_metadata(final_usage_metadata)
+                if lc_usage_metadata:
+                    yield ChatGenerationChunk(
+                        message=AIMessageChunk(content="", usage_metadata=lc_usage_metadata)
+                    )

         except Exception as e:
             self.logger.error(f"Error during async streaming: {e}", exc_info=True)
crewplus/services/tracing_manager.py ADDED
@@ -0,0 +1,148 @@
+# File: crewplus/services/tracing_manager.py
+
+from typing import Any, Optional, List, Protocol
+import os
+import logging
+
+# Langfuse imports with graceful fallback. This allows the application to run
+# even if the langfuse library is not installed.
+try:
+    from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
+    LANGFUSE_AVAILABLE = True
+except ImportError:
+    LANGFUSE_AVAILABLE = False
+    LangfuseCallbackHandler = None
+
+class TracingContext(Protocol):
+    """
+    A protocol that defines a formal contract for a model to be "traceable."
+
+    This protocol ensures that any class using the TracingManager provides the
+    necessary attributes and methods for the manager to function correctly. By
+    using a Protocol, we leverage Python's static analysis tools (like mypy)
+    to enforce this contract, preventing runtime errors and making the system
+    more robust and self-documenting.
+
+    It allows the TracingManager to be completely decoupled from any specific
+    model implementation, promoting clean, compositional design.
+
+    A class that implements this protocol must provide:
+    - A `logger` attribute for logging.
+    - An `enable_tracing` attribute to control tracing.
+    - A `get_model_identifier` method to describe itself for logging purposes.
+    """
+    logger: logging.Logger
+    enable_tracing: Optional[bool]
+
+    def get_model_identifier(self) -> str:
+        """
+        Return a string that uniquely identifies the model instance for logging.
+
+        Example:
+            "GeminiChatModel (model='gemini-1.5-flash')"
+
+        Note:
+            The '...' (Ellipsis) is the standard way in a Protocol to indicate
+            that this method must be implemented by any class that conforms to
+            this protocol, but has no implementation in the protocol itself.
+        """
+        ...
+
+class TracingManager:
+    """
+    Manages the initialization and injection of tracing handlers for chat models.
+
+    This class uses a composition-based approach, taking a context object that
+    fulfills the TracingContext protocol. This design is highly extensible,
+    allowing new tracing providers (e.g., Helicone, OpenTelemetry) to be added
+    with minimal, isolated changes.
+    """
+
+    def __init__(self, context: TracingContext):
+        """
+        Args:
+            context: An object (typically a chat model instance) that conforms
+                to the TracingContext protocol.
+        """
+        self.context = context
+        self._handlers: List[Any] = []
+        self._initialize_handlers()
+
+    def _initialize_handlers(self):
+        """
+        Initializes all supported tracing handlers. This is the central point
+        for adding new observability tools.
+        """
+        self._handlers = []
+        self._initialize_langfuse()
+        # To add a new handler (e.g., Helicone), you would add a call to
+        # self._initialize_helicone() here.
+
+    def _initialize_langfuse(self):
+        """Initializes the Langfuse handler if it's available and enabled."""
+        if not LANGFUSE_AVAILABLE:
+            if self.context.enable_tracing is True:
+                self.context.logger.warning("Langfuse is not installed; tracing will be disabled. Install with: pip install langfuse")
+            return
+
+        # Determine if Langfuse should be enabled via an explicit flag or
+        # by detecting its environment variables.
+        enable_langfuse = self.context.enable_tracing
+        if enable_langfuse is None:  # Auto-detect if not explicitly set
+            langfuse_env_vars = ["LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
+            enable_langfuse = any(os.getenv(var) for var in langfuse_env_vars)
+
+        if enable_langfuse:
+            try:
+                handler = LangfuseCallbackHandler()
+                self._handlers.append(handler)
+                self.context.logger.info(f"Langfuse tracing enabled for {self.context.get_model_identifier()}")
+            except Exception as e:
+                self.context.logger.warning(f"Failed to initialize Langfuse: {e}")
+
+    def add_callbacks_to_config(self, config: Optional[dict]) -> dict:
+        """
+        Adds all registered tracing handlers to the request configuration.
+
+        This method is robust and handles three scenarios for the 'callbacks' key:
+        1. A list of callbacks.
+        2. A LangChain CallbackManager instance.
+        3. None or a missing key.
+
+        Args:
+            config: The request configuration dictionary from a LangChain call.
+
+        Returns:
+            The updated configuration dictionary with tracing callbacks added.
+        """
+        if config is None:
+            config = {}
+
+        # Respect a global disable flag for this specific call.
+        if not self._handlers or config.get("metadata", {}).get("tracing_disabled"):
+            return config
+
+        callbacks = config.get("callbacks")
+
+        # Case 1: The 'callbacks' key holds a CallbackManager instance
+        if hasattr(callbacks, 'add_handler') and hasattr(callbacks, 'handlers'):
+            for handler in self._handlers:
+                if not any(isinstance(cb, type(handler)) for cb in callbacks.handlers):
+                    callbacks.add_handler(handler, inherit=True)
+            return config  # Return the original, now-mutated config
+
+        # Case 2: The 'callbacks' key holds a list or is None
+        current_callbacks = callbacks or []
+        new_callbacks = list(current_callbacks)
+
+        for handler in self._handlers:
+            if not any(isinstance(cb, type(handler)) for cb in new_callbacks):
+                new_callbacks.append(handler)
+
+        if len(new_callbacks) > len(current_callbacks):
+            # Create a new dictionary with the updated callbacks list.
+            # This is a safe operation that overwrites the existing 'callbacks'
+            # key and avoids mutating the original config object.
+            return {**config, "callbacks": new_callbacks}
+
+        return config
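
Because `TracingContext` is a structural protocol, any object exposing the three required members can drive a `TracingManager`; it does not have to be a chat model. A minimal conforming sketch, where the class below is illustrative and not part of the package:

```python
import logging
from typing import Optional
from crewplus.services.tracing_manager import TracingManager

class TinyTraceable:
    """Smallest object satisfying the TracingContext protocol."""

    def __init__(self) -> None:
        self.logger = logging.getLogger("tiny")
        self.enable_tracing: Optional[bool] = None  # None = auto-detect from env
        self._tracing = TracingManager(self)

    def get_model_identifier(self) -> str:
        return "TinyTraceable (demo)"

    def prepare(self, config: Optional[dict] = None) -> dict:
        # Inject handlers; pass the result as `config=` to any LangChain call.
        return self._tracing.add_callbacks_to_config(config)

print(TinyTraceable().prepare({"metadata": {"user_id": "user-456"}}))
```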
crewplus-0.2.27.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: crewplus
-Version: 0.2.25
+Version: 0.2.27
 Summary: Base services for CrewPlus AI applications
 Author-Email: Tim Liu <tim@opsmateai.com>
 License: MIT
crewplus-0.2.27.dist-info/RECORD CHANGED
@@ -1,13 +1,14 @@
-crewplus-0.2.25.dist-info/METADATA,sha256=NynftFjnRM1sFSQfe8PUWMRDuo1YOWyyyjgODaegtBs,4991
-crewplus-0.2.25.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
-crewplus-0.2.25.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-crewplus-0.2.25.dist-info/licenses/LICENSE,sha256=2_NHSHRTKB_cTcT_GXgcenOCtIZku8j343mOgAguTfc,1087
+crewplus-0.2.27.dist-info/METADATA,sha256=bYZxj9fhuzOqc7YDs-7Ef5msU_-ZXktY9ruuAsJjevM,4991
+crewplus-0.2.27.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+crewplus-0.2.27.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+crewplus-0.2.27.dist-info/licenses/LICENSE,sha256=2_NHSHRTKB_cTcT_GXgcenOCtIZku8j343mOgAguTfc,1087
 crewplus/__init__.py,sha256=m46HkZL1Y4toD619NL47Sn2Qe084WFFSFD7e6VoYKZc,284
 crewplus/services/__init__.py,sha256=zUM4ZwUfGMBDx-j7Wehf_KC5yYXPTK8BK_oeO5veIXQ,398
-crewplus/services/azure_chat_model.py,sha256=xPuIsQpLV5Y3Ntwe3eqvquhBjh35g65VlF22AWJdEcU,8648
-crewplus/services/gemini_chat_model.py,sha256=HMDt7TKlLpQ43ZPxY9omG64EGFkP846BXT_SfyBeM0I,38415
+crewplus/services/azure_chat_model.py,sha256=WMSf4BDO8UcP7ZASNGRJxdTEnuWBmCRSY_4yx_VMbok,5499
+crewplus/services/gemini_chat_model.py,sha256=oVLL07VEjwrHOeP56YSPLnldbfjvTVRrsTI6xcPNt1E,35224
 crewplus/services/init_services.py,sha256=U91zoMNJlOEKyldarNnATjeZDT2V-0CrXPAwI64hZkw,758
 crewplus/services/model_load_balancer.py,sha256=HH_eHxFfxgarPWFGpANg7dgShnWca4q46Jz0b1vJ4Sw,9405
+crewplus/services/tracing_manager.py,sha256=aCU9N4Jvh8pDD3h8kWX4O-Ax8xwdLHnQ4wJ3sf-vLwA,6289
 crewplus/utils/__init__.py,sha256=2Gk1n5srFJQnFfBuYTxktdtKOVZyNrFcNaZKhXk35Pw,142
 crewplus/utils/schema_action.py,sha256=GDaBoVFQD1rXqrLVSMTfXYW1xcUu7eDcHsn57XBSnIg,422
 crewplus/utils/schema_document_updater.py,sha256=frvffxn2vbi71fHFPoGb9hq7gH2azmmdq17p-Fumnvg,7322
@@ -15,8 +16,8 @@ crewplus/vectorstores/milvus/__init__.py,sha256=egGncAdjlXG6ekTQvKMKqhvKBifrUrPl
 crewplus/vectorstores/milvus/milvus_schema_manager.py,sha256=2IZT61LVui21Pt5Z3y8YYS2dYcwzkgUKxMq2NA0-lQE,9222
 crewplus/vectorstores/milvus/schema_milvus.py,sha256=IvKdUCH451HJ-F3TUR5jDjqwQlQs4SEXAQ_th4JAnfc,12117
 crewplus/vectorstores/milvus/vdb_service.py,sha256=wCltxZc0aD27iTu7wjveHqQWPEF2VyO4B2WGQCheeVs,21118
-docs/GeminiChatModel.md,sha256=_IQyup3ofAa2HxfSurO1GYUEezTHYYt5Q1khYNVThGM,8040
+docs/GeminiChatModel.md,sha256=zZYyl6RmjZTUsKxxMiC9O4yV70MC4TD-IGUmWhIDBKA,8677
 docs/ModelLoadBalancer.md,sha256=aGHES1dcXPz4c7Y8kB5-vsCNJjriH2SWmjBkSGoYKiI,4398
 docs/VDBService.md,sha256=Dw286Rrf_fsi13jyD3Bo4Sy7nZ_G7tYm7d8MZ2j9hxk,9375
 docs/index.md,sha256=3tlc15uR8lzFNM5WjdoZLw0Y9o1P1gwgbEnOdIBspqc,1643
-crewplus-0.2.25.dist-info/RECORD,,
+crewplus-0.2.27.dist-info/RECORD,,
docs/GeminiChatModel.md CHANGED
@@ -61,9 +61,22 @@ response = model.invoke("Hello, how are you?")
 print(response.content)

 # Using stream for a chunked response
-print("\\n--- Streaming Response ---")
-for chunk in model.stream("Tell me a short story."):
+print("\n--- Streaming Response ---")
+for chunk in model.stream("Tell me a short story about a brave robot."):
     print(chunk.content, end="", flush=True)
+
+# Using astream for an asynchronous chunked response
+import asyncio
+
+async def main():
+    print("\n--- Async Streaming Response ---")
+    async for chunk in model.astream("Tell me a short story about a brave robot."):
+        print(chunk.content, end="", flush=True)
+
+# To run the async function in a Jupyter Notebook or a script:
+# await main()
+# Or, if not in an async context:
+# asyncio.run(main())
 ```

 ## 5. Image Understanding
@@ -120,9 +133,17 @@ try:
     print("Image response (base64):", image_response.content)
 except FileNotFoundError:
     print(f"Image file not found at {image_path}, skipping base64 example.")
+
+### Example 3: Streaming a Multimodal Response
+
+Streaming also works with complex, multimodal inputs. This is useful for getting faster time-to-first-token while the model processes all the data.
+
+```python
+# The url_message is from the previous example
+print("\n--- Streaming Multimodal Response ---")
+for chunk in model.stream([url_message]):
+    print(chunk.content, end="", flush=True)
 ```
-> **Sample Output:**
-> This image is a movie still from the 2017 Japanese thriller "22 Year Old's Confession: I am the Murderer"... The four women in the photo are the victims of a serial killer...

 ## 6. Video Understanding