crewplus 0.2.15__tar.gz → 0.2.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of crewplus might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: crewplus
-Version: 0.2.15
+Version: 0.2.19
 Summary: Base services for CrewPlus AI applications
 Author-Email: Tim Liu <tim@opsmateai.com>
 License: MIT
@@ -16,6 +16,7 @@ Requires-Dist: mkdocs<2.0.0,>=1.6.1
 Requires-Dist: mkdocs-material<10.0.0,>=9.6.14
 Requires-Dist: mkdocstrings-python<2.0.0,>=1.16.12
 Requires-Dist: langchain-milvus<0.3.0,>=0.2.1
+Requires-Dist: langfuse<4.0.0,>=3.1.3
 Description-Content-Type: text/markdown
 
 # CrewPlus
@@ -22,6 +22,14 @@ from langchain_core.callbacks import (
 from pydantic import Field, SecretStr
 from langchain_core.utils import convert_to_secret_str
 
+# Langfuse imports with graceful fallback
+try:
+    from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
+    LANGFUSE_AVAILABLE = True
+except ImportError:
+    LANGFUSE_AVAILABLE = False
+    LangfuseCallbackHandler = None
+
 class GeminiChatModel(BaseChatModel):
     """Custom chat model for Google Gemini, supporting text, image, and video.
 
@@ -33,6 +41,15 @@ class GeminiChatModel(BaseChatModel):
     API keys can be provided directly or loaded from the `GOOGLE_API_KEY`
     environment variable.
 
+    **Langfuse Integration:**
+    Langfuse tracing is automatically enabled when environment variables are set:
+    - LANGFUSE_PUBLIC_KEY: Your Langfuse public key
+    - LANGFUSE_SECRET_KEY: Your Langfuse secret key
+    - LANGFUSE_HOST: Langfuse host URL (optional, defaults to https://cloud.langfuse.com)
+
+    You can also configure it explicitly or disable it. Session and user tracking
+    can be set per call via metadata.
+
     Attributes:
         model_name (str): The Google model name to use (e.g., "gemini-1.5-flash").
         google_api_key (Optional[SecretStr]): Your Google API key.
@@ -41,10 +58,18 @@ class GeminiChatModel(BaseChatModel):
         top_p (Optional[float]): The top-p (nucleus) sampling parameter.
         top_k (Optional[int]): The top-k sampling parameter.
         logger (Optional[logging.Logger]): An optional logger instance.
+        enable_langfuse (Optional[bool]): Enable/disable Langfuse tracing (auto-detect if None).
 
     Example:
         .. code-block:: python
 
+            # Set Langfuse environment variables (optional)
+            import os
+            os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
+            os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
+            os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com" # EU region or self-hosted
+            # os.environ["LANGFUSE_HOST"] = "https://us.cloud.langfuse.com" # US region
+
             from crewplus.services import GeminiChatModel
             from langchain_core.messages import HumanMessage
             import base64
@@ -54,10 +79,21 @@ class GeminiChatModel(BaseChatModel):
             logger = logging.getLogger("my_app.gemini")
             model = GeminiChatModel(model_name="gemini-2.0-flash", logger=logger)
 
-            # --- Text-only usage ---
+            # --- Text-only usage (automatically traced if env vars set) ---
             response = model.invoke("Hello, how are you?")
             print("Text response:", response.content)
 
+            # --- Langfuse tracing with session/user tracking ---
+            response = model.invoke(
+                "What is AI?",
+                config={
+                    "metadata": {
+                        "langfuse_session_id": "chat-session-123",
+                        "langfuse_user_id": "user-456"
+                    }
+                }
+            )
+
             # --- Image processing with base64 data URI ---
             # Replace with a path to your image
             image_path = "path/to/your/image.jpg"
@@ -138,6 +174,23 @@ class GeminiChatModel(BaseChatModel):
             print("Streaming response:")
             for chunk in model.stream([url_message]):
                 print(chunk.content, end="", flush=True)
+
+            # --- Traditional Langfuse callback approach still works ---
+            from langfuse.langchain import CallbackHandler
+            langfuse_handler = CallbackHandler(
+                session_id="session-123",
+                user_id="user-456"
+            )
+            response = model.invoke(
+                "Hello with manual callback",
+                config={"callbacks": [langfuse_handler]}
+            )
+
+            # --- Disable Langfuse for specific calls ---
+            response = model.invoke(
+                "Hello without tracing",
+                config={"metadata": {"langfuse_disabled": True}}
+            )
     """
 
     # Model configuration
@@ -149,8 +202,12 @@ class GeminiChatModel(BaseChatModel):
     top_k: Optional[int] = Field(default=None, description="Top-k sampling parameter")
     logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance")
 
-    # Internal client
+    # Langfuse configuration
+    enable_langfuse: Optional[bool] = Field(default=None, description="Enable Langfuse tracing (auto-detect if None)")
+
+    # Internal clients
     _client: Optional[genai.Client] = None
+    _langfuse_handler: Optional[LangfuseCallbackHandler] = None
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -178,6 +235,135 @@ class GeminiChatModel(BaseChatModel):
             error_msg = "Google API key is required. Set GOOGLE_API_KEY environment variable or pass google_api_key parameter."
             self.logger.error(error_msg)
             raise ValueError(error_msg)
+
+        # Initialize Langfuse handler
+        self._initialize_langfuse()
+
+    def _initialize_langfuse(self):
+        """Initialize Langfuse handler if enabled and available."""
+        if not LANGFUSE_AVAILABLE:
+            if self.enable_langfuse is True:
+                self.logger.warning("Langfuse is not installed. Install with: pip install langfuse")
+            return
+
+        # Auto-detect if Langfuse should be enabled
+        if self.enable_langfuse is None:
+            # Check if Langfuse environment variables are set
+            langfuse_env_vars = ["LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
+            self.enable_langfuse = any(os.getenv(var) for var in langfuse_env_vars)
+
+        if not self.enable_langfuse:
+            return
+
+        try:
+            # Initialize Langfuse handler with minimal config
+            # Session/user tracking will be handled per call via metadata
+            self._langfuse_handler = LangfuseCallbackHandler()
+            self.logger.info("Langfuse tracing enabled for GeminiChatModel")
+
+        except Exception as e:
+            self.logger.warning(f"Failed to initialize Langfuse: {e}")
+            self._langfuse_handler = None
+
+    def _should_add_langfuse_callback(self, run_manager: Optional[CallbackManagerForLLMRun] = None) -> bool:
+        """Check if Langfuse callback should be added."""
+        if not self._langfuse_handler:
+            return False
+
+        # Check if Langfuse is already in the callback manager
+        if run_manager and hasattr(run_manager, 'handlers'):
+            has_langfuse = any(
+                isinstance(handler, LangfuseCallbackHandler)
+                for handler in run_manager.handlers
+            )
+            if has_langfuse:
+                return False
+
+        return True
+
+    def invoke(self, input, config=None, **kwargs):
+        """Override invoke to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return super().invoke(input, config=config, **kwargs)
+
+    async def ainvoke(self, input, config=None, **kwargs):
+        """Override ainvoke to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return await super().ainvoke(input, config=config, **kwargs)
+
+    def stream(self, input, config=None, **kwargs):
+        """Override stream to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return super().stream(input, config=config, **kwargs)
+
+    async def astream(self, input, config=None, **kwargs):
+        """Override astream to add Langfuse callback automatically."""
+        if config is None:
+            config = {}
+
+        # Add Langfuse callback if enabled and not already present
+        if self._langfuse_handler:
+            callbacks = config.get("callbacks", [])
+
+            # Check if Langfuse callback is already present
+            has_langfuse = any(
+                isinstance(callback, LangfuseCallbackHandler)
+                for callback in callbacks
+            )
+
+            if not has_langfuse:
+                callbacks = callbacks + [self._langfuse_handler]
+                config = {**config, "callbacks": callbacks}
+
+        return super().astream(input, config=config, **kwargs)
 
     @property
     def _llm_type(self) -> str:
@@ -461,6 +647,66 @@ class GeminiChatModel(BaseChatModel):
         }
         return part_dict
 
+    def _extract_usage_metadata(self, response) -> Optional[Any]:
+        """Extracts the raw usage_metadata object from a Google GenAI response."""
+        if hasattr(response, 'usage_metadata') and response.usage_metadata:
+            self.logger.debug(f"[_extract_usage_metadata] Found usage_metadata: {response.usage_metadata}")
+            return response.usage_metadata
+        return None
+
+    def _create_chat_generation_chunk(self, chunk_response) -> ChatGenerationChunk:
+        """Creates a ChatGenerationChunk for streaming."""
+        # For streaming, we do not include usage metadata in individual chunks
+        # to prevent merge conflicts. The final, aggregated response will contain
+        # the full usage details for callbacks like Langfuse.
+        return ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=chunk_response.text,
+                response_metadata={"model_name": self.model_name},
+            ),
+            generation_info=None,
+        )
+
+    def _create_chat_result_with_usage(self, response) -> ChatResult:
+        """Creates a ChatResult with usage metadata for Langfuse tracking."""
+        generated_text = response.text
+        finish_reason = response.candidates[0].finish_reason.name if response.candidates else None
+
+        # Extract usage metadata for token tracking
+        usage_metadata = self._extract_usage_metadata(response)
+        usage_dict = usage_metadata.dict() if usage_metadata and hasattr(usage_metadata, "dict") else {}
+
+        # Create AIMessage with usage information in response_metadata
+        message = AIMessage(
+            content=generated_text,
+            response_metadata={
+                "model_name": self.model_name,
+                "finish_reason": finish_reason,
+                **usage_dict
+            }
+        )
+
+        # For non-streaming, we include the usage dict in generation_info.
+        # This is another field that callback handlers like Langfuse might inspect.
+        generation = ChatGeneration(
+            message=message,
+            generation_info=usage_dict if usage_dict else None
+        )
+
+        # We also construct the llm_output dictionary in the format expected
+        # by LangChain callback handlers, with a specific "token_usage" key.
+        chat_result = ChatResult(
+            generations=[generation],
+            llm_output={
+                "token_usage": usage_dict,
+                "model_name": self.model_name
+            } if usage_dict else {
+                "model_name": self.model_name
+            }
+        )
+
+        return chat_result
+
     def _generate(
         self,
         messages: List[BaseMessage],
@@ -471,6 +717,8 @@ class GeminiChatModel(BaseChatModel):
         """Generates a chat response from a list of messages."""
         self.logger.info(f"Generating response for {len(messages)} messages.")
 
+        # Remove the problematic add_handler call - callbacks are now handled in invoke methods
+
         contents = self._convert_messages(messages)
         config = self._prepare_generation_config(messages, stop)
 
@@ -482,14 +730,7 @@ class GeminiChatModel(BaseChatModel):
                 **kwargs,
             )
 
-            generated_text = response.text
-            finish_reason = response.candidates[0].finish_reason.name if response.candidates else None
-
-            message = AIMessage(
-                content=generated_text,
-                response_metadata={"model_name": self.model_name, "finish_reason": finish_reason},
-            )
-            return ChatResult(generations=[ChatGeneration(message=message)])
+            return self._create_chat_result_with_usage(response)
 
         except Exception as e:
             self.logger.error(f"Error generating content with Google GenAI: {e}", exc_info=True)
@@ -516,14 +757,7 @@ class GeminiChatModel(BaseChatModel):
                 **kwargs,
             )
 
-            generated_text = response.text
-            finish_reason = response.candidates[0].finish_reason.name if response.candidates else None
-
-            message = AIMessage(
-                content=generated_text,
-                response_metadata={"model_name": self.model_name, "finish_reason": finish_reason},
-            )
-            return ChatResult(generations=[ChatGeneration(message=message)])
+            return self._create_chat_result_with_usage(response)
 
         except Exception as e:
             self.logger.error(f"Error during async generation: {e}", exc_info=True)
@@ -536,7 +770,7 @@ class GeminiChatModel(BaseChatModel):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> Iterator[ChatGenerationChunk]:
-        """Streams the chat response."""
+        """Streams the chat response and properly handles final usage metadata."""
         self.logger.info(f"Streaming response for {len(messages)} messages.")
 
         contents = self._convert_messages(messages)
@@ -549,12 +783,35 @@ class GeminiChatModel(BaseChatModel):
                 config=config,
                 **kwargs,
             )
+
+            final_usage_metadata = None
             for chunk_response in stream:
+                # The usage metadata is on the chunk response itself. We update
+                # our variable on each chunk that has it to ensure we get the
+                # final, cumulative count at the end of the stream.
+                if chunk_response.usage_metadata:
+                    final_usage_metadata = self._extract_usage_metadata(chunk_response)
+
                 if text_content := chunk_response.text:
-                    chunk = ChatGenerationChunk(message=AIMessageChunk(content=text_content))
+                    chunk = self._create_chat_generation_chunk(chunk_response)
                     if run_manager:
                         run_manager.on_llm_new_token(text_content, chunk=chunk)
                     yield chunk
+
+            # After the stream is exhausted, we yield a final, empty chunk
+            # containing the full usage details. LangChain merges this into the
+            # final result, making it available to callback handlers.
+            if final_usage_metadata:
+                usage_dict = final_usage_metadata.dict() if hasattr(final_usage_metadata, "dict") else {}
+                final_generation_info = {
+                    "token_usage": usage_dict,
+                    "model_name": self.model_name
+                }
+                yield ChatGenerationChunk(
+                    message=AIMessageChunk(content=""),
+                    generation_info=final_generation_info
+                )
+
         except Exception as e:
             self.logger.error(f"Error streaming content: {e}", exc_info=True)
             raise ValueError(f"Error during streaming: {e}")
@@ -566,7 +823,7 @@ class GeminiChatModel(BaseChatModel):
         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> AsyncIterator[ChatGenerationChunk]:
-        """Asynchronously streams the chat response."""
+        """Asynchronously streams the chat response and properly handles final usage metadata."""
        self.logger.info(f"Async streaming response for {len(messages)} messages.")
 
         contents = self._convert_messages(messages)
@@ -579,12 +836,35 @@ class GeminiChatModel(BaseChatModel):
                 config=config,
                 **kwargs,
             )
+
+            final_usage_metadata = None
             async for chunk_response in stream:
+                # The usage metadata is on the chunk response itself. We update
+                # our variable on each chunk that has it to ensure we get the
+                # final, cumulative count at the end of the stream.
+                if chunk_response.usage_metadata:
+                    final_usage_metadata = self._extract_usage_metadata(chunk_response)
+
                 if text_content := chunk_response.text:
-                    chunk = ChatGenerationChunk(message=AIMessageChunk(content=text_content))
+                    chunk = self._create_chat_generation_chunk(chunk_response)
                     if run_manager:
                         await run_manager.on_llm_new_token(text_content, chunk=chunk)
                     yield chunk
+
+            # After the stream is exhausted, we yield a final, empty chunk
+            # containing the full usage details. LangChain merges this into the
+            # final result, making it available to callback handlers.
+            if final_usage_metadata:
+                usage_dict = final_usage_metadata.dict() if hasattr(final_usage_metadata, "dict") else {}
+                final_generation_info = {
+                    "token_usage": usage_dict,
+                    "model_name": self.model_name
+                }
+                yield ChatGenerationChunk(
+                    message=AIMessageChunk(content=""),
+                    generation_info=final_generation_info
+                )
+
         except Exception as e:
             self.logger.error(f"Error during async streaming: {e}", exc_info=True)
             raise ValueError(f"Error during async streaming: {e}")
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
 
 [project]
 name = "crewplus"
-version = "0.2.15"
+version = "0.2.19"
 description = "Base services for CrewPlus AI applications"
 authors = [
     { name = "Tim Liu", email = "tim@opsmateai.com" },
@@ -21,6 +21,7 @@ dependencies = [
     "mkdocs-material (>=9.6.14,<10.0.0)",
     "mkdocstrings-python (>=1.16.12,<2.0.0)",
     "langchain-milvus (>=0.2.1,<0.3.0)",
+    "langfuse (>=3.1.3,<4.0.0)",
 ]
 
 [project.license]
4 files without changes
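
For reference, the headline change in this release is that langfuse becomes a required dependency and GeminiChatModel injects a Langfuse callback into invoke/ainvoke/stream/astream automatically. The sketch below pieces that together from the docstring and overrides in the diff above; it is illustrative only and assumes crewplus 0.2.19 with a valid GOOGLE_API_KEY, real Langfuse keys in place of the pk-lf-.../sk-lf-... placeholders, and arbitrary example session/user IDs.

# Sketch of the auto-detected Langfuse tracing added in 0.2.19 (illustrative).
import os

os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."  # placeholder key
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."  # placeholder key
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"

from crewplus.services import GeminiChatModel

# enable_langfuse=None (the default) auto-detects the variables above and
# appends a LangfuseCallbackHandler to every invoke/ainvoke/stream/astream call.
model = GeminiChatModel(model_name="gemini-2.0-flash")

response = model.invoke(
    "What is AI?",
    config={
        "metadata": {
            "langfuse_session_id": "chat-session-123",  # example session ID
            "langfuse_user_id": "user-456",  # example user ID
        }
    },
)

# enable_langfuse=False skips handler creation entirely, so no callback is
# injected even when the environment variables are present.
untraced_model = GeminiChatModel(model_name="gemini-2.0-flash", enable_langfuse=False)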
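
The _create_chat_result_with_usage and _stream/_astream changes also surface token usage for callback handlers. Below is a minimal sketch of where that usage ends up, assuming the same setup as above; the exact usage keys depend on the Google GenAI usage_metadata object, and UsagePrinter is a hypothetical helper written for this example, not part of crewplus.

# Non-streaming: the usage dict is merged into the AIMessage's response_metadata
# alongside model_name and finish_reason.
from crewplus.services import GeminiChatModel

model = GeminiChatModel(model_name="gemini-2.0-flash")
response = model.invoke("Hello, how are you?")
print(response.response_metadata)  # usage fields appear here; key names vary by SDK version

# Streaming: the final empty chunk carries generation_info={"token_usage": ...},
# which LangChain merges into the LLMResult handed to callbacks such as Langfuse.
from langchain_core.callbacks import BaseCallbackHandler

class UsagePrinter(BaseCallbackHandler):  # hypothetical helper for illustration
    def on_llm_end(self, response, **kwargs):
        generation = response.generations[0][0]
        print("token_usage:", (generation.generation_info or {}).get("token_usage"))

for chunk in model.stream("Stream a short greeting.", config={"callbacks": [UsagePrinter()]}):
    print(chunk.content, end="", flush=True)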