kolega-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. kolega_code/__init__.py +151 -0
  2. kolega_code/agent/__init__.py +42 -0
  3. kolega_code/agent/baseagent.py +998 -0
  4. kolega_code/agent/browseragent.py +123 -0
  5. kolega_code/agent/coder.py +157 -0
  6. kolega_code/agent/common.py +41 -0
  7. kolega_code/agent/compression.py +81 -0
  8. kolega_code/agent/context.py +112 -0
  9. kolega_code/agent/conversation.py +408 -0
  10. kolega_code/agent/generalagent.py +146 -0
  11. kolega_code/agent/investigationagent.py +123 -0
  12. kolega_code/agent/planningagent.py +187 -0
  13. kolega_code/agent/prompt_provider.py +196 -0
  14. kolega_code/agent/prompt_templates/agents/browser.j2 +102 -0
  15. kolega_code/agent/prompt_templates/agents/coder_cli_mode.j2 +127 -0
  16. kolega_code/agent/prompt_templates/agents/general.j2 +68 -0
  17. kolega_code/agent/prompt_templates/agents/investigation.j2 +72 -0
  18. kolega_code/agent/prompt_templates/common/frontend_guidance.md +36 -0
  19. kolega_code/agent/prompt_templates/common/kolega_md_instructions.md +14 -0
  20. kolega_code/agent/prompt_templates/environment_variables/workspace_env_vars.md +11 -0
  21. kolega_code/agent/prompt_templates/template_guidance/expo-template.md +379 -0
  22. kolega_code/agent/prompt_templates/template_guidance/html-website-template.md +3 -0
  23. kolega_code/agent/prompt_templates/template_guidance/mern-stack-template.md +3 -0
  24. kolega_code/agent/prompt_templates/template_guidance/react-vite-shadcdn-template.md +182 -0
  25. kolega_code/agent/prompts.py +192 -0
  26. kolega_code/agent/tests/__init__.py +0 -0
  27. kolega_code/agent/tests/llm/__init__.py +0 -0
  28. kolega_code/agent/tests/llm/test_anthropic_token_counting.py +633 -0
  29. kolega_code/agent/tests/llm/test_billing_openai_cache.py +74 -0
  30. kolega_code/agent/tests/llm/test_client.py +773 -0
  31. kolega_code/agent/tests/llm/test_dashscope_mapping.py +32 -0
  32. kolega_code/agent/tests/llm/test_error_boundary.py +322 -0
  33. kolega_code/agent/tests/llm/test_exceptions.py +249 -0
  34. kolega_code/agent/tests/llm/test_instrumented_client.py +536 -0
  35. kolega_code/agent/tests/llm/test_instrumented_client_integration.py +547 -0
  36. kolega_code/agent/tests/llm/test_langfuse_normalization.py +39 -0
  37. kolega_code/agent/tests/llm/test_model_specs.py +17 -0
  38. kolega_code/agent/tests/llm/test_openai_cached_tokens.py +58 -0
  39. kolega_code/agent/tests/llm/test_openai_cached_tokens_stream.py +74 -0
  40. kolega_code/agent/tests/llm/test_openai_message_conversion.py +30 -0
  41. kolega_code/agent/tests/llm/test_openai_token_counting.py +687 -0
  42. kolega_code/agent/tests/llm/test_tool_execution_ids.py +193 -0
  43. kolega_code/agent/tests/services/__init__.py +1 -0
  44. kolega_code/agent/tests/services/test_browser.py +447 -0
  45. kolega_code/agent/tests/services/test_browser_parity.py +353 -0
  46. kolega_code/agent/tests/services/test_file_system.py +699 -0
  47. kolega_code/agent/tests/services/test_sandbox_terminal_input.py +98 -0
  48. kolega_code/agent/tests/services/test_terminal.py +154 -0
  49. kolega_code/agent/tests/services/test_terminal_command_tracking.py +385 -0
  50. kolega_code/agent/tests/services/test_terminal_state_serializer.py +262 -0
  51. kolega_code/agent/tests/test_agent_tools_inventory.py +267 -0
  52. kolega_code/agent/tests/test_base_agent.py +1942 -0
  53. kolega_code/agent/tests/test_coder_attachments.py +330 -0
  54. kolega_code/agent/tests/test_coder_prompt_extensions.py +61 -0
  55. kolega_code/agent/tests/test_commands.py +179 -0
  56. kolega_code/agent/tests/test_duplicate_tool_results.py +556 -0
  57. kolega_code/agent/tests/test_empty_message_handling.py +48 -0
  58. kolega_code/agent/tests/test_general_agent.py +242 -0
  59. kolega_code/agent/tests/test_html.py +320 -0
  60. kolega_code/agent/tests/test_parallel_tool_calls.py +291 -0
  61. kolega_code/agent/tests/test_planning_agent.py +227 -0
  62. kolega_code/agent/tests/test_prompt_provider.py +271 -0
  63. kolega_code/agent/tests/test_tool_registry.py +102 -0
  64. kolega_code/agent/tests/test_tools.py +549 -0
  65. kolega_code/agent/tests/tool_backend/__init__.py +0 -0
  66. kolega_code/agent/tests/tool_backend/test_agent_tool.py +356 -0
  67. kolega_code/agent/tests/tool_backend/test_base_tool.py +147 -0
  68. kolega_code/agent/tests/tool_backend/test_browser_tool.py +335 -0
  69. kolega_code/agent/tests/tool_backend/test_build_tool.py +93 -0
  70. kolega_code/agent/tests/tool_backend/test_create_file_tool.py +115 -0
  71. kolega_code/agent/tests/tool_backend/test_glob_tool.py +196 -0
  72. kolega_code/agent/tests/tool_backend/test_glob_tool_sandbox_parity.py +230 -0
  73. kolega_code/agent/tests/tool_backend/test_list_directory_tool.py +292 -0
  74. kolega_code/agent/tests/tool_backend/test_read_file_tool.py +173 -0
  75. kolega_code/agent/tests/tool_backend/test_replace_entire_file_tool.py +115 -0
  76. kolega_code/agent/tests/tool_backend/test_replace_lines_tool.py +141 -0
  77. kolega_code/agent/tests/tool_backend/test_search_and_replace_tool.py +174 -0
  78. kolega_code/agent/tests/tool_backend/test_search_codebase_tool.py +228 -0
  79. kolega_code/agent/tests/tool_backend/test_terminal_tool.py +482 -0
  80. kolega_code/agent/tests/tool_backend/test_think_hard_integration.py +189 -0
  81. kolega_code/agent/tests/tool_backend/test_think_hard_streaming.py +445 -0
  82. kolega_code/agent/tests/tool_backend/test_web_fetch_tool.py +194 -0
  83. kolega_code/agent/tool_backend/agent_tool.py +414 -0
  84. kolega_code/agent/tool_backend/apply_edit_tool.py +98 -0
  85. kolega_code/agent/tool_backend/apply_patch_tool.py +514 -0
  86. kolega_code/agent/tool_backend/base_tool.py +217 -0
  87. kolega_code/agent/tool_backend/browser_tool.py +271 -0
  88. kolega_code/agent/tool_backend/build_tool.py +93 -0
  89. kolega_code/agent/tool_backend/create_file_tool.py +52 -0
  90. kolega_code/agent/tool_backend/glob_tool.py +323 -0
  91. kolega_code/agent/tool_backend/list_directory_tool.py +300 -0
  92. kolega_code/agent/tool_backend/memory_tool.py +79 -0
  93. kolega_code/agent/tool_backend/read_file_tool.py +119 -0
  94. kolega_code/agent/tool_backend/replace_entire_file_tool.py +40 -0
  95. kolega_code/agent/tool_backend/replace_lines_tool.py +97 -0
  96. kolega_code/agent/tool_backend/search_and_replace_tool.py +146 -0
  97. kolega_code/agent/tool_backend/search_codebase_tool.py +377 -0
  98. kolega_code/agent/tool_backend/streaming_tool.py +47 -0
  99. kolega_code/agent/tool_backend/terminal_tool.py +643 -0
  100. kolega_code/agent/tool_backend/think_hard_tool.py +211 -0
  101. kolega_code/agent/tool_backend/web_fetch_tool.py +205 -0
  102. kolega_code/agent/tools.py +1704 -0
  103. kolega_code/agent/utils/commands.py +94 -0
  104. kolega_code/cli/__init__.py +1 -0
  105. kolega_code/cli/app.py +2756 -0
  106. kolega_code/cli/config.py +280 -0
  107. kolega_code/cli/connection.py +49 -0
  108. kolega_code/cli/file_index.py +147 -0
  109. kolega_code/cli/main.py +564 -0
  110. kolega_code/cli/mentions.py +155 -0
  111. kolega_code/cli/messages.py +89 -0
  112. kolega_code/cli/provider_registry.py +96 -0
  113. kolega_code/cli/session_store.py +207 -0
  114. kolega_code/cli/settings.py +87 -0
  115. kolega_code/cli/skills.py +409 -0
  116. kolega_code/cli/slash_commands.py +108 -0
  117. kolega_code/cli/tests/__init__.py +1 -0
  118. kolega_code/cli/tests/test_app.py +4251 -0
  119. kolega_code/cli/tests/test_cli_config.py +171 -0
  120. kolega_code/cli/tests/test_connection.py +26 -0
  121. kolega_code/cli/tests/test_file_index.py +103 -0
  122. kolega_code/cli/tests/test_main.py +455 -0
  123. kolega_code/cli/tests/test_mentions.py +108 -0
  124. kolega_code/cli/tests/test_session_store.py +67 -0
  125. kolega_code/cli/tests/test_settings.py +62 -0
  126. kolega_code/cli/tests/test_skills.py +157 -0
  127. kolega_code/cli/tests/test_slash_commands.py +88 -0
  128. kolega_code/cli/theme.py +180 -0
  129. kolega_code/config.py +154 -0
  130. kolega_code/events.py +202 -0
  131. kolega_code/llm/client.py +300 -0
  132. kolega_code/llm/exceptions.py +285 -0
  133. kolega_code/llm/instrumented_client.py +520 -0
  134. kolega_code/llm/models.py +1368 -0
  135. kolega_code/llm/providers/__init__.py +0 -0
  136. kolega_code/llm/providers/anthropic.py +387 -0
  137. kolega_code/llm/providers/base.py +71 -0
  138. kolega_code/llm/providers/google.py +157 -0
  139. kolega_code/llm/providers/models.py +37 -0
  140. kolega_code/llm/providers/openai.py +363 -0
  141. kolega_code/llm/ratelimit.py +40 -0
  142. kolega_code/llm/specs.py +67 -0
  143. kolega_code/llm/tool_execution_ids.py +18 -0
  144. kolega_code/models/__init__.py +9 -0
  145. kolega_code/models/sandbox_terminal_state.py +47 -0
  146. kolega_code/runtime.py +50 -0
  147. kolega_code/sandbox/README.md +200 -0
  148. kolega_code/sandbox/__init__.py +21 -0
  149. kolega_code/sandbox/async_filesystem.py +475 -0
  150. kolega_code/sandbox/base.py +297 -0
  151. kolega_code/sandbox/browser.py +25 -0
  152. kolega_code/sandbox/event_loop.py +43 -0
  153. kolega_code/sandbox/filesystem.py +341 -0
  154. kolega_code/sandbox/local.py +118 -0
  155. kolega_code/sandbox/serializer.py +175 -0
  156. kolega_code/sandbox/terminal.py +868 -0
  157. kolega_code/sandbox/utils.py +216 -0
  158. kolega_code/services/base.py +255 -0
  159. kolega_code/services/browser.py +444 -0
  160. kolega_code/services/file_system.py +749 -0
  161. kolega_code/services/html.py +221 -0
  162. kolega_code/services/terminal.py +903 -0
  163. kolega_code/tools/__init__.py +22 -0
  164. kolega_code/tools/core.py +33 -0
  165. kolega_code/tools/definitions.py +81 -0
  166. kolega_code/tools/registry.py +73 -0
  167. kolega_code-0.1.0.dist-info/METADATA +157 -0
  168. kolega_code-0.1.0.dist-info/RECORD +171 -0
  169. kolega_code-0.1.0.dist-info/WHEEL +4 -0
  170. kolega_code-0.1.0.dist-info/entry_points.txt +2 -0
  171. kolega_code-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,520 @@
1
+ """
2
+ Instrumented LLM client that adds Langfuse tracing to all LLM operations.
3
+ """
4
+
5
+ import os
6
+ from typing import Any, Optional, List, Dict, Union, AsyncContextManager, Coroutine
7
+ from datetime import datetime, timezone
8
+ import logging
9
+
10
+ from langfuse import Langfuse
11
+
12
+ from .client import LLMClient
13
+ from .models import Message, MessageHistory
14
+ from .providers.models import GenerationParams
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class InstrumentedLLMClient(LLMClient):
    """LLMClient with Langfuse instrumentation for observability.

    ``generate`` and ``stream`` wrap the parent implementation in a Langfuse
    span plus a child generation, and forward token usage to an optional
    host-provided ``usage_recorder``.  When no Langfuse client is configured
    both methods delegate directly to ``LLMClient``.

    Telemetry is best-effort: a failure while *closing* a Langfuse observation
    is logged and never replaces the LLM result or the LLM error.

    NOTE(review): on the no-Langfuse path ``_record_usage`` is never invoked,
    so a configured ``usage_recorder`` only sees calls that are also traced.
    Confirm whether that coupling is intended.
    """

    # Providers grouped by the key names found in their usage metadata.  The
    # grouping mirrors ``_normalize_usage_data`` so Langfuse usage and recorder
    # usage agree on which providers are understood.
    _ANTHROPIC_STYLE_PROVIDERS = ("anthropic", "moonshot", "deepseek")
    _OPENAI_STYLE_PROVIDERS = ("openai", "together", "groq", "fireworks", "llama", "xai", "dashscope")

    def __init__(
        self,
        provider: str,
        api_key: str,
        max_retries: int = 3,
        requests_per_minute: Optional[int] = None,
        tokens_per_minute: Optional[int] = None,
        langfuse_client: Optional[Langfuse] = None,
        workspace_id: Optional[str] = None,
        thread_id: Optional[str] = None,
        agent_type: Optional[str] = None,
        environment: Optional[str] = None,
        user_id: Optional[str] = None,
        user_email: Optional[str] = None,
        usage_recorder: Optional[Any] = None,
    ):
        """Initialise the base client and store the tracing context.

        Args:
            provider: Provider name, forwarded to ``LLMClient``.
            api_key: Provider API key, forwarded to ``LLMClient``.
            max_retries: Forwarded to ``LLMClient``.
            requests_per_minute: Forwarded to ``LLMClient``.
            tokens_per_minute: Forwarded to ``LLMClient``.
            langfuse_client: Langfuse client; ``None`` disables tracing.
            workspace_id: Workspace identifier attached to traces and usage.
            thread_id: Thread identifier attached to traces and usage.
            agent_type: Agent label used in observation names and tags.
            environment: Environment tag; falls back to the ``ENVIRONMENT``
                environment variable, then to ``"development"``.
            user_id: User identifier attached to traces and usage.
            user_email: User e-mail attached to trace metadata.
            usage_recorder: Either a callable or an object exposing
                ``record_usage``; it receives the normalized usage dict.
        """
        super().__init__(provider, api_key, max_retries, requests_per_minute, tokens_per_minute)
        self.langfuse = langfuse_client
        self.workspace_id = workspace_id
        self.thread_id = thread_id
        self.agent_type = agent_type
        self.environment = environment or os.environ.get("ENVIRONMENT", "development")
        self.user_id = user_id
        self.user_email = user_email
        self.usage_recorder = usage_recorder

    def _create_generation_metadata(self, **kwargs) -> Dict[str, Any]:
        """Create metadata for a Langfuse generation.

        Starts from the client's tracing context and then copies every extra
        keyword argument except the request payload keys (``messages``,
        ``system``, ``params``, ``model``).  Extra keys may overwrite the
        context entries of the same name.
        """
        metadata = {
            "provider": self.provider_name,
            "workspace_id": self.workspace_id,
            "thread_id": self.thread_id,
            "agent_type": self.agent_type,
            "environment": self.environment,
            "user_id": self.user_id,
            "user_email": self.user_email,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        # Add any additional kwargs as metadata
        for key, value in kwargs.items():
            if key not in ["messages", "system", "params", "model"]:
                metadata[key] = value

        return metadata

    def _extract_usage_details(self, response: Message) -> Dict[str, Any]:
        """Return the response's ``usage_metadata``.

        Returns an empty dict when the response is falsy or has no
        ``usage_metadata`` attribute; otherwise the attribute value is
        returned unchanged (callers only test it for truthiness).
        """
        if not response or not hasattr(response, "usage_metadata"):
            return {}

        return response.usage_metadata

    def _normalize_usage_data(
        self, usage_metadata: Dict[str, Any], model: str, success: bool = True, error_message: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Normalize provider usage metadata for host-provided usage recorders.

        Args:
            usage_metadata: Usage metadata from LLM response
            model: Model name used
            success: Whether the request was successful
            error_message: Error message if request failed

        Returns:
            A provider-independent usage record, or ``None`` when there is no
            usage metadata or the provider is not recognised.
        """
        if not usage_metadata:
            return None

        provider = usage_metadata.get("provider", self.provider_name)

        # Only the input/output key names differ between provider families;
        # the cache keys are shared.
        if provider in self._ANTHROPIC_STYLE_PROVIDERS:
            input_key, output_key = "input_tokens", "output_tokens"
        elif provider in self._OPENAI_STYLE_PROVIDERS:
            input_key, output_key = "prompt_tokens", "completion_tokens"
        elif provider == "google":
            input_key, output_key = "prompt_token_count", "candidates_token_count"
        else:
            logger.warning(f"Unknown provider for usage recording: {provider}")
            return None

        return {
            "user_id": self.user_id,
            "workspace_id": self.workspace_id,
            "thread_id": self.thread_id,
            "agent_type": self.agent_type,
            "provider": provider,
            "model": model,
            "input_tokens": usage_metadata.get(input_key, 0),
            "output_tokens": usage_metadata.get(output_key, 0),
            "cache_read_input_tokens": usage_metadata.get("cache_read_input_tokens", 0),
            "cache_write_input_tokens": usage_metadata.get("cache_write_input_tokens", 0),
            "success": success,
            "error_message": error_message,
            "timestamp": datetime.now(timezone.utc),
            "metadata": {
                "environment": self.environment,
                "raw_usage": usage_metadata,
            },
        }

    def _normalize_langfuse_usage(self, usage_details: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """Convert provider usage metadata into Langfuse ``usage_details``.

        Returns ``None`` when there is no usage metadata or the provider is
        not recognised.  Every provider that ``_normalize_usage_data`` treats
        as OpenAI-style is handled here too, not just ``"openai"``.
        """
        if not usage_details:
            return None

        provider = usage_details.get("provider", self.provider_name)

        if provider in self._ANTHROPIC_STYLE_PROVIDERS:
            input_tokens = usage_details.get("input_tokens", 0)
            output_tokens = usage_details.get("output_tokens", 0)
            # No total is read for this family; it is derived.
            total_tokens = input_tokens + output_tokens
        elif provider in self._OPENAI_STYLE_PROVIDERS:
            input_tokens = usage_details.get("prompt_tokens", 0)
            output_tokens = usage_details.get("completion_tokens", 0)
            total_tokens = usage_details.get("total_tokens", 0)
        elif provider == "google":
            input_tokens = usage_details.get("prompt_token_count", 0)
            output_tokens = usage_details.get("candidates_token_count", 0)
            total_tokens = usage_details.get("total_token_count", 0)
        else:
            return None

        return {
            "input": input_tokens,
            "output": output_tokens,
            "total": total_tokens,
            "cache_read_input_tokens": usage_details.get("cache_read_input_tokens", 0),
            "cache_creation_input_tokens": usage_details.get("cache_write_input_tokens", 0),
        }

    async def _record_usage(
        self, usage_metadata: Dict[str, Any], model: str, success: bool = True, error_message: Optional[str] = None
    ) -> None:
        """Record token usage through a host-provided recorder, when configured.

        Does nothing when the ``DISABLE_USAGE_RECORDING`` environment variable
        is set to any non-empty value, when no recorder is configured, or when
        the usage metadata cannot be normalized.  Recorder failures are logged
        as warnings and never propagate.
        """
        if os.environ.get("DISABLE_USAGE_RECORDING"):
            logger.debug("Usage recording disabled by DISABLE_USAGE_RECORDING env var")
            return

        if not self.usage_recorder:
            return

        usage_data = self._normalize_usage_data(usage_metadata, model, success, error_message)
        if not usage_data:
            return

        try:
            # Prefer an explicit ``record_usage`` method over calling the object.
            if hasattr(self.usage_recorder, "record_usage"):
                result = self.usage_recorder.record_usage(usage_data)
            elif callable(self.usage_recorder):
                result = self.usage_recorder(usage_data)
            else:
                logger.warning("Usage recorder is not callable and has no record_usage method")
                return

            import inspect

            # Recorders may be sync or async; await only when needed.
            if inspect.isawaitable(result):
                await result
        except Exception as e:
            logger.warning(f"Failed to record token usage: {e}")

    @staticmethod
    def _build_input_data(messages, system, temperature, max_completion_tokens, tools) -> Dict[str, Any]:
        """Serialize the request into the ``input`` payload sent to Langfuse."""
        return {
            "messages": [msg.to_dict() for msg in messages],
            "system": system.to_dict() if system else None,
            "temperature": temperature,
            "max_completion_tokens": max_completion_tokens,
            "tools": tools,
        }

    def _start_observation(self, input_data, metadata, model, temperature, max_completion_tokens, streaming):
        """Open the Langfuse span and its child generation for one LLM call.

        Returns:
            ``(trace, generation)``; the caller is responsible for ending both.
        """
        label = self.agent_type or "agent"

        # Create trace first (v3 API)
        trace = self.langfuse.start_span(
            name=f"{label}-llm-stream" if streaming else f"{label}-llm-call",
            input=input_data,
            metadata=metadata,
        )

        candidate_tags = [
            self.environment,
            f"workspace:{self.workspace_id}",
            f"thread:{self.thread_id}",
            f"agent:{self.agent_type}",
            f"provider:{self.provider_name}",
            "streaming" if streaming else None,
            f"user:{self.user_id}" if self.user_id else None,
        ]
        trace.update_trace(
            user_id=self.user_id or self.workspace_id,  # Use actual user_id if available, fallback to workspace
            session_id=f"{self.workspace_id}/{self.thread_id}",
            tags=[tag for tag in candidate_tags if tag is not None],
        )

        model_parameters = {
            "temperature": temperature,
            "max_completion_tokens": max_completion_tokens,
            "provider": self.provider_name,
        }
        if streaming:
            model_parameters["streaming"] = True

        # Create generation as child of trace
        generation = trace.start_generation(
            name=f"{label}-llm-stream-generation" if streaming else f"{label}-llm-generation",
            model=model,
            model_parameters=model_parameters,
            input=input_data,
            metadata=metadata,
        )
        return trace, generation

    @staticmethod
    def _fail_observation(generation, trace, error: BaseException) -> None:
        """Mark the generation and its trace as failed and end both (best effort)."""
        try:
            generation.update(level="ERROR", status_message=str(error))
            generation.end()
            trace.update(level="ERROR", status_message=str(error))
            trace.end()
        except Exception as telemetry_error:
            # Never let a telemetry failure mask the original LLM error.
            logger.debug(f"Error updating langfuse generation: {telemetry_error}")

    async def generate(
        self,
        messages: MessageHistory,
        system: Optional[Message] = None,
        temperature: float = 1.0,
        max_completion_tokens: Optional[int] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        thinking: Optional[Union[int, str]] = None,
        params: Optional[GenerationParams] = None,
        **kwargs: Any,
    ) -> Message:
        """Generate with Langfuse tracing.

        Arguments are forwarded unchanged to ``LLMClient.generate``.  The
        model name recorded in Langfuse and in usage records is taken from
        ``kwargs["model"]`` (``"unknown"`` when absent).

        Returns:
            The message returned by ``LLMClient.generate``.

        Raises:
            Whatever ``LLMClient.generate`` raises; the observation is marked
            as ERROR and closed before the exception is re-raised.
        """
        if not self.langfuse:
            # Fallback to non-instrumented if Langfuse not configured
            return await super().generate(
                messages, system, temperature, max_completion_tokens, tools, thinking, params, **kwargs
            )

        model = kwargs.get("model", "unknown")
        input_data = self._build_input_data(messages, system, temperature, max_completion_tokens, tools)
        metadata = self._create_generation_metadata(**kwargs)
        trace, generation = self._start_observation(
            input_data, metadata, model, temperature, max_completion_tokens, streaming=False
        )

        try:
            response = await super().generate(
                messages, system, temperature, max_completion_tokens, tools, thinking, params, **kwargs
            )
        except BaseException as e:
            # BaseException so task cancellation also closes the observation
            # instead of leaving the span and generation open.
            self._fail_observation(generation, trace, e)
            raise

        usage_details = self._extract_usage_details(response)

        # The LLM call succeeded: a telemetry failure from here on must not
        # turn a valid response into an error or skip usage recording.
        try:
            generation.update(
                output=response.to_dict(),
                usage_details=self._normalize_langfuse_usage(usage_details),
                level="DEFAULT",
                status_message="Success",
            )
            generation.end()
            trace.end()
        except Exception as telemetry_error:
            logger.debug(f"Error updating langfuse generation: {telemetry_error}")

        await self._record_usage(usage_details, model, success=True)

        return response

    def stream(
        self,
        messages: MessageHistory,
        system: Optional[Message] = None,
        temperature: float = 1.0,
        max_completion_tokens: Optional[int] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        thinking: Optional[Union[int, str]] = None,
        params: Optional[GenerationParams] = None,
        **kwargs,
    ) -> Union[AsyncContextManager[Any], Coroutine[Any, Any, AsyncContextManager[Any]]]:
        """Stream a response with Langfuse tracing.

        Without Langfuse this returns whatever ``LLMClient.stream`` returns.
        With Langfuse it returns a coroutine that resolves to a
        ``MinimalLangfuseStreamWrapper`` around the provider stream; the
        wrapper closes the observation when its context exits.
        """
        if not self.langfuse:
            # Fallback to non-instrumented if Langfuse not configured
            return super().stream(
                messages, system, temperature, max_completion_tokens, tools, thinking, params, **kwargs
            )

        # The underlying stream may itself be a coroutine, so the wrapper is
        # built inside a coroutine that can await it.
        async def create_instrumented_stream():
            model = kwargs.get("model", "unknown")
            input_data = self._build_input_data(messages, system, temperature, max_completion_tokens, tools)
            metadata = self._create_generation_metadata(**kwargs)
            trace, generation = self._start_observation(
                input_data, metadata, model, temperature, max_completion_tokens, streaming=True
            )

            try:
                # Zero-argument super() is unavailable inside this nested
                # function, hence the explicit parent call.
                stream = LLMClient.stream(
                    self, messages, system, temperature, max_completion_tokens, tools, thinking, params, **kwargs
                )

                import inspect

                # Check if stream is a coroutine (needs to be awaited)
                if inspect.iscoroutine(stream):
                    stream = await stream
            except BaseException as e:
                # No wrapper exists yet to close the observation, so do it
                # here rather than leaking an open span.
                self._fail_observation(generation, trace, e)
                raise

            return MinimalLangfuseStreamWrapper(stream, generation, trace, self, model)

        return create_instrumented_stream()
395
+
396
+
397
class MinimalLangfuseStreamWrapper:
    """Minimal wrapper for any provider's stream with Langfuse tracing.

    The wrapper is an async context manager and async iterator that delegates
    to the wrapped stream.  When the context exits it fetches the final
    message once, closes the Langfuse generation and trace, and forwards the
    usage metadata to the instrumented client's ``_record_usage``.

    All telemetry work is best-effort: failures are logged at debug level and
    never suppress or replace an exception raised by the stream itself.
    """

    # Providers grouped by the key names found in their usage metadata.
    _ANTHROPIC_STYLE_PROVIDERS = ("anthropic", "moonshot", "deepseek")
    _OPENAI_STYLE_PROVIDERS = ("openai", "together", "groq", "fireworks", "llama", "xai", "dashscope")

    def __init__(self, stream, generation, trace, instrumented_client, model):
        """Store the wrapped stream and the observation to close on exit.

        Args:
            stream: Provider stream; must support ``__aenter__``/``__aexit__``
                and ``__anext__``, and may expose ``get_final_message``.
            generation: Langfuse generation to update and end.
            trace: Langfuse span that owns ``generation``.
            instrumented_client: Client whose ``_record_usage`` receives usage.
            model: Model name reported with the usage record.
        """
        self.stream = stream
        self.generation = generation
        self.trace = trace
        self.instrumented_client = instrumented_client
        self.model = model

    async def __aenter__(self):
        """Enter the underlying stream's context and return this wrapper."""
        try:
            await self.stream.__aenter__()
        except BaseException as enter_error:
            # ``__aexit__`` is not called when ``__aenter__`` fails, so the
            # observation has to be closed here or it stays open forever.
            self._close_observation(failed=True, status_message=str(enter_error))
            raise
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the stream, then close the observation and record usage.

        Returns:
            ``False`` always, so exceptions from the ``async with`` body
            propagate to the caller.
        """
        # Let stream clean up first
        try:
            await self.stream.__aexit__(exc_type, exc_val, exc_tb)
        except BaseException as cleanup_error:
            self._close_observation(failed=True, status_message=str(cleanup_error))
            raise

        # Fetch the final message once; it feeds both Langfuse and the recorder.
        final_message = None
        try:
            if hasattr(self.stream, "get_final_message"):
                final_message = await self.stream.get_final_message()
        except Exception as e:
            logger.debug(f"Error getting final message: {e}")

        output = None
        usage = None
        if final_message is not None:
            try:
                if hasattr(final_message, "get_text_content"):
                    output = final_message.get_text_content()
                usage = self._extract_langfuse_usage(final_message)
            except Exception as e:
                logger.debug(f"Error getting final message: {e}")

        self._close_observation(
            failed=bool(exc_type),
            status_message=str(exc_val) if exc_val else "Success",
            output=output,
            usage=usage,
        )

        # Record usage for streaming response
        usage_metadata = getattr(final_message, "usage_metadata", None)
        if usage_metadata:
            try:
                await self.instrumented_client._record_usage(
                    dict(usage_metadata),  # copy so the recorder cannot mutate the message
                    self.model,
                    success=(exc_type is None),
                    error_message=str(exc_val) if exc_val else None,
                )
            except Exception as e:
                logger.debug(f"Error recording usage for stream: {e}")

        return False  # Don't suppress exceptions

    def _close_observation(self, failed, status_message, output=None, usage=None):
        """Update and end the generation and the trace (best effort).

        ``output`` and ``usage`` are attached to the generation only when
        they are not ``None``.
        """
        level = "ERROR" if failed else "DEFAULT"
        gen_update_kwargs = {"level": level, "status_message": status_message}
        if output is not None:
            gen_update_kwargs["output"] = output
        if usage is not None:
            gen_update_kwargs["usage_details"] = usage

        try:
            self.generation.update(**gen_update_kwargs)
            self.generation.end()

            # Update and end trace
            self.trace.update(level=level, status_message=status_message)
            self.trace.end()
        except Exception as ex:
            logger.debug(f"Error updating langfuse generation: {ex}")

    def __aiter__(self):
        """Return the wrapper itself as the async iterator."""
        return self

    async def __anext__(self):
        """Return the next chunk of the underlying stream.

        ``StopAsyncIteration`` from the stream propagates unchanged.
        """
        return await self.stream.__anext__()

    async def get_final_message(self):
        """Delegate to underlying stream's get_final_message method.

        Raises:
            AttributeError: If the wrapped stream has no ``get_final_message``.
        """
        if hasattr(self.stream, "get_final_message"):
            return await self.stream.get_final_message()
        raise AttributeError(f"Underlying stream {type(self.stream).__name__} has no attribute 'get_final_message'")

    def _extract_langfuse_usage(self, message: "Message") -> Optional[Dict[str, Any]]:
        """Extract and normalize usage data for Langfuse from message.

        The provider is read from the usage metadata and falls back to the
        instrumented client's ``provider_name``.  Returns ``None`` when the
        message carries no usage metadata or the provider is not recognised.
        """
        if not hasattr(message, "usage_metadata") or not message.usage_metadata:
            return None

        usage_metadata = message.usage_metadata
        provider = usage_metadata.get("provider") or getattr(self.instrumented_client, "provider_name", "")

        if provider in self._ANTHROPIC_STYLE_PROVIDERS:
            return {
                "input": usage_metadata.get("input_tokens", 0),
                "output": usage_metadata.get("output_tokens", 0),
                # No total is read for this family; it is derived.
                "total": usage_metadata.get("input_tokens", 0) + usage_metadata.get("output_tokens", 0),
                "cache_read_input_tokens": usage_metadata.get("cache_read_input_tokens", 0),
                "cache_creation_input_tokens": usage_metadata.get("cache_write_input_tokens", 0),
            }
        if provider in self._OPENAI_STYLE_PROVIDERS:
            return {
                "input": usage_metadata.get("prompt_tokens", 0),
                "output": usage_metadata.get("completion_tokens", 0),
                "total": usage_metadata.get("total_tokens", 0),
                "cache_read_input_tokens": usage_metadata.get("cache_read_input_tokens", 0),
                "cache_creation_input_tokens": usage_metadata.get("cache_write_input_tokens", 0),
            }
        if provider == "google":
            # NOTE(review): the non-streaming path also reports cache token
            # fields for google; left as-is here pending confirmation.
            return {
                "input": usage_metadata.get("prompt_token_count", 0),
                "output": usage_metadata.get("candidates_token_count", 0),
                "total": usage_metadata.get("total_token_count", 0),
            }

        return None