botrun-flow-lang 5.12.264__py3-none-any.whl → 6.2.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -113,12 +113,17 @@ class GraphSchemaRequest(BaseModel):
     graph_name: str


-PERPLEXITY_SEARCH_AGENT = "perplexity_search_agent"
+# Imported from the constants module so external modules can get these
+# constants without triggering heavy imports
+from botrun_flow_lang.api.langgraph_constants import (
+    LANGGRAPH_REACT_AGENT,
+    GOV_SUBSIDY_AGENT,
+    PERPLEXITY_SEARCH_AGENT,
+)
+
+# Constants used only within this file
 CUSTOM_WEB_RESEARCH_AGENT = "custom_web_research_agent"
-LANGGRAPH_REACT_AGENT = "langgraph_react_agent"
 DEEP_RESEARCH_AGENT = "deep_research_agent"
 # GOV_RESEARCHER_AGENT = "gov_researcher_agent"
-GOV_SUBSIDY_AGENT = "gov_subsidy_agent"
 GEMINI_SUBSIDY_AGENT = "gemini_subsidy_agent"


@@ -0,0 +1,11 @@
+"""
+LangGraph constant definitions.
+
+This file contains only constant definitions and no imports that would trigger heavy SDK loading.
+That way, modules that only need the constants never trigger loading of heavyweight packages such as langchain_google_vertexai.
+"""
+
+# Graph name constants
+LANGGRAPH_REACT_AGENT = "langgraph_react_agent"
+GOV_SUBSIDY_AGENT = "gov_subsidy_agent"
+PERPLEXITY_SEARCH_AGENT = "perplexity_search_agent"
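Together, these two hunks move the shared graph-name constants into a dependency-free module. A hedged sketch of what this buys a consumer (the `route_graph` helper is illustrative, not part of the package):

```python
# Illustrative consumer: importing a graph-name constant from the new module
# no longer drags in langchain_google_vertexai or other LLM SDKs.
from botrun_flow_lang.api.langgraph_constants import (
    LANGGRAPH_REACT_AGENT,
    PERPLEXITY_SEARCH_AGENT,
)

def route_graph(graph_name: str) -> str:
    # Plain string comparisons; no heavy imports are triggered.
    if graph_name == PERPLEXITY_SEARCH_AGENT:
        return "search"
    if graph_name == LANGGRAPH_REACT_AGENT:
        return "react"
    return "default"
```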
@@ -86,8 +86,11 @@ from langchain_mcp_adapters.client import MultiServerMCPClient
 # ========
 # for Vertex AI
 from google.oauth2 import service_account
-from langchain_google_vertexai import ChatVertexAI
-from langchain_google_vertexai.model_garden import ChatAnthropicVertex
+# Use the custom ChatVertexAIClaude in place of ChatAnthropicVertex:
+# it calls Claude directly via the rawPredict REST API, avoiding the google-cloud-aiplatform import (~26 s)
+from botrun_flow_lang.langgraph_agents.agents.util.custom_vertex_claude import (
+    ChatVertexAIClaude,
+)

 load_dotenv()

@@ -234,8 +237,10 @@ def get_react_agent_model(model_name: str = ""):
     # Determine the model type and create the matching instance
     if vertex_model_name.startswith("gemini-"):
         # Gemini family: gemini-2.5-pro, gemini-2.5-flash, gemini-pro
-        model = ChatVertexAI(
+        # Use ChatGoogleGenerativeAI with vertexai=True to avoid loading the heavy langchain_google_vertexai
+        model = ChatGoogleGenerativeAI(
             model=vertex_model_name,
+            vertexai=True,
             location=vertex_region,
             project=vertex_project,
             credentials=credentials,
@@ -243,21 +248,22 @@ def get_react_agent_model(model_name: str = ""):
             max_tokens=GEMINI_MAX_TOKENS,
         )
         logger.info(
-            f"model ChatVertexAI {vertex_model_name} @ {vertex_region} (project: {vertex_project})"
+            f"model ChatGoogleGenerativeAI(vertexai=True) {vertex_model_name} @ {vertex_region} (project: {vertex_project})"
         )

     elif "claude" in vertex_model_name.lower() or vertex_model_name.startswith("maison/"):
         # Anthropic Claude (model garden)
-        model = ChatAnthropicVertex(
+        # Use the custom ChatVertexAIClaude to avoid loading google-cloud-aiplatform
+        model = ChatVertexAIClaude(
             model=vertex_model_name,
             location=vertex_region,
-            project=vertex_project,
+            project_id=vertex_project,
             credentials=credentials,
             temperature=0,
             max_tokens=ANTHROPIC_MAX_TOKENS,
         )
         logger.info(
-            f"model ChatAnthropicVertex {vertex_model_name} @ {vertex_region} (project: {vertex_project})"
+            f"model ChatVertexAIClaude {vertex_model_name} @ {vertex_region} (project: {vertex_project})"
         )

     else:
@@ -301,9 +307,9 @@ def get_react_agent_model(model_name: str = ""):
             "VERTEX_AI_GOOGLE_APPLICATION_CREDENTIALS not set or file not found. Using ADC if available."
         )

-        # Initialize ChatAnthropicVertex
-        model = ChatAnthropicVertex(
-            project=vertex_project,
+        # Use the custom ChatVertexAIClaude to avoid loading google-cloud-aiplatform
+        model = ChatVertexAIClaude(
+            project_id=vertex_project,
             model=vertex_model,
             location=vertex_location,
             credentials=credentials,
@@ -311,7 +317,7 @@ def get_react_agent_model(model_name: str = ""):
             max_tokens=ANTHROPIC_MAX_TOKENS,
         )
         logger.info(
-            f"model ChatAnthropicVertex {vertex_project} @ {vertex_model} @ {vertex_location}"
+            f"model ChatVertexAIClaude {vertex_project} @ {vertex_model} @ {vertex_location}"
         )

     else:
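The hunks above rewire model construction in get_react_agent_model. A condensed, hedged sketch of the resulting dispatch; it returns the class name and constructor kwargs instead of instantiating, so it runs without any LLM SDK installed (note that ChatVertexAIClaude takes project_id=, where ChatAnthropicVertex took project=):

```python
# Condensed view of the selection logic; credential/region setup omitted.
# Not a drop-in replacement for get_react_agent_model().
def describe_model_choice(vertex_model_name: str, project: str, region: str) -> dict:
    if vertex_model_name.startswith("gemini-"):
        # ChatGoogleGenerativeAI(vertexai=True) keeps langchain_google_vertexai unloaded
        return {
            "class": "ChatGoogleGenerativeAI",
            "kwargs": {"model": vertex_model_name, "vertexai": True,
                       "location": region, "project": project},
        }
    if "claude" in vertex_model_name.lower() or vertex_model_name.startswith("maison/"):
        # ChatVertexAIClaude hits rawPredict directly, skipping google-cloud-aiplatform
        return {
            "class": "ChatVertexAIClaude",
            "kwargs": {"model": vertex_model_name, "location": region,
                       "project_id": project},  # renamed from project=
        }
    return {"class": "unknown", "kwargs": {}}
```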
@@ -0,0 +1,406 @@
+"""
+Custom Vertex AI Claude chat model for LangGraph.
+
+Lightweight BaseChatModel that calls Claude via Vertex AI's rawPredict REST API,
+avoiding the heavy google-cloud-aiplatform dependency (~26s import time).
+
+Supports tool calling for LangGraph react agent compatibility.
+"""
+
+import json
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import httpx
+
+from google.auth.transport.requests import Request
+from google.oauth2 import service_account
+from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import (
+    AIMessage,
+    BaseMessage,
+    HumanMessage,
+    SystemMessage,
+    ToolMessage,
+)
+from langchain_core.outputs import ChatGeneration, ChatResult
+from pydantic import ConfigDict
+
+from botrun_flow_lang.utils.botrun_logger import get_default_botrun_logger
+
+logger = get_default_botrun_logger()
+
+
+class ChatVertexAIClaude(BaseChatModel):
+    """
+    Lightweight Vertex AI Claude chat model using rawPredict REST API.
+
+    Replaces ChatAnthropicVertex without importing google-cloud-aiplatform.
+    Supports tool calling for LangGraph react agent.
+
+    Usage:
+        model = ChatVertexAIClaude(
+            model="claude-sonnet-4-5-20250929",
+            project_id="my-project",
+            location="asia-east1",
+            credentials=my_credentials,  # or service_account_file="path/to/sa.json"
+        )
+    """
+
+    model: str = "claude-sonnet-4-5-20250929"
+    max_tokens: int = 64000
+    temperature: float = 0
+    project_id: str = ""
+    location: str = "asia-east1"
+    credentials: Any = None
+    service_account_file: str = ""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    @property
+    def _llm_type(self) -> str:
+        return "vertex-ai-claude-custom"
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        return {
+            "model": self.model,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+            "project_id": self.project_id,
+            "location": self.location,
+        }
+
+    def bind_tools(
+        self,
+        tools: List[Any],
+        *,
+        tool_choice: Optional[Union[str, Dict]] = None,
+        **kwargs,
+    ):
+        """Convert tools to Anthropic format and bind via Runnable.bind()."""
+        formatted_tools = _convert_tools_to_anthropic(tools)
+        bind_kwargs: Dict[str, Any] = {"tools": formatted_tools, **kwargs}
+        if tool_choice is not None:
+            bind_kwargs["tool_choice"] = tool_choice
+        return self.bind(**bind_kwargs)
+
+    def _get_access_token(self) -> str:
+        """Get OAuth2 access token for Vertex AI API."""
+        if self.credentials:
+            creds = self.credentials
+        elif self.service_account_file:
+            creds = service_account.Credentials.from_service_account_file(
+                self.service_account_file,
+                scopes=["https://www.googleapis.com/auth/cloud-platform"],
+            )
+        else:
+            raise ValueError(
+                "ChatVertexAIClaude requires either 'credentials' or 'service_account_file'"
+            )
+
+        if not creds.valid or creds.expired:
+            creds.refresh(Request())
+        return creds.token
+
+    def _convert_messages(
+        self, messages: List[BaseMessage]
+    ) -> Tuple[Union[str, List[Dict]], List[Dict]]:
+        """Convert LangChain messages to Anthropic API format.
+
+        Returns:
+            (system, api_messages) tuple.
+            system: str or list of content blocks (preserves cache_control).
+            api_messages: list of Anthropic-format message dicts.
+        """
+        system_blocks: List[Any] = []
+        raw_messages: List[Dict] = []
+
+        for msg in messages:
+            if isinstance(msg, SystemMessage):
+                if isinstance(msg.content, str):
+                    system_blocks.append({"type": "text", "text": msg.content})
+                elif isinstance(msg.content, list):
+                    for block in msg.content:
+                        if isinstance(block, dict):
+                            system_blocks.append(block)
+                        elif isinstance(block, str):
+                            system_blocks.append({"type": "text", "text": block})
+
+            elif isinstance(msg, HumanMessage):
+                raw_messages.append({"role": "user", "content": msg.content})
+
+            elif isinstance(msg, AIMessage):
+                content_blocks = []
+                if msg.content:
+                    if isinstance(msg.content, str):
+                        content_blocks.append(
+                            {"type": "text", "text": msg.content}
+                        )
+                    elif isinstance(msg.content, list):
+                        for block in msg.content:
+                            if isinstance(block, str):
+                                content_blocks.append(
+                                    {"type": "text", "text": block}
+                                )
+                            elif isinstance(block, dict):
+                                content_blocks.append(block)
+                for tc in msg.tool_calls or []:
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": tc["id"],
+                            "name": tc["name"],
+                            "input": tc["args"],
+                        }
+                    )
+                raw_messages.append(
+                    {
+                        "role": "assistant",
+                        "content": content_blocks if content_blocks else "",
+                    }
+                )
+
+            elif isinstance(msg, ToolMessage):
+                tool_content = msg.content
+                if not isinstance(tool_content, str):
+                    tool_content = json.dumps(tool_content, ensure_ascii=False)
+                raw_messages.append(
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "tool_result",
+                                "tool_use_id": msg.tool_call_id,
+                                "content": tool_content,
+                            }
+                        ],
+                    }
+                )
+
+        merged = _merge_consecutive_messages(raw_messages)
+
+        # Return system as string (simple) or list (structured with cache_control)
+        has_cache_control = any(
+            isinstance(b, dict) and "cache_control" in b for b in system_blocks
+        )
+        if len(system_blocks) == 1 and not has_cache_control:
+            system: Union[str, List[Dict]] = system_blocks[0].get("text", "")
+        elif system_blocks:
+            system = system_blocks
+        else:
+            system = ""
+
+        return system, merged
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs,
+    ) -> ChatResult:
+        """Call Vertex AI Claude via rawPredict (non-streaming)."""
+        system, api_messages = self._convert_messages(messages)
+        access_token = self._get_access_token()
+
+        url = (
+            f"https://{self.location}-aiplatform.googleapis.com/v1/"
+            f"projects/{self.project_id}/locations/{self.location}/"
+            f"publishers/anthropic/models/{self.model}:rawPredict"
+        )
+
+        payload: Dict[str, Any] = {
+            "anthropic_version": "vertex-2023-10-16",
+            "messages": api_messages,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+        }
+        if system:
+            payload["system"] = system
+        if stop:
+            payload["stop_sequences"] = stop
+
+        # Tools from bind_tools()
+        tools = kwargs.get("tools", [])
+        if tools:
+            payload["tools"] = tools
+
+        # Tool choice
+        tool_choice = kwargs.get("tool_choice")
+        if tool_choice:
+            if isinstance(tool_choice, str):
+                if tool_choice == "auto":
+                    payload["tool_choice"] = {"type": "auto"}
+                elif tool_choice == "any":
+                    payload["tool_choice"] = {"type": "any"}
+                elif tool_choice == "none":
+                    payload.pop("tools", None)
+                else:
+                    payload["tool_choice"] = {
+                        "type": "tool",
+                        "name": tool_choice,
+                    }
+            elif isinstance(tool_choice, dict):
+                payload["tool_choice"] = tool_choice
+
+        logger.info(
+            f"[ChatVertexAIClaude] rawPredict: model={self.model}, "
+            f"location={self.location}, messages={len(api_messages)}, "
+            f"tools={len(tools)}"
+        )
+
+        # Make API call via httpx
+        data = _http_post_json(url, payload, access_token)
+
+        # Parse response
+        text_parts = []
+        tool_calls = []
+        for block in data.get("content", []):
+            block_type = block.get("type", "")
+            if block_type == "text":
+                text_parts.append(block.get("text", ""))
+            elif block_type == "tool_use":
+                tool_calls.append(
+                    {
+                        "id": block["id"],
+                        "name": block["name"],
+                        "args": block.get("input", {}),
+                    }
+                )
+
+        usage = data.get("usage", {})
+        input_tokens = usage.get("input_tokens", 0)
+        output_tokens = usage.get("output_tokens", 0)
+
+        ai_message = AIMessage(
+            content="".join(text_parts),
+            tool_calls=tool_calls,
+            usage_metadata={
+                "input_tokens": input_tokens,
+                "output_tokens": output_tokens,
+                "total_tokens": input_tokens + output_tokens,
+            },
+            response_metadata={
+                "model": self.model,
+                "stop_reason": data.get("stop_reason", ""),
+            },
+        )
+
+        logger.info(
+            f"[ChatVertexAIClaude] Response: "
+            f"text_len={len(ai_message.content)}, "
+            f"tool_calls={len(tool_calls)}, "
+            f"tokens=({input_tokens}+{output_tokens}={input_tokens + output_tokens})"
+        )
+
+        return ChatResult(
+            generations=[ChatGeneration(message=ai_message)],
+            llm_output={
+                "model": self.model,
+                "usage": {
+                    "input_tokens": input_tokens,
+                    "output_tokens": output_tokens,
+                },
+            },
+        )
+
+
+def _http_post_json(
+    url: str, payload: Dict[str, Any], access_token: str
+) -> Dict[str, Any]:
+    """POST JSON to URL with Bearer auth. Returns parsed JSON response."""
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {access_token}",
+    }
+    try:
+        with httpx.Client(timeout=300.0) as client:
+            response = client.post(url, headers=headers, json=payload)
+            if response.status_code != 200:
+                error_msg = (
+                    f"Vertex AI API error: {response.status_code} - {response.text}"
+                )
+                logger.error(f"[ChatVertexAIClaude] {error_msg}")
+                raise Exception(error_msg)
+            return response.json()
+    except httpx.HTTPStatusError as e:
+        error_body = e.response.text if e.response else ""
+        error_msg = f"Vertex AI API error: {e.response.status_code} - {error_body}"
+        logger.error(f"[ChatVertexAIClaude] {error_msg}")
+        raise Exception(error_msg) from e
+
+
+def _convert_tools_to_anthropic(tools: List[Any]) -> List[Dict]:
+    """Convert LangChain tools to Anthropic tool format."""
+    from langchain_core.utils.function_calling import convert_to_openai_tool
+
+    anthropic_tools = []
+    for tool in tools:
+        if isinstance(tool, dict):
+            if "input_schema" in tool:
+                anthropic_tools.append(tool)
+            elif "function" in tool:
+                func = tool["function"]
+                anthropic_tools.append(
+                    {
+                        "name": func["name"],
+                        "description": func.get("description", ""),
+                        "input_schema": func.get(
+                            "parameters",
+                            {"type": "object", "properties": {}},
+                        ),
+                    }
+                )
+            else:
+                anthropic_tools.append(tool)
+        else:
+            try:
+                oai_tool = convert_to_openai_tool(tool)
+                func = oai_tool["function"]
+                anthropic_tools.append(
+                    {
+                        "name": func["name"],
+                        "description": func.get("description", ""),
+                        "input_schema": func.get(
+                            "parameters",
+                            {"type": "object", "properties": {}},
+                        ),
+                    }
+                )
+            except Exception as e:
+                logger.warning(
+                    f"[ChatVertexAIClaude] Failed to convert tool "
+                    f"{getattr(tool, 'name', tool)}: {e}"
+                )

+    return anthropic_tools
+
+
+def _merge_consecutive_messages(messages: List[Dict]) -> List[Dict]:
+    """Merge consecutive messages with the same role (required by Anthropic API)."""
+    if not messages:
+        return []
+
+    merged: List[Dict] = []
+    for msg in messages:
+        if merged and merged[-1]["role"] == msg["role"]:
+            prev_content = merged[-1]["content"]
+            curr_content = msg["content"]
+
+            # Normalize to list of content blocks
+            if isinstance(prev_content, str):
+                prev_content = [{"type": "text", "text": prev_content}]
+            elif not isinstance(prev_content, list):
+                prev_content = [prev_content]
+
+            if isinstance(curr_content, str):
+                curr_content = [{"type": "text", "text": curr_content}]
+            elif not isinstance(curr_content, list):
+                curr_content = [curr_content]
+
+            merged[-1]["content"] = prev_content + curr_content
+        else:
+            merged.append(msg)
+
+    return merged
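The class docstring above sketches construction; a slightly fuller, hedged usage example with a LangChain tool bound (the project id, region, and key path are placeholders, not values from the package):

```python
# Hedged usage sketch for ChatVertexAIClaude; all values are placeholders.
from google.oauth2 import service_account
from langchain_core.tools import tool

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

creds = service_account.Credentials.from_service_account_file(
    "path/to/sa.json",  # placeholder path
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
model = ChatVertexAIClaude(
    model="claude-sonnet-4-5-20250929",
    project_id="my-project",  # placeholder
    location="asia-east1",
    credentials=creds,
)

# bind_tools() converts the tool to Anthropic format and stores it via
# Runnable.bind(), so _generate() later receives it as kwargs["tools"].
model_with_tools = model.bind_tools([add])
result = model_with_tools.invoke("What is 2 + 3?")
print(result.tool_calls)  # expect a tool_use entry naming "add"
```

Since _generate() is the only generation path, the model is non-streaming: a LangGraph react agent can use it, but each turn arrives as a single chunk.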
@@ -4,8 +4,11 @@ import httpx
 import os
 import imghdr
 from pathlib import Path
+from typing import Dict, Any, List, Tuple
 from dotenv import load_dotenv

+from botrun_flow_lang.langgraph_agents.agents.util.usage_metadata import UsageMetadata
+
 load_dotenv()


@@ -50,7 +53,7 @@ def get_img_content_type(file_path: str | Path) -> str:

 def analyze_imgs_with_claude(
     img_urls: list[str], user_input: str, model_name: str = "claude-sonnet-4-5-20250929"
-) -> str:
+) -> Tuple[str, UsageMetadata]:
     """
     Analyze multiple images using Claude Vision API

@@ -60,7 +63,7 @@ def analyze_imgs_with_claude(
         model_name: Claude model name to use

     Returns:
-        str: Claude's analysis of the image content(s) based on the query
+        Tuple[str, UsageMetadata]: Claude's analysis and usage metadata

     Raises:
         ValueError: If image URLs are invalid or model parameters are incorrect
@@ -120,10 +123,20 @@ def analyze_imgs_with_claude(
             ],
         )

+        # Extract usage metadata
+        usage = UsageMetadata(
+            prompt_tokens=message.usage.input_tokens,
+            completion_tokens=message.usage.output_tokens,
+            total_tokens=message.usage.input_tokens + message.usage.output_tokens,
+            cache_creation_input_tokens=getattr(message.usage, 'cache_creation_input_tokens', 0) or 0,
+            cache_read_input_tokens=getattr(message.usage, 'cache_read_input_tokens', 0) or 0,
+            model=model_name,
+        )
+
         print(
             f"analyze_imgs_with_claude============> input_token: {message.usage.input_tokens} output_token: {message.usage.output_tokens}",
         )
-        return message.content[0].text
+        return message.content[0].text, usage
     except anthropic.APIError as e:
         import traceback

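Downstream callers must now unpack a tuple. A minimal hedged sketch of the new call shape (the image URL and question are placeholders, and attribute access like usage.total_tokens assumes UsageMetadata exposes its constructor fields, which this diff does not show):

```python
# Sketch of the new two-value return from analyze_imgs_with_claude().
text, usage = analyze_imgs_with_claude(
    ["https://example.com/receipt.jpg"],  # placeholder URL
    "What is the total amount on this receipt?",
)
print(text)
print(usage.total_tokens)  # input + output, as assembled above (assumed attribute)
```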
@@ -144,7 +157,7 @@ def analyze_imgs_with_gemini(
     img_urls: list[str],
     user_input: str,
     model_name: str = "gemini-2.5-flash",
-) -> str:
+) -> Tuple[str, UsageMetadata]:
     """
     Analyze multiple images using Gemini Vision API

@@ -154,7 +167,7 @@ def analyze_imgs_with_gemini(
         model_name: Gemini model name to use

     Returns:
-        str: Gemini's analysis of the image content(s) based on the query
+        Tuple[str, UsageMetadata]: Gemini's analysis and usage metadata

     Raises:
         ValueError: If image URLs are invalid or model parameters are incorrect
@@ -216,10 +229,23 @@ def analyze_imgs_with_gemini(
             contents=contents,
         )

-        print(
-            f"analyze_imgs_with_gemini============> input_token: {response.usage_metadata.prompt_token_count} output_token: {response.usage_metadata.candidates_token_count}"
-        )
-        return response.text
+        # Extract usage metadata
+        usage = UsageMetadata(model=model_name)
+        if hasattr(response, "usage_metadata"):
+            usage_meta = response.usage_metadata
+            usage = UsageMetadata(
+                prompt_tokens=getattr(usage_meta, 'prompt_token_count', 0) or 0,
+                completion_tokens=getattr(usage_meta, 'candidates_token_count', 0) or 0,
+                total_tokens=getattr(usage_meta, 'total_token_count', 0) or 0,
+                cache_creation_input_tokens=0,
+                cache_read_input_tokens=getattr(usage_meta, 'cached_content_token_count', 0) or 0,
+                model=model_name,
+            )
+            print(
+                f"analyze_imgs_with_gemini============> input_token: {usage_meta.prompt_token_count} output_token: {usage_meta.candidates_token_count}"
+            )
+
+        return response.text, usage

     except httpx.RequestError as e:
         import traceback
@@ -233,7 +259,7 @@ def analyze_imgs_with_gemini(
         raise Exception(f"Error analyzing image(s) with Gemini {model_name}: {str(e)}")


-def analyze_imgs(img_urls: list[str], user_input: str) -> str:
+def analyze_imgs(img_urls: list[str], user_input: str) -> Dict[str, Any]:
     """
     Analyze multiple images using configured AI models.

@@ -248,8 +274,13 @@ def analyze_imgs(img_urls: list[str], user_input: str) -> str:
         user_input: User's query about the image content(s)

     Returns:
-        str: AI analysis of the image content(s) based on the query
+        Dict[str, Any]: {
+            "result": str,  # AI analysis result
+            "usage_metadata": List[Dict]  # Token usage for each LLM call
+        }
     """
+    usage_list: List[UsageMetadata] = []
+
     # Get models from environment variable, split by comma if multiple models
     models_str = os.getenv("IMG_ANALYZER_MODEL", "gemini-2.5-flash")
     print(f"[analyze_imgs] 分析IMG使用模型: {models_str}")
@@ -267,12 +298,20 @@ def analyze_imgs(img_urls: list[str], user_input: str) -> str:
         try:
             if model.startswith("gemini-"):
                 print(f"[analyze_imgs] 嘗試使用 Gemini 模型: {model}")
-                result = analyze_imgs_with_gemini(img_urls, user_input, model)
-                return result
+                result, usage = analyze_imgs_with_gemini(img_urls, user_input, model)
+                usage_list.append(usage)
+                return {
+                    "result": result,
+                    "usage_metadata": [u.to_dict() for u in usage_list],
+                }
             elif model.startswith("claude-"):
                 print(f"[analyze_imgs] 嘗試使用 Claude 模型: {model}")
-                result = analyze_imgs_with_claude(img_urls, user_input, model)
-                return result
+                result, usage = analyze_imgs_with_claude(img_urls, user_input, model)
+                usage_list.append(usage)
+                return {
+                    "result": result,
+                    "usage_metadata": [u.to_dict() for u in usage_list],
+                }
             else:
                 print(f"[analyze_imgs] 不支持的模型格式: {model}, 跳過")
                 errors.append(f"不支持的模型格式: {model}")
@@ -291,4 +330,7 @@ def analyze_imgs(img_urls: list[str], user_input: str) -> str:

     # If we've tried all models and none succeeded, return all errors
     error_summary = "\n".join(errors)
-    return f"錯誤: 所有配置的模型都失敗了。詳細錯誤:\n{error_summary}"
+    return {
+        "result": f"錯誤: 所有配置的模型都失敗了。詳細錯誤:\n{error_summary}",
+        "usage_metadata": [u.to_dict() for u in usage_list],
+    }
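With this final hunk, analyze_imgs() returns a dict on every path, including the all-models-failed fallback. A hedged caller sketch (the import path is hypothetical since the diff does not show this module's filename, and the keys inside each usage entry assume UsageMetadata.to_dict() mirrors the constructor fields):

```python
# Hypothetical import path; the diff does not show this module's filename.
from botrun_flow_lang.langgraph_agents.agents.util.img_util import analyze_imgs

response = analyze_imgs(
    ["https://example.com/chart.png"],  # placeholder URL
    "What does this chart show?",
)
print(response["result"])
for entry in response["usage_metadata"]:
    # Assumed to_dict() keys, mirroring UsageMetadata's constructor fields.
    print(entry.get("model"), entry.get("prompt_tokens"), entry.get("completion_tokens"))
```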