dslighting 1.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. dsat/__init__.py +3 -0
  2. dsat/benchmark/__init__.py +1 -0
  3. dsat/benchmark/benchmark.py +168 -0
  4. dsat/benchmark/datasci.py +291 -0
  5. dsat/benchmark/mle.py +777 -0
  6. dsat/benchmark/sciencebench.py +304 -0
  7. dsat/common/__init__.py +0 -0
  8. dsat/common/constants.py +11 -0
  9. dsat/common/exceptions.py +48 -0
  10. dsat/common/typing.py +19 -0
  11. dsat/config.py +79 -0
  12. dsat/models/__init__.py +3 -0
  13. dsat/models/candidates.py +16 -0
  14. dsat/models/formats.py +52 -0
  15. dsat/models/task.py +64 -0
  16. dsat/operators/__init__.py +0 -0
  17. dsat/operators/aflow_ops.py +90 -0
  18. dsat/operators/autokaggle_ops.py +170 -0
  19. dsat/operators/automind_ops.py +38 -0
  20. dsat/operators/base.py +22 -0
  21. dsat/operators/code.py +45 -0
  22. dsat/operators/dsagent_ops.py +123 -0
  23. dsat/operators/llm_basic.py +84 -0
  24. dsat/prompts/__init__.py +0 -0
  25. dsat/prompts/aflow_prompt.py +76 -0
  26. dsat/prompts/aide_prompt.py +52 -0
  27. dsat/prompts/autokaggle_prompt.py +290 -0
  28. dsat/prompts/automind_prompt.py +29 -0
  29. dsat/prompts/common.py +51 -0
  30. dsat/prompts/data_interpreter_prompt.py +82 -0
  31. dsat/prompts/dsagent_prompt.py +88 -0
  32. dsat/runner.py +554 -0
  33. dsat/services/__init__.py +0 -0
  34. dsat/services/data_analyzer.py +387 -0
  35. dsat/services/llm.py +486 -0
  36. dsat/services/llm_single.py +421 -0
  37. dsat/services/sandbox.py +386 -0
  38. dsat/services/states/__init__.py +0 -0
  39. dsat/services/states/autokaggle_state.py +43 -0
  40. dsat/services/states/base.py +14 -0
  41. dsat/services/states/dsa_log.py +13 -0
  42. dsat/services/states/experience.py +237 -0
  43. dsat/services/states/journal.py +153 -0
  44. dsat/services/states/operator_library.py +290 -0
  45. dsat/services/vdb.py +76 -0
  46. dsat/services/workspace.py +178 -0
  47. dsat/tasks/__init__.py +3 -0
  48. dsat/tasks/handlers.py +376 -0
  49. dsat/templates/open_ended/grade_template.py +107 -0
  50. dsat/tools/__init__.py +4 -0
  51. dsat/utils/__init__.py +0 -0
  52. dsat/utils/context.py +172 -0
  53. dsat/utils/dynamic_import.py +71 -0
  54. dsat/utils/parsing.py +33 -0
  55. dsat/workflows/__init__.py +12 -0
  56. dsat/workflows/base.py +53 -0
  57. dsat/workflows/factory.py +439 -0
  58. dsat/workflows/manual/__init__.py +0 -0
  59. dsat/workflows/manual/autokaggle_workflow.py +148 -0
  60. dsat/workflows/manual/data_interpreter_workflow.py +153 -0
  61. dsat/workflows/manual/deepanalyze_workflow.py +484 -0
  62. dsat/workflows/manual/dsagent_workflow.py +76 -0
  63. dsat/workflows/search/__init__.py +0 -0
  64. dsat/workflows/search/aflow_workflow.py +344 -0
  65. dsat/workflows/search/aide_workflow.py +283 -0
  66. dsat/workflows/search/automind_workflow.py +237 -0
  67. dsat/workflows/templates/__init__.py +0 -0
  68. dsat/workflows/templates/basic_kaggle_loop.py +71 -0
  69. dslighting/__init__.py +170 -0
  70. dslighting/core/__init__.py +13 -0
  71. dslighting/core/agent.py +646 -0
  72. dslighting/core/config_builder.py +318 -0
  73. dslighting/core/data_loader.py +422 -0
  74. dslighting/core/task_detector.py +422 -0
  75. dslighting/utils/__init__.py +19 -0
  76. dslighting/utils/defaults.py +151 -0
  77. dslighting-1.3.9.dist-info/METADATA +554 -0
  78. dslighting-1.3.9.dist-info/RECORD +80 -0
  79. dslighting-1.3.9.dist-info/WHEEL +5 -0
  80. dslighting-1.3.9.dist-info/top_level.txt +2 -0
dsat/services/llm.py ADDED
@@ -0,0 +1,486 @@
# dsat/services/llm.py

"""
Unified, asynchronous LLM service powered by LiteLLM.
Provides a simple interface for standard calls, structured JSON output,
and automatic cost tracking.
"""
import logging
import asyncio
import yaml
import copy
import time
import uuid
from datetime import datetime
from pathlib import Path
from typing import Type, Optional, Any, Dict, List

import litellm
from pydantic import BaseModel, ValidationError

from dsat.config import LLMConfig  # Use the main pydantic config
from dsat.common.exceptions import LLMError

logger = logging.getLogger(__name__)

# Configure LiteLLM globally
litellm.telemetry = False  # Disable anonymous telemetry
litellm.input_callbacks = []
litellm.success_callbacks = []
litellm.failure_callbacks = []

# Load custom model pricing from YAML configuration file
def _load_custom_model_pricing():
    """Load custom model pricing configuration from config.yaml file."""
    try:
        # Get the path to the config.yaml file relative to this module
        current_dir = Path(__file__).parent
        framework_dir = current_dir.parent.parent  # Go up to ds_agent_framework
        config_yaml_path = framework_dir / "config.yaml"

        if config_yaml_path.exists():
            with open(config_yaml_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
            return config.get('custom_model_pricing', {})
        else:
            # Changed to debug to avoid confusing warnings for pip-installed packages
            logger.debug(f"Config file not found at {config_yaml_path} (this is expected for pip-installed packages)")
            return {}
    except Exception as e:
        logger.error(f"Failed to load cost configuration: {e}")
        return {}

# Load and apply custom model pricing
CUSTOM_MODEL_PRICING = _load_custom_model_pricing()
if CUSTOM_MODEL_PRICING:
    litellm.model_cost.update(CUSTOM_MODEL_PRICING)
    logger.info(f"Loaded custom model pricing for {len(CUSTOM_MODEL_PRICING)} models")

class LLMService:
    """
    A robust wrapper around LiteLLM that handles requests, structured formatting,
    and cost tracking. It's configured via the main DSATConfig's LLMConfig.
    """
    def __init__(self, config: LLMConfig):
        self.config = config
        self.total_cost = 0.0
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_prompt_cost = 0.0
        self.total_completion_cost = 0.0
        self.call_history: List[Dict[str, Any]] = []

        # Parse API key pool from string list format
        import json
        try:
            self.api_keys = json.loads(config.api_key) if config.api_key and config.api_key.startswith('[') else [config.api_key]
        except:
            self.api_keys = [config.api_key]
        self.current_key_index = 0
        logger.info(f"Initialized LLM service with {len(self.api_keys)} API keys")

    def _get_current_api_key(self) -> str:
        """Get current API key from the pool."""
        return self.api_keys[self.current_key_index]

    def _switch_api_key(self) -> bool:
        """Switch to next API key in pool. Returns True if switched, False if no more keys."""
        if len(self.api_keys) <= 1:
            return False
        self.current_key_index = (self.current_key_index + 1) % len(self.api_keys)
        logger.warning(f"Switching to API key {self.current_key_index + 1}/{len(self.api_keys)}")
        return True

    @staticmethod
    def _safe_float(value: Any) -> Optional[float]:
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _safe_int(value: Any) -> Optional[int]:
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _is_retryable_error(self, error: Exception) -> bool:
        """
        Determines whether an error is retryable based on litellm exception types.
        Only network timeouts, rate limits, and temporary service issues should be retried.
        Authentication errors, invalid requests, and other permanent failures should not be retried.
        """
        # Import litellm exceptions locally to avoid import issues
        try:
            import litellm.exceptions as litellm_exceptions
        except ImportError:
            # If litellm exceptions module is not available, be conservative and retry
            return True

        # Non-retryable errors - fail immediately
        non_retryable_errors = (
            litellm_exceptions.AuthenticationError,  # API key issues
            litellm_exceptions.InvalidRequestError,  # Request format/parameter issues
            litellm_exceptions.PermissionDeniedError,  # Insufficient permissions
            litellm_exceptions.NotFoundError,  # Model/endpoint not found
        )

        # Retryable errors - can be retried
        retryable_errors = (
            litellm_exceptions.RateLimitError,  # Rate limit exceeded
            litellm_exceptions.ServiceUnavailableError,  # Temporary service issues
            litellm_exceptions.Timeout,  # Network timeout
            litellm_exceptions.APIConnectionError,  # Connection issues
            litellm_exceptions.InternalServerError,  # Server-side temporary issues
        )

        # Check for specific error types
        if isinstance(error, non_retryable_errors):
            return False
        elif isinstance(error, retryable_errors):
            return True
        else:
            # For unknown errors, be conservative and retry
            # This handles generic network errors, etc.
            return True

    def _supports_response_format(self) -> bool:
        """
        Whether it's safe to pass `response_format` through LiteLLM for this model.

        Some OpenAI reasoning models (e.g. `o4-mini-*`) reject `response_format` and
        require JSON-only behavior to be enforced via prompt instead.
        """
        raw_model = (self.config.model or "").strip()
        model = raw_model.split("/")[-1].strip()
        model_lower = model.lower()
        if model_lower.startswith("o4-mini-") or model_lower == "o4-mini":
            return False
        if model_lower == "kimi-k2-instruct-0905":
            return False
        return True

    @staticmethod
    def _is_insufficient_balance_error(error: Exception) -> bool:
        """
        Detect insufficient balance/quota errors that should trigger API key rotation.
        """
        message = str(error).lower()
        return (
            "insufficient" in message and "balance" in message
        ) or "insufficient_quota" in message or "insufficient quota" in message

    async def _make_llm_call_with_retries(
        self, messages: list, response_format: Optional[dict] = None, max_retries: int = 3, base_delay: float = 1.0
    ):
        """
        Internal method to make LLM calls with centralized retry logic and exponential backoff.
        This method is the single point of contact with the LiteLLM library.

        Args:
            messages: List of message dictionaries for the LLM.
            response_format: Optional response format specification.
            max_retries: Maximum number of retry attempts.
            base_delay: Base delay in seconds for exponential backoff.

        Returns:
            The raw LiteLLM response object upon success.

        Raises:
            LLMError: If all retry attempts fail due to API errors or empty responses.
        """
        if response_format and not self._supports_response_format():
            logger.info(
                "Dropping unsupported `response_format` for model %s; using prompt-enforced JSON instead.",
                self.config.model,
            )
            response_format = None

        logger.info(f"prompt: {messages[-1]['content']}")
        last_exception = None
        for attempt in range(max_retries):
            call_id = uuid.uuid4().hex
            call_started_at = datetime.utcnow()
            perf_start = time.perf_counter()
            try:
                kwargs = {
                    "model": self.config.model,
                    "messages": messages,
                    "temperature": self.config.temperature,
                    "api_key": self._get_current_api_key(),
                    "api_base": self.config.api_base
                }
                if self.config.provider:
                    kwargs["custom_llm_provider"] = self.config.provider

                if response_format:
                    kwargs["response_format"] = response_format

                response = await litellm.acompletion(**kwargs)

                try:
                    content = response.choices[0].message.content
                    if content and content.strip():
                        duration = time.perf_counter() - perf_start
                        self._record_successful_call(
                            call_id=call_id,
                            call_started_at=call_started_at,
                            duration=duration,
                            messages=messages,
                            response=response,
                            content=content,
                            response_format=response_format
                        )
                        return response  # Success!
                    else:
                        # Treat empty response as a failure to be retried
                        logger.warning(f"LLM returned an empty response on attempt {attempt + 1}/{max_retries}.")
                        last_exception = LLMError("LLM returned an empty response.")
                except (IndexError, AttributeError) as content_error:
                    logger.warning(f"Invalid response structure on attempt {attempt + 1}/{max_retries}: {content_error}")
                    last_exception = LLMError(f"Invalid response structure: {content_error}")

            except Exception as e:
                # Special handling for RateLimitError - switch API key immediately
                import litellm.exceptions as litellm_exceptions
                if isinstance(e, litellm_exceptions.RateLimitError):
                    logger.warning(f"RateLimitError on attempt {attempt + 1}/{max_retries}: {e}")
                    if self._switch_api_key():
                        logger.info(f"Switched to next API key, retrying immediately...")
                        last_exception = e
                        continue  # Skip delay, retry immediately with new key
                    else:
                        logger.warning(f"No more API keys to switch, will use retry delay")
                        last_exception = e
                elif isinstance(e, litellm_exceptions.APIError) and self._is_insufficient_balance_error(e):
                    logger.warning(f"Insufficient balance on attempt {attempt + 1}/{max_retries}: {e}")
                    if self._switch_api_key():
                        logger.info("Switched to next API key after insufficient balance, retrying immediately...")
                        last_exception = e
                        continue
                    else:
                        logger.warning("No more API keys to switch after insufficient balance, will use retry delay")
                        last_exception = e
                # Check if this is a retryable error
                elif self._is_retryable_error(e):
                    logger.warning(f"Retryable LLM error on attempt {attempt + 1}/{max_retries}: {e}")
                    last_exception = e
                    logger.debug(f"Debug info - messages: {messages}, response_format: {response_format if response_format else 'None'}")
                else:
                    # Non-retryable error - fail immediately
                    logger.error(f"Non-retryable LLM error: {e}")
                    raise LLMError(f"LLM call failed with non-retryable error: {e}") from e

            # If this was the last attempt, break the loop to raise the final error
            if attempt == max_retries - 1:
                break

            # Exponential backoff with jitter
            delay = base_delay * (3 ** attempt) + (asyncio.get_event_loop().time() % 1)
            logger.info(f"Retrying LLM call in {delay:.2f} seconds ({attempt + 2}/{max_retries})...")
            await asyncio.sleep(delay)

        raise LLMError(f"LLM call failed after {max_retries} attempts. Last error: {last_exception}") from last_exception

    def _record_successful_call(
        self,
        call_id: str,
        call_started_at: datetime,
        duration: float,
        messages: list,
        response: Any,
        content: str,
        response_format: Optional[dict],
    ) -> None:
+ """
297
+ 将一次成功的调用附加到历史中,并更新累计 token / 费用。
298
+ """
        usage_payload = self._extract_usage(response)
        try:
            call_cost_raw = litellm.completion_cost(completion_response=response)
            call_cost = float(call_cost_raw) if call_cost_raw is not None else 0.0
        except Exception:
            call_cost = 0.0

        self.total_cost += call_cost

        prompt_tokens = usage_payload.get("prompt_tokens") if usage_payload else None
        completion_tokens = usage_payload.get("completion_tokens") if usage_payload else None

        if prompt_tokens:
            self.total_prompt_tokens += prompt_tokens
        if completion_tokens:
            self.total_completion_tokens += completion_tokens

        prompt_cost_val = usage_payload.get("prompt_tokens_cost") if usage_payload else None
        completion_cost_val = usage_payload.get("completion_tokens_cost") if usage_payload else None

        if prompt_cost_val is not None:
            self.total_prompt_cost += prompt_cost_val
        if completion_cost_val is not None:
            self.total_completion_cost += completion_cost_val

        total_tokens = usage_payload.get("total_tokens") if usage_payload else None
        cost_per_token = (call_cost / total_tokens) if total_tokens else None

        history_entry = {
            "call_id": call_id,
            "model": self.config.model,
            "provider": self.config.provider,
            "timestamp_utc": call_started_at.isoformat() + "Z",
            "duration_seconds": round(duration, 4),
            "response_format": "json_object" if response_format else "text",
            "messages": copy.deepcopy(messages),
            "response": content,
            "usage": usage_payload or None,
            "cost": call_cost,
            "cost_per_token": cost_per_token,
        }
        self.call_history.append(history_entry)
        logger.info(f"LLM call complete. Model: {self.config.model}, Cost: ${call_cost:.6f}")

    def _extract_usage(self, response: Any) -> Dict[str, Any]:
+ """
345
+ 从 LiteLLM Response 中提取 token / 费用信息,确保可 JSON 序列化。
346
+ """
347
+ usage = getattr(response, "usage", None)
348
+ if not usage:
349
+ return {}
350
+
351
+ payload: Dict[str, Any] = {
352
+ "prompt_tokens": self._safe_int(getattr(usage, "prompt_tokens", None)),
353
+ "completion_tokens": self._safe_int(getattr(usage, "completion_tokens", None)),
354
+ "total_tokens": self._safe_int(getattr(usage, "total_tokens", None)),
355
+ "prompt_tokens_cost": self._safe_float(getattr(usage, "prompt_tokens_cost", None)),
356
+ "completion_tokens_cost": self._safe_float(getattr(usage, "completion_tokens_cost", None)),
357
+ }
358
+ total_tokens_cost = self._safe_float(getattr(usage, "total_tokens_cost", None))
359
+ if total_tokens_cost is None:
360
+ prompt_cost = payload.get("prompt_tokens_cost")
361
+ completion_cost = payload.get("completion_tokens_cost")
362
+ if prompt_cost is not None and completion_cost is not None:
363
+ total_tokens_cost = prompt_cost + completion_cost
364
+ payload["total_tokens_cost"] = total_tokens_cost
365
+ return payload
366
+
367
+ async def call(self, prompt: str, system_message: Optional[str] = None, max_retries: Optional[int] = None) -> str:
368
+ """
369
+ Makes a standard, asynchronous call to the LLM and returns the text response.
370
+ The retry logic is handled by the internal _make_llm_call_with_retries method.
371
+
372
+ Args:
373
+ prompt: The user's prompt.
374
+ system_message: An optional system message to guide the LLM's behavior.
375
+ max_retries: Maximum number of retry attempts (default: 3).
376
+
377
+ Returns:
378
+ The string content of the LLM's response.
379
+ """
380
+ retries = max_retries if max_retries is not None else self.config.max_retries
381
+ messages = []
382
+ if system_message:
383
+ messages.append({"role": "system", "content": system_message})
384
+ messages.append({"role": "user", "content": prompt})
385
+
386
+ logger.debug(f"Calling LLM ({self.config.model}) with prompt: {prompt[:100]}...")
387
+
388
+ response = await self._make_llm_call_with_retries(messages, max_retries=retries)
389
+ content = response.choices[0].message.content
390
+ logger.info(f"content: {content}")
391
+ return content
392
+
393
+ async def call_with_json(self, prompt: str, output_model: Type[BaseModel], max_retries: Optional[int] = None) -> BaseModel:
394
+ """
395
+ Calls the LLM and forces the output to be a JSON object conforming to the
396
+ provided Pydantic model. The retry logic is handled by the internal method.
397
+
398
+ Args:
399
+ prompt: The user's prompt.
400
+ output_model: The Pydantic model class for the desired output structure.
401
+ max_retries: Maximum number of retry attempts (default: 3).
402
+
403
+ Returns:
404
+ An instantiated Pydantic model with the LLM's response.
405
+ """
406
+ retries = max_retries if max_retries is not None else self.config.max_retries
407
+ system_message = (
408
+ "You are a helpful assistant that always responds with a JSON object "
409
+ "that strictly adheres to the provided JSON Schema. Do not add any "
410
+ "other text, explanations, or markdown formatting."
411
+ )
412
+
413
+ prompt_with_schema = (
414
+ f"{prompt}\n\n# RESPONSE JSON SCHEMA:\n"
415
+ f"```json\n{output_model.model_json_schema()}\n```"
416
+ )
417
+
418
+ messages = [
419
+ {"role": "system", "content": system_message},
420
+ {"role": "user", "content": prompt_with_schema}
421
+ ]
422
+
423
+ logger.debug(f"Calling LLM ({self.config.model}) for structured JSON output...")
424
+
425
+ for attempt in range(max(1, retries)):
426
+ if self._supports_response_format():
427
+ response = await self._make_llm_call_with_retries(
428
+ messages,
429
+ response_format={"type": "json_object"},
430
+ max_retries=retries,
431
+ )
432
+ else:
433
+ if attempt == 0:
434
+ logger.info(
435
+ "Model %s does not support `response_format`; falling back to prompt-enforced JSON.",
436
+ self.config.model,
437
+ )
438
+ response = await self._make_llm_call_with_retries(
439
+ messages,
440
+ response_format=None,
441
+ max_retries=retries,
442
+ )
443
+
444
+ try:
445
+ response_content = response.choices[0].message.content
446
+ logger.info(f"content: {response_content}")
447
+ except (IndexError, AttributeError) as e:
448
+ raise LLMError(f"Invalid response structure from LLM: {e}") from e
449
+
450
+ try:
451
+ return output_model.model_validate_json(response_content)
452
+ except ValidationError as e:
453
+ logger.error(
454
+ "Failed to validate LLM JSON response against Pydantic model (attempt %d/%d): %s",
455
+ attempt + 1,
456
+ retries,
457
+ e,
458
+ )
459
+ logger.debug("Invalid JSON received: %s", response_content)
460
+ if attempt + 1 >= retries:
461
+ raise LLMError(
462
+ f"LLM returned invalid JSON that could not be parsed: {e}"
463
+ ) from e
464
+
465
+ def get_total_cost(self) -> float:
466
+ """Returns the total accumulated cost for this LLM instance."""
467
+ return self.total_cost
468
+
469
+ def get_call_history(self) -> List[Dict[str, Any]]:
470
+ """Returns a deep copy of the call history for telemetry persistence."""
471
+ return copy.deepcopy(self.call_history)
472
+
473
+ def get_usage_summary(self) -> Dict[str, Any]:
474
+ """汇总本实例的 token/费用信息。"""
        total_tokens = self.total_prompt_tokens + self.total_completion_tokens
        summary = {
            "prompt_tokens": self.total_prompt_tokens,
            "completion_tokens": self.total_completion_tokens,
            "total_tokens": total_tokens,
            "prompt_tokens_cost": round(self.total_prompt_cost, 12),
            "completion_tokens_cost": round(self.total_completion_cost, 12),
            "total_cost": round(self.total_cost, 12),
            "call_count": len(self.call_history),
        }
        summary["cost_per_token"] = (self.total_cost / total_tokens) if total_tokens else None
        return summary