realtimex_deeptutor-0.5.0.post1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
src/agents/solve/utils/token_tracker.py
ADDED
@@ -0,0 +1,541 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Token Tracker - LLM Token usage and cost tracking system (Advanced)

Uses tiktoken for precise token counting, supports multiple models and more accurate cost calculation
"""

from dataclasses import asdict, dataclass, field
from datetime import datetime
import json
from typing import Any

# Try importing tiktoken (if available)
try:
    import tiktoken

    TIKTOKEN_AVAILABLE = True
except ImportError:
    TIKTOKEN_AVAILABLE = False
    tiktoken = None

# Try importing litellm (optional advanced library)
try:
    import litellm
    from litellm import token_counter

    LITELLM_AVAILABLE = True
except ImportError:
    LITELLM_AVAILABLE = False
    litellm = None


# Model pricing table (price per 1K tokens, unit: USD)
# Data source: Official pricing from various vendors (November 2024)
MODEL_PRICING = {
    # OpenAI GPT Series
    "gpt-4o": {"input": 0.0025, "output": 0.010},
    "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
    "gpt-4-turbo": {"input": 0.01, "output": 0.03},
    "gpt-4": {"input": 0.03, "output": 0.06},
    "gpt-4-32k": {"input": 0.06, "output": 0.12},
    "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
    "gpt-3.5-turbo-16k": {"input": 0.003, "output": 0.004},
    # DeepSeek Series
    "deepseek-chat": {"input": 0.00014, "output": 0.00028},
    "deepseek-coder": {"input": 0.00014, "output": 0.00028},
    # Anthropic Claude Series
    "claude-3-opus": {"input": 0.015, "output": 0.075},
    "claude-3-sonnet": {"input": 0.003, "output": 0.015},
    "claude-3-haiku": {"input": 0.00025, "output": 0.00125},
    "claude-3-5-sonnet": {"input": 0.003, "output": 0.015},
    # Google Gemini Series
    "gemini-pro": {"input": 0.0005, "output": 0.0015},
    "gemini-1.5-pro": {"input": 0.00125, "output": 0.005},
    "gemini-1.5-flash": {"input": 0.000075, "output": 0.0003},
}
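
# Illustration of the table's units (hypothetical numbers, not part of the
# original module): prices are per 1K tokens, so a gpt-4o-mini request that
# used 10,000 prompt tokens and 2,000 completion tokens costs
# (10000 / 1000) * 0.00015 + (2000 / 1000) * 0.0006 = 0.0015 + 0.0012 = $0.0027.
# calculate_cost() below implements exactly this arithmetic.
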
def get_tiktoken_encoding(model_name: str):
    """
    Get tiktoken encoder (for precise token counting)

    Args:
        model_name: Model name

    Returns:
        tiktoken.Encoding object, returns None if not available
    """
    if not TIKTOKEN_AVAILABLE:
        return None

    try:
        # Try getting encoding based on model name; check "gpt-4o" before the
        # broader "gpt-4" substring so this branch is actually reachable
        if "gpt-4o" in model_name.lower():
            return tiktoken.encoding_for_model("gpt-4o")
        if "gpt-4" in model_name.lower() or "gpt-3.5" in model_name.lower():
            return tiktoken.encoding_for_model(model_name)
        # Default to cl100k_base (encoding for GPT-3.5/GPT-4)
        return tiktoken.get_encoding("cl100k_base")
    except Exception:
        # If that fails, use the default encoding
        return tiktoken.get_encoding("cl100k_base")


def count_tokens_with_tiktoken(text: str, model_name: str) -> int:
    """
    Precisely calculate token count using tiktoken

    Args:
        text: Text to count
        model_name: Model name (for selecting the correct encoding)

    Returns:
        Token count
    """
    if not TIKTOKEN_AVAILABLE:
        return 0

    encoding = get_tiktoken_encoding(model_name)
    if encoding is None:
        return 0

    return len(encoding.encode(text))


def count_tokens_with_litellm(messages: list[dict], model_name: str) -> dict[str, int]:
    """
    Calculate token count using litellm (if available)

    Args:
        messages: Message list
        model_name: Model name

    Returns:
        {'prompt_tokens': int, 'completion_tokens': int, 'total_tokens': int}
    """
    if not LITELLM_AVAILABLE:
        return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

    try:
        # Use litellm's token_counter
        token_count = token_counter(model=model_name, messages=messages)
        return {
            "prompt_tokens": token_count,
            "completion_tokens": 0,  # litellm only counts prompt tokens here
            "total_tokens": token_count,
        }
    except Exception:
        return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}


def calculate_cost_with_litellm(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """
    Calculate cost using litellm (more accurate if available)

    Args:
        model: Model name
        prompt_tokens: Input tokens
        completion_tokens: Output tokens

    Returns:
        Cost (USD)
    """
    if not LITELLM_AVAILABLE:
        # Fall back to manual calculation
        return calculate_cost(model, prompt_tokens, completion_tokens)

    try:
        # Use litellm's built-in pricing; cost_per_token returns a
        # (prompt_cost, completion_cost) pair in USD
        prompt_cost, completion_cost = litellm.cost_per_token(
            model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
        )
        return prompt_cost + completion_cost
    except Exception:
        # If that fails, fall back to manual calculation
        return calculate_cost(model, prompt_tokens, completion_tokens)


def get_model_pricing(model_name: str) -> dict[str, float]:
    """
    Get model pricing information

    Args:
        model_name: Model name

    Returns:
        {'input': float, 'output': float} Price per 1K tokens (USD)
    """
    # Try exact match
    if model_name in MODEL_PRICING:
        return MODEL_PRICING[model_name]

    # Try fuzzy match (handles model names with version suffixes)
    model_lower = model_name.lower()
    for key, pricing in MODEL_PRICING.items():
        if key.lower() in model_lower or model_lower in key.lower():
            return pricing

    # If litellm is available, try to get the price from litellm
    if LITELLM_AVAILABLE:
        try:
            # litellm has a built-in pricing table
            model_info = litellm.get_model_info(model=model_name)
            if model_info and "input_cost_per_token" in model_info:
                return {
                    "input": model_info.get("input_cost_per_token", 0) * 1000,
                    "output": model_info.get("output_cost_per_token", 0) * 1000,
                }
        except Exception:
            pass

    # Default price (use gpt-4o-mini as a conservative estimate)
    return MODEL_PRICING.get("gpt-4o-mini", {"input": 0.00015, "output": 0.0006})


def calculate_cost(model_name: str, prompt_tokens: int, completion_tokens: int) -> float:
    """
    Calculate LLM call cost (backward-compatibility function)

    Args:
        model_name: Model name
        prompt_tokens: Number of input tokens
        completion_tokens: Number of output tokens

    Returns:
        Cost (USD)
    """
    pricing = get_model_pricing(model_name)

    input_cost = (prompt_tokens / 1000.0) * pricing["input"]
    output_cost = (completion_tokens / 1000.0) * pricing["output"]

    return input_cost + output_cost
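
# Quick illustrations for the lookup helpers above (hypothetical inputs, not
# part of the original module):
#
#   get_model_pricing("gpt-4o-2024-08-06")
#   # -> returns the "gpt-4o" entry via the fuzzy substring match
#
#   get_model_pricing("totally-unknown-model")
#   # -> with no table or litellm match, falls back to the gpt-4o-mini default
#
#   count_tokens_with_tiktoken("hello world", "gpt-4o")
#   # -> exact count from the model's encoding if tiktoken is installed, else 0
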
@dataclass
class TokenUsage:
    """Token usage record for a single LLM call"""

    agent_name: str
    stage: str
    model: str
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    cost_usd: float = 0.0
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    # New field
    calculation_method: str = "api"  # "api", "tiktoken", "litellm", "estimated"

    def to_dict(self) -> dict[str, Any]:
        return asdict(self)


class TokenTracker:
    """
    Token Tracker (Advanced Version)
    Supports multiple token counting methods: API response > tiktoken > litellm > estimation
    """

    def __init__(self, prefer_tiktoken: bool = True, prefer_litellm: bool = False):
        """
        Initialize tracker.

        Args:
            prefer_tiktoken: If the API doesn't return usage, prefer tiktoken calculation (default True)
            prefer_litellm: Whether to prefer litellm (requires litellm installation, default False)
        """
        self.usage_records: list[TokenUsage] = []
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_tokens = 0
        self.total_cost_usd = 0.0
        self.prefer_tiktoken = prefer_tiktoken and TIKTOKEN_AVAILABLE
        self.prefer_litellm = prefer_litellm and LITELLM_AVAILABLE

        # Callback for real-time updates (e.g., to display_manager)
        self._on_usage_added_callback = None

    def set_on_usage_added_callback(self, callback):
        """
        Set a callback to be called whenever usage is added.
        The callback receives the summary dict.

        Args:
            callback: Function that takes the summary dict as its argument
        """
        self._on_usage_added_callback = callback

    def add_usage(
        self,
        agent_name: str,
        stage: str,
        model: str,
        prompt_tokens: int = 0,
        completion_tokens: int = 0,
        token_counts: dict[str, int] | None = None,
        # New parameters: for precise calculation
        system_prompt: str | None = None,
        user_prompt: str | None = None,
        response_text: str | None = None,
        messages: list[dict] | None = None,
    ):
        """
        Add token usage record (supports multiple calculation methods)

        Args:
            agent_name: Agent name
            stage: Stage name
            model: Model name
            prompt_tokens: Input tokens (overridden if token_counts is provided)
            completion_tokens: Output tokens (overridden if token_counts is provided)
            token_counts: Optional token count dictionary (from API response, most accurate)
            system_prompt: System prompt (for tiktoken calculation)
            user_prompt: User prompt (for tiktoken calculation)
            response_text: Response text (for tiktoken calculation)
            messages: Message list (for litellm calculation)
        """
        calculation_method = "api"

        # If token_counts is provided (from the API response), prioritize using it
        if token_counts:
            prompt_tokens = token_counts.get("prompt_tokens", prompt_tokens)
            completion_tokens = token_counts.get("completion_tokens", completion_tokens)
            calculation_method = "api"
        # If no API data, try using tiktoken for precise calculation
        elif self.prefer_tiktoken and system_prompt and user_prompt:
            prompt_tokens = count_tokens_with_tiktoken(system_prompt + "\n" + user_prompt, model)
            if response_text:
                completion_tokens = count_tokens_with_tiktoken(response_text, model)
            calculation_method = "tiktoken"
        # If litellm is available and messages are provided
        elif self.prefer_litellm and messages:
            result = count_tokens_with_litellm(messages, model)
            prompt_tokens = result["prompt_tokens"]
            completion_tokens = result.get("completion_tokens", completion_tokens)
            calculation_method = "litellm"
        # If none of the above is available, use estimation (fallback)
        elif system_prompt and user_prompt:
            # Simple estimation
            estimated_prompt_tokens = int(
                (len(system_prompt.split()) + len(user_prompt.split())) * 1.3
            )
            prompt_tokens = estimated_prompt_tokens
            if response_text:
                completion_tokens = int(len(response_text.split()) * 1.3)
            calculation_method = "estimated"

        total_tokens = prompt_tokens + completion_tokens

        # Calculate cost (prefer litellm, otherwise manual calculation)
        if self.prefer_litellm and LITELLM_AVAILABLE:
            cost_usd = calculate_cost_with_litellm(model, prompt_tokens, completion_tokens)
        else:
            cost_usd = calculate_cost(model, prompt_tokens, completion_tokens)

        # Create usage record
        usage = TokenUsage(
            agent_name=agent_name,
            stage=stage,
            model=model,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            cost_usd=cost_usd,
            calculation_method=calculation_method,
        )

        self.usage_records.append(usage)

        # Update totals
        self.total_prompt_tokens += prompt_tokens
        self.total_completion_tokens += completion_tokens
        self.total_tokens += total_tokens
        self.total_cost_usd += cost_usd

        # Invoke callback for real-time updates
        if self._on_usage_added_callback:
            try:
                self._on_usage_added_callback(self.get_summary())
            except Exception:
                pass  # Don't let callback errors affect the main flow
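
    # Resolution-order sketch (hypothetical call sites, not part of the
    # original module):
    #
    #   tracker.add_usage("solve_agent", "draft", "gpt-4o-mini",
    #                     token_counts={"prompt_tokens": 1200, "completion_tokens": 300})
    #   # -> calculation_method="api": exact counts from the API response win
    #
    #   tracker.add_usage("solve_agent", "revise", "gpt-4o-mini",
    #                     system_prompt="You are a tutor.",
    #                     user_prompt="Explain recursion.",
    #                     response_text="Recursion is ...")
    #   # -> calculation_method="tiktoken": no API counts, so the texts are recounted
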
    def get_summary(self) -> dict[str, Any]:
        """
        Get usage summary

        Returns:
            {
                'total_prompt_tokens': int,
                'total_completion_tokens': int,
                'total_tokens': int,
                'total_cost_usd': float,
                'total_calls': int,
                'by_agent': Dict[str, Dict],
                'by_model': Dict[str, Dict],
                'by_method': Dict[str, Dict],  # New: statistics by calculation method
                'tiktoken_available': bool,
                'litellm_available': bool
            }
        """
        by_agent: dict[str, dict[str, Any]] = {}
        by_model: dict[str, dict[str, Any]] = {}
        by_method: dict[str, dict[str, Any]] = {}

        for usage in self.usage_records:
            # Statistics by Agent
            if usage.agent_name not in by_agent:
                by_agent[usage.agent_name] = {
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "total_tokens": 0,
                    "cost_usd": 0.0,
                    "calls": 0,
                }
            by_agent[usage.agent_name]["prompt_tokens"] += usage.prompt_tokens
            by_agent[usage.agent_name]["completion_tokens"] += usage.completion_tokens
            by_agent[usage.agent_name]["total_tokens"] += usage.total_tokens
            by_agent[usage.agent_name]["cost_usd"] += usage.cost_usd
            by_agent[usage.agent_name]["calls"] += 1

            # Statistics by model
            if usage.model not in by_model:
                by_model[usage.model] = {
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "total_tokens": 0,
                    "cost_usd": 0.0,
                    "calls": 0,
                }
            by_model[usage.model]["prompt_tokens"] += usage.prompt_tokens
            by_model[usage.model]["completion_tokens"] += usage.completion_tokens
            by_model[usage.model]["total_tokens"] += usage.total_tokens
            by_model[usage.model]["cost_usd"] += usage.cost_usd
            by_model[usage.model]["calls"] += 1

            # Statistics by calculation method
            method = usage.calculation_method
            if method not in by_method:
                by_method[method] = {
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "total_tokens": 0,
                    "cost_usd": 0.0,
                    "calls": 0,
                }
            by_method[method]["prompt_tokens"] += usage.prompt_tokens
            by_method[method]["completion_tokens"] += usage.completion_tokens
            by_method[method]["total_tokens"] += usage.total_tokens
            by_method[method]["cost_usd"] += usage.cost_usd
            by_method[method]["calls"] += 1

        return {
            "total_prompt_tokens": self.total_prompt_tokens,
            "total_completion_tokens": self.total_completion_tokens,
            "total_tokens": self.total_tokens,
            "total_cost_usd": self.total_cost_usd,
            "total_calls": len(self.usage_records),
            "by_agent": by_agent,
            "by_model": by_model,
            "by_method": by_method,  # New: statistics by calculation method
            "tiktoken_available": TIKTOKEN_AVAILABLE,
            "litellm_available": LITELLM_AVAILABLE,
        }

    def format_summary(self) -> str:
        """
        Format usage summary as a readable string

        Returns:
            Formatted summary string
        """
        summary = self.get_summary()

        lines = [
            "=" * 70,
            "💰 LLM Cost Statistics",
            "=" * 70,
            f"Total calls: {summary['total_calls']}",
            f"Total Tokens: {summary['total_tokens']:,}",
            f"  - Input: {summary['total_prompt_tokens']:,}",
            f"  - Output: {summary['total_completion_tokens']:,}",
            f"Total cost: ${summary['total_cost_usd']:.6f} USD",
        ]

        # If advanced features are used, show tool status and calculation method statistics
        if summary.get("tiktoken_available") or summary.get("litellm_available"):
            lines.append("")
            lines.append("Calculation Tool Status:")
            lines.append(
                f"  - tiktoken: {'✓ Available' if summary['tiktoken_available'] else '✗ Unavailable'}"
            )
            lines.append(
                f"  - litellm: {'✓ Available' if summary['litellm_available'] else '✗ Unavailable'}"
            )

        if summary.get("by_method"):
            lines.append("")
            lines.append("Statistics by Calculation Method:")
            lines.append("-" * 70)
            for method, stats in sorted(summary["by_method"].items()):
                method_name = {
                    "api": "API Response",
                    "tiktoken": "tiktoken Precise Calculation",
                    "litellm": "litellm Calculation",
                    "estimated": "Estimation",
                }.get(method, method)
                lines.append(f"  {method_name}:")
                lines.append(f"    Calls: {stats['calls']}")
                lines.append(f"    Tokens: {stats['total_tokens']:,}")
                lines.append(f"    Cost: ${stats['cost_usd']:.6f} USD")
            lines.append("")

        lines.append("Statistics by Agent:")
        lines.append("-" * 70)
        for agent_name, stats in sorted(summary["by_agent"].items()):
            lines.append(f"  {agent_name}:")
            lines.append(f"    Calls: {stats['calls']}")
            lines.append(
                f"    Tokens: {stats['total_tokens']:,} (Input: {stats['prompt_tokens']:,}, Output: {stats['completion_tokens']:,})"
            )
            lines.append(f"    Cost: ${stats['cost_usd']:.6f} USD")
            lines.append("")

        lines.append("Statistics by Model:")
        lines.append("-" * 70)
        for model, stats in sorted(summary["by_model"].items()):
            lines.append(f"  {model}:")
            lines.append(f"    Calls: {stats['calls']}")
            lines.append(
                f"    Tokens: {stats['total_tokens']:,} (Input: {stats['prompt_tokens']:,}, Output: {stats['completion_tokens']:,})"
            )
            lines.append(f"    Cost: ${stats['cost_usd']:.6f} USD")
            lines.append("")

        lines.append("=" * 70)

        return "\n".join(lines)

    def reset(self):
        """Reset all statistics"""
        self.usage_records.clear()
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_tokens = 0
        self.total_cost_usd = 0.0

    def save(self, filepath: str):
        """
        Save usage records to a file

        Args:
            filepath: Save path
        """
        data = {
            "summary": self.get_summary(),
            "records": [usage.to_dict() for usage in self.usage_records],
        }

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
src/api/__init__.py
ADDED
File without changes (added as an empty file)