auto-coder 0.1.254__py3-none-any.whl → 0.1.256__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

@@ -8,8 +8,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
8
8
  import traceback
9
9
  from autocoder.common.utils_code_auto_generate import chat_with_continue
10
10
  from byzerllm.utils.str2model import to_model
11
+ from autocoder.utils.llms import get_llm_names, get_model_info
11
12
 
12
- from autocoder.utils.llms import get_llm_names
13
13
  class RankResult(BaseModel):
14
14
  rank_result: List[int]
15
15
 
@@ -97,13 +97,42 @@ class CodeModificationRanker:
97
97
 
98
98
  # Collect all results
99
99
  results = []
100
- for future in as_completed(futures):
100
+ # 获取模型名称列表
101
+ model_names = []
102
+ for llm in self.llms:
103
+ # 获取当前llm实例对应的模型名称
104
+ names = get_llm_names(llm)
105
+ model_names.extend(names)
106
+
107
+ # 获取模型价格信息
108
+ model_info_map = {}
109
+ for name in model_names:
110
+ # 第二个参数是产品模式,从args中获取
111
+ info = get_model_info(name, self.args.product_mode)
112
+ if info:
113
+ model_info_map[name] = {
114
+ "input_cost": info.get("input_price", 0.0), # 每百万tokens成本
115
+ "output_cost": info.get("output_price", 0.0) # 每百万tokens成本
116
+ }
117
+
118
+ # 计算总成本
119
+ total_input_cost = 0.0
120
+ total_output_cost = 0.0
121
+
122
+ for future, model_name in zip(futures, model_names):
101
123
  try:
102
124
  result = future.result()
103
125
  input_tokens_count += result.input_tokens_count
104
126
  generated_tokens_count += result.generated_tokens_count
105
127
  v = to_model(result.content,RankResult)
106
128
  results.append(v.rank_result)
129
+
130
+ # 计算成本
131
+ info = model_info_map.get(model_name, {})
132
+ # 计算公式:token数 * 单价 / 1000000
133
+ total_input_cost += (result.input_tokens_count * info.get("input_cost", 0.0)) / 1000000
134
+ total_output_cost += (result.generated_tokens_count * info.get("output_cost", 0.0)) / 1000000
135
+
107
136
  except Exception as e:
108
137
  self.printer.print_in_terminal(
109
138
  "ranking_failed_request", style="yellow", error=str(e))
@@ -113,6 +142,10 @@ class CodeModificationRanker:
113
142
  raise Exception(
114
143
  self.printer.get_message_from_key("ranking_all_failed"))
115
144
 
145
+ # 四舍五入到4位小数
146
+ total_input_cost = round(total_input_cost, 4)
147
+ total_output_cost = round(total_output_cost, 4)
148
+
116
149
  # Calculate scores for each candidate
117
150
  candidate_scores = defaultdict(float)
118
151
  for rank_result in results:
@@ -137,7 +170,10 @@ class CodeModificationRanker:
137
170
  best_candidate=sorted_candidates[0],
138
171
  scores=score_details,
139
172
  input_tokens=input_tokens_count,
140
- output_tokens=generated_tokens_count
173
+ output_tokens=generated_tokens_count,
174
+ input_cost=total_input_cost,
175
+ output_cost=total_output_cost,
176
+ model_names=", ".join(model_names)
141
177
  )
142
178
 
143
179
  rerank_contents = [generate_result.contents[i]
@@ -33,7 +33,11 @@ COMMANDS = {
33
33
  "/add": "",
34
34
  "/add_model": "",
35
35
  "/remove": "",
36
- "/list": ""
36
+ "/list": "",
37
+ "/speed": "",
38
+ "/speed-test": "",
39
+ "/input_price": "",
40
+ "/output_price": "",
37
41
  }
38
42
  }
39
43
 
@@ -0,0 +1,392 @@
1
+ import time
2
+ import byzerllm
3
+ from typing import Dict, Any, List, Optional
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+ from rich.panel import Panel
7
+ from autocoder.common.printer import Printer
8
+ from autocoder import models as models_module
9
+ from autocoder.utils.llms import get_single_llm
10
+ import byzerllm
11
+ import pkg_resources
12
+ from concurrent.futures import ThreadPoolExecutor
13
+ from typing import Dict, List, Tuple
14
+ from pydantic import BaseModel
15
+
16
class ModelSpeedTestResult(BaseModel):
    """Speed-test outcome for one model.

    Every field except ``error`` is required; ``error`` defaults to ``None``.
    """

    # Name of the model these measurements belong to.
    model_name: str
    # Generation throughput in tokens per second.
    tokens_per_second: float
    # Delay between sending the request and receiving the first chunk.
    first_token_time: float
    # Token counts are typed as float (not int) in this model.
    input_tokens_count: float
    generated_tokens_count: float
    # Cost attributed to the input and generated tokens respectively.
    input_tokens_cost: float
    generated_tokens_cost: float
    # Short status text shown to the user.
    status: str
    # Error description; left as None when nothing went wrong.
    error: Optional[str] = None
26
+
27
class SpeedTestResults(BaseModel):
    """Container holding one ModelSpeedTestResult per tested model."""

    # Individual per-model results, in the order supplied by the caller.
    results: List[ModelSpeedTestResult]
29
+
30
# Text of the packaged "data/byzerllm.md" document. It is used as the payload
# of the long-context prompt and stays empty when the file cannot be found.
byzerllm_content = ""
try:
    byzerllm_conten_path = pkg_resources.resource_filename(
        "autocoder", "data/byzerllm.md"
    )
    with open(byzerllm_conten_path, mode="r", encoding="utf-8") as f:
        byzerllm_content = f.read()
except FileNotFoundError:
    # Best effort: a missing document simply leaves the content empty.
    pass
39
+
40
@byzerllm.prompt()
def long_context_prompt() -> str:
    '''
    下面是我们提供的一份文档:
    <document>
    {{ content }}
    </document>

    请根据上述文档,实现用户的需求:

    <query>
    我想开发一个翻译程序,使用prompt 函数实现。
    </query>
    '''
    # The docstring above is runtime prompt text, so it is kept verbatim.
    # The returned mapping supplies the value for the {{ content }} placeholder.
    template_values = {"content": byzerllm_content}
    return template_values
57
+
58
@byzerllm.prompt()
def short_context_prompt() -> str:
    '''
    Hello, can you help me test the response speed?
    '''
    # The docstring above is runtime prompt text; it has no placeholders,
    # so an empty mapping is returned.
    return dict()
64
+
65
def test_model_speed(model_name: str,
                     product_mode: str,
                     test_rounds: int = 3,
                     enable_long_context: bool = False
                     ) -> Dict[str, Any]:
    """
    Measure the streaming speed of a single model.

    Args:
        model_name: Name of the model to test.
        product_mode: Product mode (lite/pro).
        test_rounds: Number of requests to send; must be at least 1.
        enable_long_context: When True, send the long document prompt
            instead of the short greeting prompt.

    Returns:
        A dict with the following keys (numeric values are averaged over
        the test rounds unless noted otherwise):
        - tokens_per_second: generated tokens divided by total round time
        - avg_time: average time of a round in seconds
        - min_time: fastest round in seconds
        - max_time: slowest round in seconds
        - first_token_time: time until the first streamed chunk arrived
        - input_tokens_count: input tokens reported by the stream metadata
        - generated_tokens_count: generated tokens reported by the metadata
        - input_tokens_cost: cost of the input tokens
        - generated_tokens_cost: cost of the generated tokens
        - success: True when every round completed
        - error: error message, or None on success

        Any exception raised while testing is caught; the dict then has
        success=False, the message in "error" and zeros everywhere else.
    """
    from autocoder.models import get_model_by_name

    def _mean(values):
        return sum(values) / len(values)

    try:
        if test_rounds < 1:
            raise ValueError("test_rounds must be at least 1")

        llm = get_single_llm(model_name, product_mode)
        model_info = get_model_by_name(model_name)

        # Prices are quoted per million tokens; convert them to per-token
        # cost. "or 0.0" also covers a price that is stored as None.
        input_cost_per_token = (model_info.get("input_price", 0.0) or 0.0) / 1000000
        output_cost_per_token = (model_info.get("output_price", 0.0) or 0.0) / 1000000

        if enable_long_context:
            test_query = long_context_prompt.prompt()
        else:
            test_query = short_context_prompt.prompt()

        times = []
        first_token_times = []
        tokens_per_seconds = []
        input_tokens_counts = []
        generated_tokens_counts = []
        input_tokens_costs = []
        generated_tokens_costs = []

        for _ in range(test_rounds):
            # perf_counter is monotonic, so intervals are not disturbed by
            # system clock adjustments.
            start_time = time.perf_counter()
            first_token_time = None
            last_meta = None
            for _chunk, meta in llm.stream_chat_oai(conversations=[{
                "role": "user",
                "content": test_query
            }], delta_mode=True):
                if first_token_time is None:
                    first_token_time = time.perf_counter() - start_time
                last_meta = meta
            elapsed = time.perf_counter() - start_time

            # A round that streamed nothing has no first-token time.
            if first_token_time is not None:
                first_token_times.append(first_token_time)

            input_tokens_count = 0
            generated_tokens_count = 0
            if last_meta:
                # The metadata of the last chunk carries the final counts.
                generated_tokens_count = last_meta.generated_tokens_count
                input_tokens_count = last_meta.input_tokens_count

            input_tokens_counts.append(input_tokens_count)
            generated_tokens_counts.append(generated_tokens_count)
            input_tokens_costs.append(input_tokens_count * input_cost_per_token)
            generated_tokens_costs.append(generated_tokens_count * output_cost_per_token)
            tokens_per_seconds.append(
                generated_tokens_count / elapsed if elapsed > 0 else 0.0)
            times.append(elapsed)

        if not first_token_times:
            raise RuntimeError("model returned no streamed output")

        return {
            "tokens_per_second": _mean(tokens_per_seconds),
            "avg_time": _mean(times),
            "min_time": min(times),
            "max_time": max(times),
            "first_token_time": _mean(first_token_times),
            "input_tokens_count": _mean(input_tokens_counts),
            "generated_tokens_count": _mean(generated_tokens_counts),
            "success": True,
            "error": None,
            "input_tokens_cost": _mean(input_tokens_costs),
            "generated_tokens_cost": _mean(generated_tokens_costs)
        }
    except Exception as e:
        # Failures are reported through the result dict rather than raised,
        # so one broken model does not abort a batch of tests.
        return {
            "tokens_per_second": 0,
            "avg_time": 0,
            "min_time": 0,
            "max_time": 0,
            "first_token_time": 0,
            "input_tokens_count": 0,
            "generated_tokens_count": 0,
            "success": False,
            "error": str(e),
            "input_tokens_cost": 0.0,
            "generated_tokens_cost": 0.0
        }
175
+
176
def test_model_speed_wrapper(args: Tuple[str, str, int, bool]) -> Tuple[str, Dict[str, Any]]:
    """
    Adapt test_model_speed to a single-argument call for a thread pool.

    Args:
        args: Tuple of (model_name, product_mode, test_rounds,
            enable_long_context).

    Returns:
        Tuple of (model_name, test_results).
    """
    name, mode, rounds, long_context = args
    return name, test_model_speed(name, mode, rounds, long_context)
189
+
190
+
191
def run_speed_test(product_mode: str, test_rounds: int = 3, max_workers: Optional[int] = None, enable_long_context: bool = False) -> SpeedTestResults:
    """
    Speed-test every active model and return the results, fastest first.

    Args:
        product_mode: Product mode (lite/pro). In "lite" mode only models
            that have an "api_key" entry are tested.
        test_rounds: Number of test rounds per model.
        max_workers: Maximum number of threads; None lets
            ThreadPoolExecutor pick its default.
        enable_long_context: Whether to test with the long-context prompt.

    Returns:
        SpeedTestResults: results of all tested models, sorted by
        tokens per second in descending order. Failed models are included
        with zeroed measurements.
    """
    def _failed(name, message):
        # Sort key 0 plus a zeroed result describing the failure.
        return (
            0,
            ModelSpeedTestResult(
                model_name=name,
                tokens_per_second=0,
                first_token_time=0,
                input_tokens_count=0,
                generated_tokens_count=0,
                status=f"✗ {message}",
                error=message,
                input_tokens_cost=0.0,
                generated_tokens_cost=0.0
            )
        )

    all_models = models_module.load_models()
    if product_mode == "lite":
        candidates = [m for m in all_models if "api_key" in m]
    else:
        candidates = all_models

    if not candidates:
        return SpeedTestResults(results=[])

    task_args = [
        (entry["name"], product_mode, test_rounds, enable_long_context)
        for entry in candidates
    ]

    # (tokens_per_second, result) pairs; the first item is the sort key.
    scored = []

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = {
            pool.submit(test_model_speed_wrapper, item): item[0]
            for item in task_args
        }

        for task, model_name in pending.items():
            try:
                _, outcome = task.result()

                if not outcome["success"]:
                    scored.append(_failed(model_name, outcome['error']))
                    continue

                speed = outcome['tokens_per_second']
                scored.append((
                    speed,
                    ModelSpeedTestResult(
                        model_name=model_name,
                        tokens_per_second=speed,
                        first_token_time=outcome['first_token_time'],
                        input_tokens_count=outcome['input_tokens_count'],
                        generated_tokens_count=outcome['generated_tokens_count'],
                        status="✓",
                        input_tokens_cost=outcome['input_tokens_cost'],
                        generated_tokens_cost=outcome['generated_tokens_cost'],
                    )
                ))
                try:
                    # Record the measured speed on the model; best effort.
                    models_module.update_model_speed(model_name, speed)
                except Exception:
                    pass
            except Exception as e:
                scored.append(_failed(model_name, str(e)))

    # Fastest models first.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    return SpeedTestResults(results=[entry for _, entry in scored])
285
+
286
def render_speed_test_in_terminal(product_mode: str, test_rounds: int = 3, max_workers: Optional[int] = None,enable_long_context: bool = False) -> None:
    """
    Speed-test every active model and print a result table to the terminal.

    Args:
        product_mode: Product mode (lite/pro). In "lite" mode only models
            that have an "api_key" entry are tested.
        test_rounds: Number of test rounds per model.
        max_workers: Maximum number of threads; None lets
            ThreadPoolExecutor pick its default.
        enable_long_context: Whether to test with the long-context prompt.
    """
    def _zero_row(status_text):
        # Row data for a model whose test failed.
        return {"tokens_per_second": 0, "avg_time": 0, "input_tokens_count": 0,
                "generated_tokens_count": 0, "min_time": 0, "max_time": 0,
                "first_token_time": 0, "input_tokens_cost": 0.0,
                "generated_tokens_cost": 0.0, "status": status_text}

    printer = Printer()
    console = Console()

    all_models = models_module.load_models()
    if product_mode == "lite":
        candidates = [m for m in all_models if "api_key" in m]
    else:
        candidates = all_models

    if not candidates:
        printer.print_in_terminal("models_no_active", style="yellow")
        return

    table = Table(
        title=printer.get_message_from_key("models_speed_test_results"),
        show_header=True,
        header_style="bold magenta",
        show_lines=True
    )

    # (header, style, width) for each column, in display order.
    column_specs = [
        ("Model", "cyan", 30),
        ("Tokens/s", "green", 15),
        ("First Token(s)", "magenta", 15),
        ("Input Tokens", "magenta", 15),
        ("Generated Tokens", "magenta", 15),
        ("Input Tokens Cost", "yellow", 15),
        ("Generated Tokens Cost", "yellow", 15),
        ("Status", "red", 20),
    ]
    for header, color, width in column_specs:
        table.add_column(header, style=color, width=width)

    task_args = [
        (entry["name"], product_mode, test_rounds, enable_long_context)
        for entry in candidates
    ]

    # (tokens_per_second, model_name, row_data) triples for later sorting.
    rows = []

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        printer.print_in_terminal("models_testing_start", style="yellow")

        pending = {
            pool.submit(test_model_speed_wrapper, item): item[0]
            for item in task_args
        }

        total = len(pending)
        for completed, (task, model_name) in enumerate(pending.items(), start=1):
            printer.print_in_terminal("models_testing_progress", style="yellow", completed=completed, total=total)
            printer.print_in_terminal("models_testing", style="yellow", name=model_name)

            try:
                _, outcome = task.result()

                if not outcome["success"]:
                    rows.append((0, model_name, _zero_row(f"✗ ({outcome['error']})")))
                    continue

                outcome['status'] = "✓"
                rows.append((outcome['tokens_per_second'], model_name, outcome))
                try:
                    # Record the measured speed on the model; best effort.
                    models_module.update_model_speed(model_name, outcome['tokens_per_second'])
                except Exception:
                    pass
            except Exception as e:
                rows.append((0, model_name, _zero_row(f"✗ ({str(e)})")))

    # Fastest models first.
    rows.sort(key=lambda row: row[0], reverse=True)

    for tokens_per_second, model_name, data in rows:
        table.add_row(
            model_name,
            f"{tokens_per_second:.2f}",
            f"{data['first_token_time']:.2f}",
            f"{data['input_tokens_count']}",
            f"{data['generated_tokens_count']}",
            f"{data['input_tokens_cost']:.4f}",
            f"{data['generated_tokens_cost']:.4f}",
            data['status']
        )

    console.print(Panel(table, border_style="blue"))