flexllm 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/__main__.py ADDED
@@ -0,0 +1,1096 @@
+ """
+ flexllm CLI - command-line tool for LLM clients
+ 
+ Provides concise LLM invocation commands:
+     flexllm ask "your question"
+     flexllm chat
+     flexllm batch input.jsonl -o output.jsonl
+     flexllm models
+     flexllm test
+ """
+ from __future__ import annotations
+ 
+ import json
+ import os
+ import sys
+ import asyncio
+ from pathlib import Path
+ from typing import Optional, List, Tuple, Annotated
+ 
+ try:
+     import typer
+     from typer import Typer, Option, Argument
+ 
+     app = Typer(
+         name="flexllm",
+         help="flexllm - high-performance LLM client command-line tool",
+         add_completion=True,
+         no_args_is_help=True,
+     )
+     HAS_TYPER = True
+ except ImportError:
+     HAS_TYPER = False
+     app = None
+ 
+ 
+ class FlexLLMConfig:
+     """Configuration management"""
+ 
+     def __init__(self):
+         self.config = self._load_config()
+ 
+     def _get_config_paths(self):
+         """Get the config file search paths"""
+         paths = []
+         paths.append(Path.cwd() / "flexllm_config.yaml")
+         paths.append(Path.home() / ".flexllm" / "config.yaml")
+         return paths
+ 
+     def _load_config(self) -> dict:
+         """Load the config file"""
+         default_config = {"default": None, "models": []}
+ 
+         for config_path in self._get_config_paths():
+             if config_path.exists():
+                 try:
+                     import yaml
+ 
+                     with open(config_path, "r", encoding="utf-8") as f:
+                         file_config = yaml.safe_load(f)
+                         if file_config:
+                             return {**default_config, **file_config}
+                 except ImportError:
+                     pass
+                 except Exception:
+                     pass
+ 
+         env_config = self._config_from_env()
+         if env_config:
+             default_config["models"] = [env_config]
+             default_config["default"] = env_config.get("id")
+ 
+         return default_config
+ 
+     def _config_from_env(self) -> Optional[dict]:
+         """Build a config from environment variables"""
+         base_url = os.environ.get("FLEXLLM_BASE_URL") or os.environ.get("OPENAI_BASE_URL")
+         api_key = os.environ.get("FLEXLLM_API_KEY") or os.environ.get("OPENAI_API_KEY")
+         model = os.environ.get("FLEXLLM_MODEL") or os.environ.get("OPENAI_MODEL")
+ 
+         if base_url and api_key and model:
+             return {
+                 "id": model,
+                 "name": model,
+                 "base_url": base_url,
+                 "api_key": api_key,
+                 "provider": "openai",
+             }
+         return None
+ 
+     def get_model_config(self, name_or_id: str = None) -> Optional[dict]:
+         """Get a model config by name or id"""
+         models = self.config.get("models", [])
+ 
+         if not models:
+             env_config = self._config_from_env()
+             if env_config:
+                 return env_config
+             return None
+ 
+         if name_or_id is None:
+             name_or_id = self.config.get("default")
+             if not name_or_id:
+                 return models[0] if models else None
+ 
+         for m in models:
+             if m.get("name") == name_or_id:
+                 return m
+ 
+         for m in models:
+             if m.get("id") == name_or_id:
+                 return m
+ 
+         return None
+ 
+     def get_config_path(self) -> Optional[Path]:
+         """Get the path of an existing config file"""
+         for path in self._get_config_paths():
+             if path.exists():
+                 return path
+         return None
+ 
+     def get_batch_config(self) -> dict:
+         """
+         Get the configuration for the batch command
+ 
+         Precedence: user config file > defaults.
+         Returns the merged, complete config dict.
+         """
+         # Defaults
+         defaults = {
+             # Cache settings
+             "cache": False,
+             "cache_ttl": 86400,
+             # Network settings
+             "timeout": 120,
+             "retry_times": 3,
+             "retry_delay": 1.0,
+             # Sampling parameters
+             "top_p": None,
+             "top_k": None,
+             # Thinking mode
+             "thinking": None,
+             # Processing settings
+             "preprocess_msg": False,
+             "flush_interval": 1.0,
+             # Output settings
+             "return_usage": False,
+         }
+ 
+         # Read the batch section from the config file
+         user_batch_config = self.config.get("batch", {})
+ 
+         # Merge configs (user settings override defaults)
+         result = {**defaults}
+         for key in defaults:
+             if key in user_batch_config:
+                 result[key] = user_batch_config[key]
+ 
+         return result
+ 
+ 
+ # Global config instance
+ _config: Optional[FlexLLMConfig] = None
+ 
+ 
+ def get_config() -> FlexLLMConfig:
+     global _config
+     if _config is None:
+         _config = FlexLLMConfig()
+     return _config
+ 
+ 
+ # ========== Input format handling ==========
+ 
+ 
+ def detect_input_format(record: dict) -> Tuple[str, List[str]]:
+     """Detect the format type of an input record"""
+     if "messages" in record:
+         return "openai_chat", ["messages"]
+     if "instruction" in record:
+         return "alpaca", ["instruction", "input"]
+     for field in ["q", "question", "prompt"]:
+         if field in record:
+             return "simple", [field, "system"]
+     return "unknown", []
+ 
+ 
+ def convert_to_messages(
+     record: dict, format_type: str, message_fields: List[str], global_system: str = None
+ ) -> Tuple[List[dict], dict]:
+     """Convert an input record to the messages format"""
+     messages = []
+     used_fields = set()
+ 
+     if format_type == "openai_chat":
+         messages = record["messages"]
+         used_fields.add("messages")
+ 
+     elif format_type == "alpaca":
+         instruction = record.get("instruction", "")
+         input_text = record.get("input", "")
+         used_fields.update(["instruction", "input", "output"])
+ 
+         content = instruction
+         if input_text:
+             content = f"{instruction}\n\n{input_text}"
+         messages = [{"role": "user", "content": content}]
+ 
+     elif format_type == "simple":
+         prompt_field = None
+         for field in ["q", "question", "prompt"]:
+             if field in record:
+                 prompt_field = field
+                 break
+ 
+         if prompt_field:
+             used_fields.add(prompt_field)
+             system = global_system or record.get("system")
+             if "system" in record:
+                 used_fields.add("system")
+ 
+             if system:
+                 messages.append({"role": "system", "content": system})
+             messages.append({"role": "user", "content": record[prompt_field]})
+ 
+     if global_system and format_type != "openai_chat":
+         messages = [m for m in messages if m.get("role") != "system"]
+         messages.insert(0, {"role": "system", "content": global_system})
+ 
+     metadata = {k: v for k, v in record.items() if k not in used_fields}
+     return messages, metadata
+ 
+ 
+ def parse_batch_input(input_path: str = None) -> Tuple[List[dict], str, List[str]]:
+     """Parse a batch input file or stdin"""
+     records = []
+ 
+     if input_path:
+         with open(input_path, "r", encoding="utf-8") as f:
+             for line in f:
+                 line = line.strip()
+                 if line:
+                     records.append(json.loads(line))
+     else:
+         for line in sys.stdin:
+             line = line.strip()
+             if line:
+                 records.append(json.loads(line))
+ 
+     if not records:
+         raise ValueError("Input is empty")
+ 
+     format_type, message_fields = detect_input_format(records[0])
+ 
+     if format_type == "unknown":
+         available_fields = list(records[0].keys())
+         raise ValueError(
+             f"Unrecognized input format; none of the following fields were found:\n"
+             f"  - messages (openai_chat format)\n"
+             f"  - instruction (alpaca format)\n"
+             f"  - q/question/prompt (simple format)\n\n"
+             f"Fields found: {available_fields}\n"
+             f"Hint: convert the format with dtflow: dt transform data.jsonl --preset=openai_chat"
+         )
+ 
+     return records, format_type, message_fields
+ 
+ 
+ # ========== CLI commands ==========
+ 
+ if HAS_TYPER:
+ 
+     @app.command()
+     def ask(
+         prompt: Annotated[Optional[str], Argument(help="User question")] = None,
+         system: Annotated[Optional[str], Option("-s", "--system", help="System prompt")] = None,
+         model: Annotated[Optional[str], Option("-m", "--model", help="Model name")] = None,
+     ):
+         """Quick LLM Q&A (supports piped input)
+ 
+         Examples:
+             flexllm ask "What is Python"
+             flexllm ask "Explain this code" -s "You are a code expert"
+             echo "long text" | flexllm ask "Summarize this"
+         """
+         stdin_content = None
+         if not sys.stdin.isatty():
+             stdin_content = sys.stdin.read().strip()
+ 
+         if not prompt and not stdin_content:
+             print("Error: please provide a question", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         if stdin_content:
+             full_prompt = f"{stdin_content}\n\n{prompt}" if prompt else stdin_content
+         else:
+             full_prompt = prompt
+ 
+         config = get_config()
+         model_config = config.get_model_config(model)
+         if not model_config:
+             print("Error: no model config found; use 'flexllm list' to see available models", file=sys.stderr)
+             print(
+                 "Hint: set the environment variables FLEXLLM_BASE_URL, FLEXLLM_API_KEY, FLEXLLM_MODEL, or create ~/.flexllm/config.yaml",
+                 file=sys.stderr,
+             )
+             raise typer.Exit(1)
+ 
+         model_id = model_config.get("id")
+         base_url = model_config.get("base_url")
+         api_key = model_config.get("api_key", "EMPTY")
+ 
+         async def _ask():
+             from flexllm import LLMClient
+ 
+             client = LLMClient(model=model_id, base_url=base_url, api_key=api_key)
+             messages = []
+             if system:
+                 messages.append({"role": "system", "content": system})
+             messages.append({"role": "user", "content": full_prompt})
+             return await client.chat_completions(messages)
+ 
+         try:
+             result = asyncio.run(_ask())
+             if result is None:
+                 return
+             if isinstance(result, str):
+                 print(result)
+                 return
+             if hasattr(result, "status") and result.status == "error":
+                 error_msg = result.data.get("detail", result.data.get("error", "unknown error"))
+                 print(f"Error: {error_msg}", file=sys.stderr)
+                 return
+             print(str(result))
+         except Exception as e:
+             print(f"Error: {e}", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+     @app.command()
+     def chat(
+         message: Annotated[Optional[str], Argument(help="Single message (omit to enter multi-turn chat)")] = None,
+         model: Annotated[Optional[str], Option("-m", "--model", help="Model name")] = None,
+         base_url: Annotated[Optional[str], Option("--base-url", help="API base URL")] = None,
+         api_key: Annotated[Optional[str], Option("--api-key", help="API key")] = None,
+         system_prompt: Annotated[Optional[str], Option("-s", "--system", help="System prompt")] = None,
+         temperature: Annotated[float, Option("-t", "--temperature", help="Sampling temperature")] = 0.7,
+         max_tokens: Annotated[int, Option("--max-tokens", help="Maximum number of generated tokens")] = 4096,
+         no_stream: Annotated[bool, Option("--no-stream", help="Disable streaming output")] = False,
+     ):
+         """Interactive chat
+ 
+         Examples:
+             flexllm chat                        # multi-turn chat
+             flexllm chat "Hello"                # single message
+             flexllm chat --model gpt-4 "Hello"  # specify a model
+         """
+         config = get_config()
+         model_config = config.get_model_config(model)
+         if model_config:
+             model = model or model_config.get("id")
+             base_url = base_url or model_config.get("base_url")
+             api_key = api_key or model_config.get("api_key", "EMPTY")
+ 
+         if not base_url:
+             print("Error: base_url is not configured", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         stream = not no_stream
+ 
+         if message:
+             _single_chat(message, model, base_url, api_key, system_prompt, temperature, max_tokens, stream)
+         else:
+             _interactive_chat(model, base_url, api_key, system_prompt, temperature, max_tokens, stream)
+ 
+     @app.command()
+     def batch(
+         input: Annotated[Optional[str], Argument(help="Input file path (omit to read from stdin)")] = None,
+         output: Annotated[Optional[str], Option("-o", "--output", help="Output file path (required)")] = None,
+         model: Annotated[Optional[str], Option("-m", "--model", help="Model name")] = None,
+         concurrency: Annotated[int, Option("-c", "--concurrency", help="Concurrency")] = 10,
+         max_qps: Annotated[Optional[float], Option("--max-qps", help="Maximum requests per second")] = None,
+         system: Annotated[Optional[str], Option("-s", "--system", help="Global system prompt")] = None,
+         temperature: Annotated[Optional[float], Option("-t", "--temperature", help="Sampling temperature")] = None,
+         max_tokens: Annotated[Optional[int], Option("--max-tokens", help="Maximum number of generated tokens")] = None,
+         # New CLI shortcut options
+         cache: Annotated[Optional[bool], Option("--cache/--no-cache", help="Enable/disable the response cache")] = None,
+         return_usage: Annotated[bool, Option("--return-usage", help="Output token usage statistics")] = False,
+         preprocess_msg: Annotated[bool, Option("--preprocess-msg", help="Preprocess image messages")] = False,
+     ):
+         """Batch-process a JSONL file (supports resuming)
+ 
+         Auto-detects the input format: openai_chat, alpaca, simple (q/question/prompt)
+ 
+         Advanced settings can be configured in the batch section of ~/.flexllm/config.yaml.
+         CLI arguments take precedence over the config file.
+ 
+         Examples:
+             flexllm batch input.jsonl -o output.jsonl
+             flexllm batch input.jsonl -o output.jsonl -c 20 -m gpt-4
+             flexllm batch input.jsonl -o output.jsonl --cache --return-usage
+             cat input.jsonl | flexllm batch -o output.jsonl
+         """
+         if not output:
+             print("Error: an output file must be specified (-o output.jsonl)", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         if not output.endswith(".jsonl"):
+             print(f"Error: the output file must have a .jsonl extension, got: {output}", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         has_stdin = not sys.stdin.isatty()
+         if not input and not has_stdin:
+             print("Error: please provide an input file or pipe in data", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         config = get_config()
+         model_config = config.get_model_config(model)
+         if not model_config:
+             print("Error: no model config found", file=sys.stderr)
+             print("Hint: use 'flexllm list' to see available models", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         model_id = model_config.get("id")
+         base_url = model_config.get("base_url")
+         api_key = model_config.get("api_key", "EMPTY")
+ 
+         # Get the batch config (config file + defaults)
+         batch_config = config.get_batch_config()
+ 
+         # CLI arguments override the config file
+         effective_cache = cache if cache is not None else batch_config["cache"]
+         effective_return_usage = return_usage or batch_config["return_usage"]
+         effective_preprocess_msg = preprocess_msg or batch_config["preprocess_msg"]
+ 
+         try:
+             records, format_type, message_fields = parse_batch_input(input)
+             print(f"Input format: {format_type}", file=sys.stderr)
+             print(f"Records: {len(records)}", file=sys.stderr)
+ 
+             messages_list = []
+             metadata_list = []
+ 
+             for record in records:
+                 messages, metadata = convert_to_messages(record, format_type, message_fields, system)
+                 messages_list.append(messages)
+                 metadata_list.append(metadata if metadata else None)
+ 
+             has_metadata = any(m for m in metadata_list)
+             if not has_metadata:
+                 metadata_list = None
+ 
+             async def _run_batch():
+                 from flexllm import LLMClient
+                 from .cache import ResponseCacheConfig
+ 
+                 # Build the cache config
+                 cache_config = None
+                 if effective_cache:
+                     cache_config = ResponseCacheConfig.ipc(ttl=batch_config["cache_ttl"])
+ 
+                 client_kwargs = {
+                     "model": model_id,
+                     "base_url": base_url,
+                     "api_key": api_key,
+                     "concurrency_limit": concurrency,
+                     "timeout": batch_config["timeout"],
+                     "retry_times": batch_config["retry_times"],
+                     "retry_delay": batch_config["retry_delay"],
+                     "cache": cache_config,
+                 }
+                 if max_qps is not None:
+                     client_kwargs["max_qps"] = max_qps
+ 
+                 client = LLMClient(**client_kwargs)
+ 
+                 # Build kwargs for chat_completions_batch
+                 kwargs = {}
+                 if temperature is not None:
+                     kwargs["temperature"] = temperature
+                 if max_tokens is not None:
+                     kwargs["max_tokens"] = max_tokens
+                 # Read sampling parameters from the config file
+                 if batch_config["top_p"] is not None:
+                     kwargs["top_p"] = batch_config["top_p"]
+                 if batch_config["top_k"] is not None:
+                     kwargs["top_k"] = batch_config["top_k"]
+                 if batch_config["thinking"] is not None:
+                     kwargs["thinking"] = batch_config["thinking"]
+ 
+                 results, summary = await client.chat_completions_batch(
+                     messages_list=messages_list,
+                     output_jsonl=output,
+                     show_progress=True,
+                     return_summary=True,
+                     return_usage=effective_return_usage,
+                     preprocess_msg=effective_preprocess_msg,
+                     flush_interval=batch_config["flush_interval"],
+                     metadata_list=metadata_list,
+                     **kwargs,
+                 )
+                 return results, summary
+ 
+             results, summary = asyncio.run(_run_batch())
+ 
+             if summary:
+                 print(f"\nDone: {summary}", file=sys.stderr)
+             print(f"Output file: {output}", file=sys.stderr)
+ 
+         except json.JSONDecodeError as e:
+             print(f"Error: JSON parsing failed - {e}", file=sys.stderr)
+             raise typer.Exit(1)
+         except ValueError as e:
+             print(f"Error: {e}", file=sys.stderr)
+             raise typer.Exit(1)
+         except FileNotFoundError:
+             print(f"Error: file not found - {input}", file=sys.stderr)
+             raise typer.Exit(1)
+         except Exception as e:
+             print(f"Error: {e}", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+     @app.command()
+     def models(
+         base_url: Annotated[Optional[str], Option("--base-url", help="API base URL")] = None,
+         api_key: Annotated[Optional[str], Option("--api-key", help="API key")] = None,
+         name: Annotated[Optional[str], Option("-n", "--name", help="Model config name")] = None,
+     ):
+         """List the models available on the remote server"""
+         import requests
+ 
+         config = get_config()
+         model_config = config.get_model_config(name)
+         if model_config:
+             base_url = base_url or model_config.get("base_url")
+             api_key = api_key or model_config.get("api_key", "EMPTY")
+             provider = model_config.get("provider", "openai")
+         else:
+             provider = "openai"
+ 
+         if not base_url:
+             print("Error: base_url is not configured", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         is_gemini = provider == "gemini" or "generativelanguage.googleapis.com" in base_url
+ 
+         try:
+             if is_gemini:
+                 url = f"{base_url.rstrip('/')}/models?key={api_key}"
+                 response = requests.get(url, timeout=10)
+             else:
+                 headers = {"Authorization": f"Bearer {api_key}"}
+                 response = requests.get(f"{base_url.rstrip('/')}/models", headers=headers, timeout=10)
+ 
+             if response.status_code == 200:
+                 models_data = response.json()
+ 
+                 print("\nAvailable models")
+                 print(f"Server: {base_url}")
+                 print("-" * 50)
+ 
+                 if is_gemini:
+                     models_list = models_data.get("models", [])
+                     if models_list:
+                         for i, m in enumerate(models_list, 1):
+                             model_name = m.get("name", "").replace("models/", "")
+                             print(f"  {i:2d}. {model_name}")
+                         print(f"\n{len(models_list)} models in total")
+                     else:
+                         print("No available models found")
+                 else:
+                     if isinstance(models_data, dict) and "data" in models_data:
+                         models_list = models_data["data"]
+                     elif isinstance(models_data, list):
+                         models_list = models_data
+                     else:
+                         models_list = []
+ 
+                     if models_list:
+                         for i, m in enumerate(models_list, 1):
+                             if isinstance(m, dict):
+                                 model_id = m.get("id", m.get("name", "unknown"))
+                                 print(f"  {i:2d}. {model_id}")
+                             else:
+                                 print(f"  {i:2d}. {m}")
+                         print(f"\n{len(models_list)} models in total")
+                     else:
+                         print("No available models found")
+             else:
+                 print(f"Error: HTTP {response.status_code}", file=sys.stderr)
+                 raise typer.Exit(1)
+ 
+         except requests.exceptions.RequestException as e:
+             print(f"Connection failed: {e}", file=sys.stderr)
+             raise typer.Exit(1)
+         except Exception as e:
+             print(f"Error: {e}", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+ @app.command("list")
600
+ def list_models():
601
+ """列出本地配置的模型"""
602
+ config = get_config()
603
+ models = config.config.get("models", [])
604
+ default = config.config.get("default", "")
605
+
606
+ if not models:
607
+ print("未配置模型")
608
+ print("提示: 创建 ~/.flexllm/config.yaml 或设置环境变量")
609
+ return
610
+
611
+ print(f"已配置模型 (共 {len(models)} 个):\n")
612
+ for m in models:
613
+ name = m.get("name", m.get("id", "?"))
614
+ model_id = m.get("id", "?")
615
+ provider = m.get("provider", "openai")
616
+ is_default = " (默认)" if name == default or model_id == default else ""
617
+
618
+ print(f" {name}{is_default}")
619
+ if name != model_id:
620
+ print(f" id: {model_id}")
621
+ print(f" provider: {provider}")
622
+ print()
623
+
624
+ @app.command("set-model")
625
+ def set_model(
626
+ model_name: Annotated[str, Argument(help="模型名称或 ID")],
627
+ ):
628
+ """设置默认模型
629
+
630
+ Examples:
631
+ flexllm set-model gpt-4
632
+ flexllm set-model local-ollama
633
+ """
634
+ config = get_config()
635
+ config_path = config.get_config_path()
636
+
637
+ if not config_path:
638
+ print("错误: 未找到配置文件", file=sys.stderr)
639
+ print("提示: 先运行 'flexllm init' 初始化配置文件", file=sys.stderr)
640
+ raise typer.Exit(1)
641
+
642
+ model_config = config.get_model_config(model_name)
643
+ if not model_config:
644
+ print(f"错误: 未找到模型 '{model_name}'", file=sys.stderr)
645
+ print("提示: 使用 'flexllm list' 查看已配置的模型", file=sys.stderr)
646
+ raise typer.Exit(1)
647
+
648
+ try:
649
+ import yaml
650
+
651
+ with open(config_path, "r", encoding="utf-8") as f:
652
+ file_config = yaml.safe_load(f) or {}
653
+
654
+ default_value = model_config.get("name", model_config.get("id"))
655
+ old_default = file_config.get("default")
656
+ file_config["default"] = default_value
657
+
658
+ with open(config_path, "w", encoding="utf-8") as f:
659
+ yaml.dump(file_config, f, default_flow_style=False, allow_unicode=True)
660
+
661
+ print(f"默认模型已设置为: {default_value}")
662
+ if old_default and old_default != default_value:
663
+ print(f"(原默认模型: {old_default})")
664
+
665
+ config.config["default"] = default_value
666
+
667
+ except ImportError:
668
+ print("错误: 需要安装 pyyaml: pip install pyyaml", file=sys.stderr)
669
+ raise typer.Exit(1)
670
+ except Exception as e:
671
+ print(f"错误: {e}", file=sys.stderr)
672
+ raise typer.Exit(1)
673
+
674
+     @app.command()
+     def test(
+         model: Annotated[Optional[str], Option("-m", "--model", help="Model name")] = None,
+         base_url: Annotated[Optional[str], Option("--base-url", help="API base URL")] = None,
+         api_key: Annotated[Optional[str], Option("--api-key", help="API key")] = None,
+         message: Annotated[
+             str, Option("--message", help="Test message")
+         ] = "Hello, please respond with 'OK' if you can see this message.",
+         timeout: Annotated[int, Option("--timeout", help="Timeout (seconds)")] = 30,
+     ):
+         """Test the LLM service connection"""
+         import requests
+         import time
+ 
+         config = get_config()
+         model_config = config.get_model_config(model)
+         if model_config:
+             model = model or model_config.get("id")
+             base_url = base_url or model_config.get("base_url")
+             api_key = api_key or model_config.get("api_key", "EMPTY")
+ 
+         if not base_url:
+             print("Error: base_url is not configured", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+         print("\nLLM service connection test")
+         print("-" * 50)
+ 
+         print("\n1. Testing server connection...")
+         print(f"   Address: {base_url}")
+         try:
+             start = time.time()
+             response = requests.get(
+                 f"{base_url.rstrip('/')}/models",
+                 headers={"Authorization": f"Bearer {api_key}"},
+                 timeout=timeout,
+             )
+             elapsed = time.time() - start
+ 
+             if response.status_code == 200:
+                 print(f"   ✓ Connected ({elapsed:.2f}s)")
+                 models_data = response.json()
+                 if isinstance(models_data, dict) and "data" in models_data:
+                     model_count = len(models_data["data"])
+                 elif isinstance(models_data, list):
+                     model_count = len(models_data)
+                 else:
+                     model_count = 0
+                 print(f"   Available models: {model_count}")
+             else:
+                 print(f"   ✗ Connection failed: HTTP {response.status_code}")
+                 raise typer.Exit(1)
+         except Exception as e:
+             print(f"   ✗ Connection failed: {e}")
+             raise typer.Exit(1)
+ 
+         if model:
+             print("\n2. Testing the Chat API...")
+             print(f"   Model: {model}")
+             try:
+                 start = time.time()
+                 response = requests.post(
+                     f"{base_url.rstrip('/')}/chat/completions",
+                     headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
+                     json={"model": model, "messages": [{"role": "user", "content": message}], "max_tokens": 50},
+                     timeout=timeout,
+                 )
+                 elapsed = time.time() - start
+ 
+                 if response.status_code == 200:
+                     result = response.json()
+                     content = result.get("choices", [{}])[0].get("message", {}).get("content", "")
+                     print(f"   ✓ Call succeeded ({elapsed:.2f}s)")
+                     print(f"   Response: {content[:100]}...")
+                 else:
+                     print(f"   ✗ Call failed: HTTP {response.status_code}")
+                     print(f"   {response.text[:200]}")
+             except Exception as e:
+                 print(f"   ✗ Call failed: {e}")
+ 
+         print("\nTest complete")
+ 
+     @app.command()
+     def init(
+         path: Annotated[Optional[str], Option("-p", "--path", help="Config file path")] = None,
+     ):
+         """Initialize the config file"""
+         if path is None:
+             config_path = Path.home() / ".flexllm" / "config.yaml"
+         else:
+             config_path = Path(path)
+ 
+         if config_path.exists():
+             print(f"Config file already exists: {config_path}")
+             return
+ 
+         config_path.parent.mkdir(parents=True, exist_ok=True)
+ 
+         default_config = """# flexllm configuration file
+ # Config search paths:
+ #   1. current directory: ./flexllm_config.yaml
+ #   2. user directory: ~/.flexllm/config.yaml
+ 
+ # Default model
+ default: "gpt-4"
+ 
+ # Model list
+ models:
+   - id: gpt-4
+     name: gpt-4
+     provider: openai
+     base_url: https://api.openai.com/v1
+     api_key: your-api-key
+ 
+   - id: local-ollama
+     name: local-ollama
+     provider: openai
+     base_url: http://localhost:11434/v1
+     api_key: EMPTY
+ 
+ # batch command settings (optional)
+ # These can be overridden by CLI arguments
+ # batch:
+ #   # Cache settings
+ #   cache: false           # enable the response cache
+ #   cache_ttl: 86400       # cache TTL (seconds), default 24 hours
+ #
+ #   # Network settings
+ #   timeout: 120           # request timeout (seconds)
+ #   retry_times: 3         # number of retries
+ #   retry_delay: 1.0       # retry delay (seconds)
+ #
+ #   # Sampling parameters (override model defaults)
+ #   # top_p: 0.9
+ #   # top_k: 50
+ #
+ #   # Thinking mode (for models that support it, e.g. DeepSeek-R1)
+ #   # thinking: true       # or "minimal"/"low"/"medium"/"high"
+ #
+ #   # Processing settings
+ #   preprocess_msg: false  # preprocess image messages (URL -> base64)
+ #   flush_interval: 1.0    # file flush interval (seconds)
+ #
+ #   # Output settings
+ #   return_usage: false    # output token usage statistics
+ """
+ 
+         try:
+             with open(config_path, "w", encoding="utf-8") as f:
+                 f.write(default_config)
+             print(f"Created config file: {config_path}")
+             print("Edit the config file and fill in your API key")
+         except Exception as e:
+             print(f"Failed to create: {e}", file=sys.stderr)
+             raise typer.Exit(1)
+ 
+     @app.command()
+     def pricing(
+         model: Annotated[Optional[str], Argument(help="Model name (fuzzy matching supported)")] = None,
+         update: Annotated[bool, Option("--update", help="Update the pricing table from OpenRouter")] = False,
+         json_output: Annotated[bool, Option("--json", help="Output as JSON")] = False,
+     ):
+         """Query model pricing information
+ 
+         Examples:
+             flexllm pricing            # list pricing for all models
+             flexllm pricing gpt-4o     # query gpt-4o pricing
+             flexllm pricing claude     # fuzzy-match claude models
+             flexllm pricing --update   # update the pricing table from OpenRouter
+         """
+         from .pricing import get_pricing, reload_pricing
+ 
+         MODEL_PRICING = get_pricing()
+ 
+         if update:
+             # Run the updater
+             print("Fetching the latest pricing from the OpenRouter API...")
+             try:
+                 from .pricing.updater import collect_pricing, update_pricing_file
+ 
+                 pricing_map = collect_pricing()
+                 print(f"Fetched pricing for {len(pricing_map)} models")
+ 
+                 if update_pricing_file(pricing_map):
+                     reload_pricing()  # reload the pricing data
+                     print("✓ data.json updated")
+                 else:
+                     print("✗ Update failed", file=sys.stderr)
+                     raise typer.Exit(1)
+             except Exception as e:
+                 print(f"Update failed: {e}", file=sys.stderr)
+                 raise typer.Exit(1)
+             return
+ 
+         # Query pricing
+         if model:
+             # Fuzzy matching
+             matches = {
+                 name: price for name, price in MODEL_PRICING.items()
+                 if model.lower() in name.lower()
+             }
+ 
+             if not matches:
+                 print(f"No models matching '{model}' found", file=sys.stderr)
+                 print(f"\nAvailable models: {', '.join(sorted(MODEL_PRICING.keys())[:10])}...", file=sys.stderr)
+                 raise typer.Exit(1)
+ 
+             if json_output:
+                 import json as json_module
+                 output = {
+                     name: {
+                         "input_per_1m": round(p["input"] * 1e6, 4),
+                         "output_per_1m": round(p["output"] * 1e6, 4),
+                     }
+                     for name, p in sorted(matches.items())
+                 }
+                 print(json_module.dumps(output, indent=2, ensure_ascii=False))
+             else:
+                 print(f"\nModel pricing (matching '{model}'):\n")
+                 print(f"{'Model':<30} {'Input ($/1M)':<15} {'Output ($/1M)':<15}")
+                 print("-" * 60)
+                 for name in sorted(matches.keys()):
+                     p = matches[name]
+                     input_price = p["input"] * 1e6
+                     output_price = p["output"] * 1e6
+                     print(f"{name:<30} ${input_price:<14.4f} ${output_price:<14.4f}")
+                 print(f"\n{len(matches)} models in total")
+         else:
+             # List all models
+             if json_output:
+                 import json as json_module
+                 output = {
+                     name: {
+                         "input_per_1m": round(p["input"] * 1e6, 4),
+                         "output_per_1m": round(p["output"] * 1e6, 4),
+                     }
+                     for name, p in sorted(MODEL_PRICING.items())
+                 }
+                 print(json_module.dumps(output, indent=2, ensure_ascii=False))
+             else:
+                 # Group by vendor
+                 groups = {}
+                 for name, price in MODEL_PRICING.items():
+                     if name.startswith(("gpt-", "o1", "o3", "o4")):
+                         group = "OpenAI"
+                     elif name.startswith("claude-"):
+                         group = "Anthropic"
+                     elif name.startswith("gemini-"):
+                         group = "Google"
+                     elif name.startswith("deepseek"):
+                         group = "DeepSeek"
+                     elif name.startswith(("qwen", "qwen2", "qwen3")):
+                         group = "Alibaba"
+                     elif name.startswith(("mistral", "ministral", "codestral", "devstral")):
+                         group = "Mistral"
+                     elif name.startswith("llama-"):
+                         group = "Meta"
+                     elif name.startswith("grok"):
+                         group = "xAI"
+                     elif name.startswith("nova"):
+                         group = "Amazon"
+                     else:
+                         group = "Other"
+ 
+                     if group not in groups:
+                         groups[group] = []
+                     groups[group].append((name, price))
+ 
+                 print(f"\nModel pricing table ({len(MODEL_PRICING)} models in total):\n")
+                 print(f"{'Model':<30} {'Input ($/1M)':<15} {'Output ($/1M)':<15}")
+                 print("=" * 60)
+ 
+                 for group_name in ["OpenAI", "Anthropic", "Google", "DeepSeek", "Alibaba", "Mistral", "Meta", "xAI", "Amazon", "Other"]:
+                     if group_name not in groups:
+                         continue
+                     models = groups[group_name]
+                     print(f"\n[{group_name}]")
+                     for name, p in sorted(models):
+                         input_price = p["input"] * 1e6
+                         output_price = p["output"] * 1e6
+                         print(f"  {name:<28} ${input_price:<14.4f} ${output_price:<14.4f}")
+ 
+     @app.command()
+     def version():
+         """Show version information"""
+         try:
+             from importlib.metadata import version as get_version
+ 
+             v = get_version("flexllm")
+         except Exception:
+             v = "0.1.0"
+         print(f"flexllm {v}")
+ 
+ 
+ # ========== Helper functions ==========
+ 
+ 
+ def _single_chat(message, model, base_url, api_key, system_prompt, temperature, max_tokens, stream):
+     """Single-shot chat"""
+ 
+     async def _run():
+         from flexllm import LLMClient
+ 
+         client = LLMClient(model=model, base_url=base_url, api_key=api_key)
+ 
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         messages.append({"role": "user", "content": message})
+ 
+         if stream:
+             print("Assistant: ", end="", flush=True)
+             async for chunk in client.chat_completions_stream(
+                 messages, temperature=temperature, max_tokens=max_tokens
+             ):
+                 print(chunk, end="", flush=True)
+             print()
+         else:
+             result = await client.chat_completions(messages, temperature=temperature, max_tokens=max_tokens)
+             print(f"Assistant: {result}")
+ 
+     try:
+         asyncio.run(_run())
+     except KeyboardInterrupt:
+         print("\n[interrupted]")
+     except Exception as e:
+         print(f"Error: {e}", file=sys.stderr)
+ 
+ 
+ def _interactive_chat(model, base_url, api_key, system_prompt, temperature, max_tokens, stream):
+     """Multi-turn interactive chat"""
+ 
+     async def _run():
+         from flexllm import LLMClient
+ 
+         client = LLMClient(model=model, base_url=base_url, api_key=api_key)
+ 
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+ 
+         print("\nMulti-turn chat mode")
+         print(f"Model: {model}")
+         print(f"Server: {base_url}")
+         print("Type 'quit' or press Ctrl+C to exit")
+         print("-" * 50)
+ 
+         while True:
+             try:
+                 user_input = input("\nYou: ").strip()
+ 
+                 if user_input.lower() in ["quit", "exit", "q"]:
+                     print("Goodbye!")
+                     break
+ 
+                 if not user_input:
+                     continue
+ 
+                 messages.append({"role": "user", "content": user_input})
+ 
+                 if stream:
+                     print("Assistant: ", end="", flush=True)
+                     full_response = ""
+                     async for chunk in client.chat_completions_stream(
+                         messages, temperature=temperature, max_tokens=max_tokens
+                     ):
+                         print(chunk, end="", flush=True)
+                         full_response += chunk
+                     print()
+                     messages.append({"role": "assistant", "content": full_response})
+                 else:
+                     result = await client.chat_completions(
+                         messages, temperature=temperature, max_tokens=max_tokens
+                     )
+                     print(f"Assistant: {result}")
+                     messages.append({"role": "assistant", "content": result})
+ 
+             except EOFError:
+                 print("\nGoodbye!")
+                 break
+ 
+     try:
+         asyncio.run(_run())
+     except KeyboardInterrupt:
+         print("\nGoodbye!")
+ 
+ 
+ # ========== Fallback CLI ==========
+ 
+ 
+ def _fallback_cli():
+     """Simple CLI for when typer is unavailable"""
+     args = sys.argv[1:]
+ 
+     if not args or args[0] in ["-h", "--help", "help"]:
+         print("flexllm CLI")
+         print("\nCommands:")
+         print("  ask <prompt>       quick Q&A")
+         print("  chat               interactive chat")
+         print("  batch              batch-process a JSONL file")
+         print("  models             list remote models")
+         print("  list               list configured models")
+         print("  set-model <name>   set the default model")
+         print("  test               test the connection")
+         print("  init               initialize the config")
+         print("  version            show the version")
+         print("\nInstall typer for a better CLI experience: pip install typer")
+         return
+ 
+     print("Error: typer is required: pip install typer", file=sys.stderr)
+     print("Or: pip install flexllm[cli]", file=sys.stderr)
+ 
+ 
+ def main():
+     """CLI entry point"""
+     if HAS_TYPER:
+         app()
+     else:
+         _fallback_cli()
+ 
+ 
+ if __name__ == "__main__":
+     main()