llmproxy-withlog 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llmproxy/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """LLM Proxy - OpenAI-compatible LLM reverse proxy with real-time analytics."""
2
+
3
# Package version string exposed as ``llmproxy.__version__``.
__version__ = "1.0.0"
llmproxy/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow running as: python -m llmproxy"""
2
+
3
+ from .cli import main
4
+
5
# Run the CLI entry point only when this module is executed as a script
# (``python -m llmproxy``), not when it is imported.
if __name__ == "__main__":
    main()
llmproxy/analyzer.py ADDED
@@ -0,0 +1,283 @@
1
+ """Offline log analysis tool for LLM Proxy."""
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ import sys
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+
12
def find_log_files(paths: Optional[list[str]] = None, log_dir: str = "logs") -> list[Path]:
    """Return the log files to analyze, sorted by path.

    Args:
        paths: Explicit file paths. When non-empty, only the entries that
            exist on disk are returned and ``log_dir`` is ignored.
        log_dir: Directory scanned for ``*.log`` files when ``paths`` is
            empty or ``None``.

    Returns:
        A sorted list of ``Path`` objects (possibly empty).
    """
    if not paths:
        return sorted(Path(log_dir).glob("*.log"))
    candidates = (Path(entry) for entry in paths)
    return sorted(candidate for candidate in candidates if candidate.exists())
16
+
17
+
18
def parse_streaming_chunks(raw_body: str) -> dict:
    """Merge a logged SSE chat-completion stream into a single response dict.

    Each ``data:`` line is decoded as one JSON chunk. Content deltas are
    concatenated, tool-call fragments are merged per ``index``, and the last
    seen ``model`` / ``finish_reason`` win. Lines that are not ``data:``
    events, the ``[DONE]`` sentinel, undecodable JSON and non-object chunks
    are skipped.

    Args:
        raw_body: The raw SSE text as captured in the log.

    Returns:
        A dict with keys ``content`` (str), ``tool_calls`` (list or ``None``),
        ``finish_reason`` and ``model`` (each possibly ``None``). A ``usage``
        key is added only when a non-empty usage object was found.
    """
    content_parts: list[str] = []
    tool_calls: dict[int, dict] = {}
    usage = None
    model = None
    finish_reason = None

    for line in raw_body.strip().splitlines():
        line = line.strip()
        # SSE allows "data:" with or without a following space.
        if not line.startswith("data:"):
            continue
        payload = line[len("data:"):].strip()
        if not payload or payload == "[DONE]":
            continue
        try:
            chunk = json.loads(payload)
        except json.JSONDecodeError:
            continue
        if not isinstance(chunk, dict) or not chunk:
            continue

        if chunk.get("model"):
            model = chunk["model"]
        # Usage arrives either on a chunk flagged ``lastOne`` or, for
        # OpenAI-style streams, on any chunk carrying a non-null ``usage``.
        if chunk.get("lastOne") or chunk.get("usage"):
            usage = chunk.get("usage")

        # ``or`` guards: providers commonly send explicit nulls for these keys.
        for choice in chunk.get("choices") or []:
            if not isinstance(choice, dict):
                continue
            delta = choice.get("delta") or {}
            piece = delta.get("content")
            if piece and isinstance(piece, str):
                content_parts.append(piece)
            for tc in delta.get("tool_calls") or []:
                if not isinstance(tc, dict):
                    continue
                idx = tc.get("index") or 0
                merged = tool_calls.setdefault(
                    idx,
                    {"id": "", "type": "function",
                     "function": {"name": "", "arguments": ""}},
                )
                if tc.get("id"):
                    merged["id"] = tc["id"]
                if tc.get("type"):
                    merged["type"] = tc["type"]
                fn = tc.get("function") or {}
                if fn.get("name"):
                    merged["function"]["name"] = fn["name"]
                if fn.get("arguments"):
                    # Arguments are streamed as string fragments.
                    merged["function"]["arguments"] += fn["arguments"]
            if choice.get("finish_reason"):
                finish_reason = choice["finish_reason"]

    merged_tc = [tool_calls[i] for i in sorted(tool_calls)] if tool_calls else None
    result = {"content": "".join(content_parts), "tool_calls": merged_tc,
              "finish_reason": finish_reason, "model": model}
    if usage:
        result["usage"] = usage
    return result
68
+
69
+
70
def extract_user_messages(body_str: str) -> list[dict]:
    """Extract ``{"role", "content"}`` pairs from a logged request body.

    Despite the name, messages of every role are returned; callers filter by
    role. List-style (multi-part) content is flattened to the newline-joined
    text of its ``type == "text"`` items, and a null content becomes ``""``.

    Args:
        body_str: The JSON request body as text.

    Returns:
        One dict per message, in order. An empty list when the body is not
        valid JSON, is not a JSON object, or has no usable ``messages``.
    """
    try:
        data = json.loads(body_str)
    except json.JSONDecodeError:
        return []
    # A truncated or unrelated body may decode to a list/str/number.
    if not isinstance(data, dict):
        return []

    result = []
    for msg in data.get("messages") or []:
        if not isinstance(msg, dict):
            continue
        content = msg.get("content", "")
        if isinstance(content, list):
            content = "\n".join(
                str(item.get("text") or "")
                for item in content
                if isinstance(item, dict) and item.get("type") == "text"
            )
        elif content is None:
            # e.g. assistant messages that only carry tool calls.
            content = ""
        result.append({"role": msg.get("role", ""), "content": content})
    return result
86
+
87
+
88
# Header of one logged record: "<asctime> <LEVEL> >>>|<<< <TYPE>".
# The proxy writes upstream failures with ``log.error``, so the level must
# not be pinned to INFO or ``<<< ERROR`` records are never recognised.
_RECORD_HEADER_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) (?:INFO|WARNING|ERROR) (>>>|<<<) (\w+)"
)
_INPUT_BLOCK_RE = re.compile(r"\s*MODEL=(\S+)\s+BODY=(.*)", re.DOTALL)
_OUTPUT_BLOCK_RE = re.compile(r"\s*MODEL=(\S+)\s+STATUS=(\d+)\s+BODY=(.*)", re.DOTALL)
_ERROR_BLOCK_RE = re.compile(r"\s*MODEL=(\S+)\s+REASON=(.*)", re.DOTALL)


def _input_record(timestamp: str, block: str, no_system: bool,
                  only_user: bool) -> Optional[dict]:
    """Build the record for one ``>>>`` (request) block, or None to skip it."""
    found = _INPUT_BLOCK_RE.match(block)
    if not found:
        return None
    model = found.group(1)
    body_str = found.group(2).strip()

    # System prompts are shown only when neither flag suppresses them.
    include_system = not no_system and not only_user
    user_contents = []
    for msg in extract_user_messages(body_str):
        content = msg.get("content") or ""
        if not isinstance(content, str):
            content = str(content)
        role = msg.get("role")
        if role == "user":
            user_contents.append(content)
        elif role == "system" and include_system:
            if len(content) > 500:
                user_contents.append(f"[SYSTEM] {content[:500]}...")
            else:
                user_contents.append(f"[SYSTEM] {content}")

    if only_user and not user_contents:
        return None
    return {
        "timestamp": timestamp, "model": model, "direction": "input",
        "user_input": "\n".join(user_contents) if user_contents else "(no user message)",
        "assistant_output": None, "tool_calls": None,
        "finish_reason": None, "usage": None, "status": None,
    }


def _output_record(timestamp: str, block: str) -> Optional[dict]:
    """Build the record for one ``<<<`` (response/error) block, or None."""
    found = _OUTPUT_BLOCK_RE.match(block)
    if not found:
        failure = _ERROR_BLOCK_RE.match(block)
        if not failure:
            # e.g. "STATUS=200 STREAMING=true" lines, which carry no body.
            return None
        return {
            "timestamp": timestamp, "model": failure.group(1),
            "direction": "error", "user_input": None,
            "assistant_output": None, "tool_calls": None,
            "finish_reason": "error", "usage": None,
            "status": "error", "error_reason": failure.group(2).strip(),
        }

    model = found.group(1)
    status = int(found.group(2))
    body_str = found.group(3).strip()

    assistant_output = ""
    tool_calls = None
    finish_reason = None
    usage = None
    if body_str.startswith("data:"):
        parsed = parse_streaming_chunks(body_str)
        assistant_output = parsed["content"]
        tool_calls = parsed.get("tool_calls")
        finish_reason = parsed.get("finish_reason")
        usage = parsed.get("usage")
    else:
        try:
            data = json.loads(body_str)
        except json.JSONDecodeError:
            data = None
        if isinstance(data, dict):
            for choice in data.get("choices") or []:
                if not isinstance(choice, dict):
                    continue
                msg = choice.get("message") or {}
                if msg.get("content") and isinstance(msg["content"], str):
                    assistant_output += msg["content"]
                if msg.get("tool_calls"):
                    tool_calls = msg["tool_calls"]
                if choice.get("finish_reason"):
                    finish_reason = choice["finish_reason"]
            usage = data.get("usage")
        else:
            # Not a JSON object: keep a short raw excerpt instead.
            assistant_output = body_str[:500]

    return {
        "timestamp": timestamp, "model": model, "direction": "output",
        "user_input": None, "assistant_output": assistant_output,
        "tool_calls": tool_calls, "finish_reason": finish_reason,
        "usage": usage, "status": status,
    }


def parse_log_file(filepath: Path, no_system: bool = False,
                   only_user: bool = False) -> list[dict]:
    """Parse one proxy log file into a flat list of request/response records.

    The file is split at every record header; the text up to the next header
    is that record's body, so multi-line bodies are supported.

    Args:
        filepath: Log file to read (decoded as UTF-8, bad bytes replaced).
        no_system: Omit system messages from ``user_input``.
        only_user: Keep only user-role text, and drop requests that contain
            no user message at all.

    Returns:
        Records in file order. ``direction`` is ``"input"``, ``"output"`` or
        ``"error"``; blocks that match none of the known layouts are skipped.
    """
    with open(filepath, "r", encoding="utf-8", errors="replace") as f:
        text = f.read()

    headers = list(_RECORD_HEADER_RE.finditer(text))
    records = []
    for i, header in enumerate(headers):
        end = headers[i + 1].start() if i + 1 < len(headers) else len(text)
        block = text[header.end():end].strip()
        timestamp = header.group(1)
        if header.group(2) == ">>>":
            record = _input_record(timestamp, block, no_system, only_user)
        else:
            record = _output_record(timestamp, block)
        if record is not None:
            records.append(record)
    return records
177
+
178
+
179
def pair_conversations(records: list[dict]) -> list[dict]:
    """Pair each input record with the next output or error record.

    Records are consumed in order. An input waits until the next output or
    error arrives; a newer input replaces one that is still waiting, and
    outputs/errors with no waiting input are dropped.

    Args:
        records: Records carrying a ``direction`` key, in log order.

    Returns:
        One conversation dict per matched pair. Timestamp, model and user
        input come from the request; the remaining fields come from the
        response (or are fixed error placeholders).
    """
    paired = []
    pending = None
    for record in records:
        kind = record["direction"]
        if kind == "input":
            pending = record
            continue
        if not pending or kind not in ("output", "error"):
            continue

        entry = {field: pending[field] for field in ("timestamp", "model", "user_input")}
        if kind == "output":
            for field in ("assistant_output", "tool_calls", "finish_reason",
                          "usage", "status"):
                entry[field] = record[field]
        else:
            entry["assistant_output"] = None
            entry["tool_calls"] = None
            entry["finish_reason"] = "error"
            entry["usage"] = None
            entry["status"] = "error"
            entry["error_reason"] = record.get("error_reason", "")
        paired.append(entry)
        pending = None
    return paired
208
+
209
+
210
def main():
    """Command-line entry point of the offline log analyzer.

    Parses the given log files (or every ``*.log`` under ``logs/`` when none
    are given), pairs requests with responses, and prints either aggregate
    statistics (``--stats``) or a full report in text or JSON form. The
    report goes to ``--output`` when set, otherwise to stdout. Exits with
    status 1 when no log file is found.
    """
    import argparse
    parser = argparse.ArgumentParser(description="LLM Proxy log analyzer")
    parser.add_argument("files", nargs="*", help="Log file paths")
    # Accepted for compatibility: omitting ``files`` already scans logs/.
    parser.add_argument("--all", action="store_true", help="Analyze all logs in logs/ dir")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    parser.add_argument("--output", "-o", help="Output file path")
    parser.add_argument("--no-system", action="store_true", help="Skip system messages")
    parser.add_argument("--only-user", action="store_true", help="Only user role input")
    parser.add_argument("--stats", action="store_true", help="Only output statistics")
    args = parser.parse_args()

    log_files = find_log_files(args.files) if args.files else find_log_files()
    if not log_files:
        print("No log files found", file=sys.stderr)
        sys.exit(1)

    all_conversations = []
    for log_file in log_files:
        records = parse_log_file(log_file, no_system=args.no_system, only_user=args.only_user)
        all_conversations.extend(pair_conversations(records))

    if args.stats:
        models = {}
        for c in all_conversations:
            m = c.get("model", "unknown")
            models[m] = models.get(m, 0) + 1
        print(json.dumps({
            "total_conversations": len(all_conversations),
            "errors": sum(1 for c in all_conversations if c.get("status") == "error"),
            # ``usage`` and the token counts may each be missing or null.
            "total_input_tokens": sum(
                (c.get("usage") or {}).get("prompt_tokens", 0) or 0
                for c in all_conversations),
            "total_output_tokens": sum(
                (c.get("usage") or {}).get("completion_tokens", 0) or 0
                for c in all_conversations),
            "models": models,
        }, ensure_ascii=False, indent=2))
        return

    if args.format == "json":
        report = json.dumps(all_conversations, ensure_ascii=False, indent=2)
    else:
        lines = ["=" * 80, "LLM Proxy Log Analysis Report",
                 f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
                 f"Conversations: {len(all_conversations)}", "=" * 80]
        for i, conv in enumerate(all_conversations, 1):
            lines += ["", f"--- #{i} | {conv['timestamp']} | {conv['model']} ---", "",
                      f"[User]\n{conv['user_input'] or '(empty)'}", "",
                      # The key always exists, so fall back on a falsy value.
                      f"[Assistant] finish={conv.get('finish_reason') or 'N/A'}\n"
                      f"{conv['assistant_output'] or '(empty)'}"]
            if conv.get("tool_calls"):
                lines.append(f"\n[Tool Calls] {len(conv['tool_calls'])}")
                for j, tc in enumerate(conv["tool_calls"], 1):
                    fn = tc.get("function") or {}
                    arguments = str(fn.get("arguments") or "")
                    lines.append(f" [{j}] {fn.get('name', '?')}({arguments[:200]})")
            if conv.get("usage"):
                u = conv["usage"]
                lines.append(f"\n[Token] prompt={u.get('prompt_tokens','?')} "
                             f"completion={u.get('completion_tokens','?')} "
                             f"total={u.get('total_tokens','?')}")
            lines.append("-" * 80)
        report = "\n".join(lines)

    if args.output:
        # Was ``args_output``, an undefined name that raised NameError.
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(report)
    else:
        print(report)
280
+
281
+
282
# Allow running the analyzer directly as a script.
if __name__ == "__main__":
    main()
llmproxy/cli.py ADDED
@@ -0,0 +1,33 @@
1
+ """CLI entry point for llmproxy."""
2
+
3
+ import sys
4
+
5
+
6
def main():
    """Parse CLI options, build the proxy app and serve it with uvicorn.

    ``--host`` / ``--port`` take precedence over the ``[proxy]`` section of
    the config file, which in turn falls back to ``0.0.0.0:8000``. The config
    path is ``--config``, else ``$LLMPROXY_CONFIG``, else ``config.ini``.
    """
    import argparse
    import uvicorn
    from .server import create_app

    cli = argparse.ArgumentParser(description="LLM Proxy server")
    cli.add_argument("--config", default=None, help="Path to config.ini")
    cli.add_argument("--host", default=None, help="Override bind host")
    cli.add_argument("--port", type=int, default=None, help="Override bind port")
    options = cli.parse_args()

    application = create_app(config_path=options.config)

    # Re-read the config here only to resolve the bind address.
    import configparser
    import os
    settings_file = options.config or os.environ.get("LLMPROXY_CONFIG", "config.ini")
    settings = configparser.ConfigParser()
    settings.read(settings_file)

    bind_host = options.host
    if not bind_host:
        bind_host = settings.get("proxy", "host", fallback="0.0.0.0")
    bind_port = options.port
    if not bind_port:
        bind_port = settings.getint("proxy", "port", fallback=8000)

    uvicorn.run(application, host=bind_host, port=bind_port)
30
+
31
+
32
# Allow running the CLI module directly as a script.
if __name__ == "__main__":
    main()
llmproxy/server.py ADDED
@@ -0,0 +1,269 @@
1
+ """Core proxy server module."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from datetime import datetime
7
+ from threading import Lock
8
+ from typing import Optional
9
+
10
+ import httpx
11
+ from fastapi import FastAPI, HTTPException, Request, Response
12
+ from fastapi.responses import JSONResponse, StreamingResponse
13
+
14
# Directory (relative to the current working directory) that receives the
# per-day analysis reports written by ``append_report``.
REPORT_DIR = "reports"
# NOTE: import-time side effect - the directory is created as soon as this
# module is imported.
os.makedirs(REPORT_DIR, exist_ok=True)

# Shared package logger; a console handler is attached only once so repeated
# imports/reloads do not duplicate output.
_log = logging.getLogger("llmproxy")
_log.setLevel(logging.INFO)
if not _log.handlers:
    _log.addHandler(logging.StreamHandler())

# Serializes writes to the report file.
_report_write_lock = Lock()
23
+
24
+
25
+ def _extract_user_input(req_json: dict) -> str:
26
+ messages = req_json.get("messages", [])
27
+ parts = []
28
+ for msg in messages:
29
+ if msg.get("role") == "user":
30
+ content = msg.get("content", "")
31
+ if isinstance(content, list):
32
+ text_parts = [
33
+ item.get("text", "")
34
+ for item in content
35
+ if isinstance(item, dict) and item.get("type") == "text"
36
+ ]
37
+ content = "\n".join(text_parts)
38
+ parts.append(content)
39
+ return "\n".join(parts) if parts else "(no user message)"
40
+
41
+
42
+ def _parse_output(output_text: str) -> dict:
43
+ result: dict = {"content": "", "tool_calls": None, "finish_reason": None, "usage": None}
44
+ if output_text.lstrip().startswith("data:"):
45
+ content_parts: list[str] = []
46
+ tc_map: dict[int, dict] = {}
47
+ for line in output_text.strip().splitlines():
48
+ line = line.strip()
49
+ if not line or line == "data: [DONE]":
50
+ continue
51
+ if not line.startswith("data: "):
52
+ continue
53
+ try:
54
+ chunk = json.loads(line[6:])
55
+ except json.JSONDecodeError:
56
+ continue
57
+ if not chunk:
58
+ continue
59
+ if chunk.get("lastOne"):
60
+ result["usage"] = chunk.get("usage")
61
+ for choice in chunk.get("choices", []):
62
+ delta = choice.get("delta", {})
63
+ c = delta.get("content")
64
+ if c:
65
+ content_parts.append(c)
66
+ for tc in delta.get("tool_calls", []):
67
+ idx = tc.get("index", 0)
68
+ if idx not in tc_map:
69
+ tc_map[idx] = {"id": "", "type": "function",
70
+ "function": {"name": "", "arguments": ""}}
71
+ if tc.get("id"):
72
+ tc_map[idx]["id"] = tc["id"]
73
+ if tc.get("type"):
74
+ tc_map[idx]["type"] = tc["type"]
75
+ fn = tc.get("function", {})
76
+ if fn.get("name"):
77
+ tc_map[idx]["function"]["name"] = fn["name"]
78
+ if fn.get("arguments"):
79
+ tc_map[idx]["function"]["arguments"] += fn["arguments"]
80
+ fr = choice.get("finish_reason")
81
+ if fr:
82
+ result["finish_reason"] = fr
83
+ result["content"] = "".join(content_parts)
84
+ if tc_map:
85
+ result["tool_calls"] = [tc_map[i] for i in sorted(tc_map)]
86
+ else:
87
+ try:
88
+ data = json.loads(output_text)
89
+ except json.JSONDecodeError:
90
+ result["content"] = output_text[:500]
91
+ return result
92
+ for choice in data.get("choices", []):
93
+ msg = choice.get("message", {})
94
+ if msg.get("content"):
95
+ result["content"] += msg["content"]
96
+ if msg.get("tool_calls"):
97
+ result["tool_calls"] = msg["tool_calls"]
98
+ if choice.get("finish_reason"):
99
+ result["finish_reason"] = choice["finish_reason"]
100
+ result["usage"] = data.get("usage")
101
+ return result
102
+
103
+
104
+ def _format_tool_calls(tool_calls: list[dict]) -> str:
105
+ lines = []
106
+ for i, tc in enumerate(tool_calls):
107
+ fn = tc.get("function", {})
108
+ args = fn.get("arguments", "")
109
+ try:
110
+ args_str = json.dumps(json.loads(args), ensure_ascii=False)
111
+ except (json.JSONDecodeError, TypeError):
112
+ args_str = args
113
+ lines.append(f" [{i+1}] {fn.get('name', '?')}({args_str[:300]})")
114
+ return "\n".join(lines)
115
+
116
+
117
def append_report(now: datetime, model_name: str, req_json: dict,
                  output_text: str, status_code: int) -> None:
    """Append one request/response summary to the daily analysis report.

    The entry goes to ``<REPORT_DIR>/<YYYYMMDD>-analysis.txt`` and contains a
    header line, the user input, any tool calls, the model output and token
    usage when available. Writes are serialized by the module-level lock.

    Args:
        now: Timestamp of the request; also selects the report file.
        model_name: Model name shown in the entry header.
        req_json: Decoded request body.
        output_text: Raw upstream response body (JSON or SSE text).
        status_code: Upstream HTTP status code.
    """
    day_stamp = now.strftime("%Y%m%d")
    target = os.path.join(REPORT_DIR, f"{day_stamp}-analysis.txt")
    question = _extract_user_input(req_json)
    answer = _parse_output(output_text)

    # Hold the lock for the whole entry so concurrent entries never interleave.
    with _report_write_lock, open(target, "a", encoding="utf-8") as report:
        emit = report.write
        emit(f"\n{'─'*70}\n")
        emit(f"时间: {now.strftime('%Y-%m-%d %H:%M:%S')} ")
        emit(f"模型: {model_name} ")
        emit(f"状态: {status_code} ")
        emit(f"finish: {answer.get('finish_reason') or 'N/A'}\n\n")
        emit(f"[用户输入]\n{question}\n\n")
        if answer.get("tool_calls"):
            emit(f"[工具调用] {len(answer['tool_calls'])} 个\n")
            emit(_format_tool_calls(answer["tool_calls"]))
            emit("\n\n")
        emit(f"[LLM 输出]\n{answer['content']}\n")
        if answer.get("usage"):
            tokens = answer["usage"]
            emit(f"\n[Token] prompt={tokens.get('prompt_tokens','?')} "
                 f"completion={tokens.get('completion_tokens','?')} "
                 f"total={tokens.get('total_tokens','?')}\n")
144
+
145
+
146
def create_app(config_path: Optional[str] = None) -> FastAPI:
    """Create and configure the FastAPI application.

    Reads an INI config with a ``[models]`` section whose values have the
    form ``api_key|base_url|upstream_model``, plus optional ``[proxy]``
    (``host``, ``port``) and ``[auth]`` (``proxy_api_key``) sections.
    Requests to ``/v1/{path}`` are forwarded to the upstream selected by the
    request's ``model`` field; requests and responses are logged to
    ``logs/<YYYYMMDD>.log`` and summarized via ``append_report``.

    Args:
        config_path: Path to the config file. Defaults to
            ``$LLMPROXY_CONFIG`` and then ``config.ini``.

    Returns:
        The configured FastAPI app.

    Raises:
        KeyError: If the config has no ``[models]`` section (including when
            the config file does not exist).
    """
    import configparser
    import hmac

    if config_path is None:
        config_path = os.environ.get("LLMPROXY_CONFIG", "config.ini")

    cfg = configparser.ConfigParser()
    cfg.read(config_path)

    # alias -> (upstream API key, upstream base URL, upstream model name)
    models: dict[str, tuple[str, str, str]] = {}
    for name, val in cfg["models"].items():
        parts = val.split("|", 2)
        if len(parts) != 3:
            # Previously a value with a single "|" raised ValueError here.
            _log.warning("Ignoring malformed [models] entry %r "
                         "(expected api_key|base_url|model)", name)
            continue
        key, base, model = (part.strip() for part in parts)
        models[name] = (key, base, model)

    proxy_host = cfg.get("proxy", "host", fallback="0.0.0.0")
    proxy_port = cfg.getint("proxy", "port", fallback=8000)
    proxy_api_key = cfg.get("auth", "proxy_api_key", fallback="")

    log_dir = "logs"
    os.makedirs(log_dir, exist_ok=True)

    # Headers that describe the upstream transfer framing. The body replayed
    # to the client is already decoded, so they must not be copied over.
    dropped_headers = {"content-encoding", "content-length", "transfer-encoding"}

    def get_logger(ymd: str) -> logging.Logger:
        """Return the shared logger with a file handler for day ``ymd``."""
        target = os.path.abspath(os.path.join(log_dir, f"{ymd}.log"))
        file_handlers = [h for h in _log.handlers if isinstance(h, logging.FileHandler)]
        if any(h.baseFilename == target for h in file_handlers):
            # Reuse today's handler instead of reopening the file per request.
            return _log
        for stale in file_handlers:
            _log.removeHandler(stale)
            stale.close()  # release the previous day's file descriptor
        fh = logging.FileHandler(target, encoding="utf-8")
        fh.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
        _log.addHandler(fh)
        return _log

    def verify_auth(auth_header: Optional[str]):
        """Check the bearer token; a no-op when no proxy key is configured."""
        if not proxy_api_key:
            return
        if not auth_header:
            raise HTTPException(401, "Missing Authorization header")
        supplied = auth_header.removeprefix("Bearer ").strip()
        # Constant-time comparison; bytes so non-ASCII input cannot raise.
        if not hmac.compare_digest(supplied.encode("utf-8"),
                                   proxy_api_key.encode("utf-8")):
            raise HTTPException(403, "Invalid proxy API key")

    def record_report(now: datetime, model_name: str, req_json: dict,
                      output_text: str, status_code: int) -> None:
        """Write the analysis entry without letting a failure reach the client."""
        try:
            append_report(now, model_name, req_json, output_text, status_code)
        except Exception:
            # Reporting is auxiliary: the upstream call already succeeded.
            _log.exception("Failed to append analysis report")

    app = FastAPI(title="LLM Proxy", version="1.0.0")

    @app.get("/v1/version")
    async def get_version(request: Request):
        verify_auth(request.headers.get("Authorization"))
        return JSONResponse({"version": "1.0.0", "name": "llmproxy"})

    @app.get("/v1/props")
    async def get_props(request: Request):
        verify_auth(request.headers.get("Authorization"))
        return JSONResponse({
            "models": list(models.keys()),
            "host": proxy_host,
            "port": proxy_port,
            "auth_enabled": bool(proxy_api_key),
        })

    @app.get("/v1/models")
    async def list_models(request: Request):
        verify_auth(request.headers.get("Authorization"))
        data = [{"id": m, "object": "model",
                 "created": int(datetime.now().timestamp()),
                 "owned_by": "llmproxy"} for m in models]
        return JSONResponse({"object": "list", "data": data})

    @app.api_route("/v1/{path:path}", methods=["POST", "GET", "PUT", "DELETE", "PATCH"])
    async def proxy(request: Request, path: str):
        verify_auth(request.headers.get("Authorization"))
        body_bytes = await request.body()
        try:
            req_json = json.loads(body_bytes) if body_bytes else {}
        except ValueError:
            # Covers JSONDecodeError and UnicodeDecodeError (invalid UTF-8).
            raise HTTPException(400, "Invalid JSON body") from None
        if not isinstance(req_json, dict):
            raise HTTPException(400, "Invalid JSON body")

        requested = req_json.get("model", "")
        model_name = requested.lower() if isinstance(requested, str) else ""
        if not model_name or model_name not in models:
            raise HTTPException(400, f"Unsupported model '{model_name}'. Available: {list(models.keys())}")

        api_key, base_url, model = models[model_name]
        req_json["model"] = model
        target_url = f"{base_url.rstrip('/')}/{path}"
        headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
        input_text = json.dumps(req_json)
        now = datetime.now()
        ymd = now.strftime("%Y%m%d")
        log = get_logger(ymd)
        log.info(">>> INPUT MODEL=%(model)s BODY=%(body)s",
                 {"name": model_name, "model": model, "body": input_text})

        # NOTE(review): ``client.request`` reads the whole upstream body
        # before returning, so SSE responses are replayed rather than
        # forwarded incrementally. True streaming would need
        # ``client.stream`` and explicit client/response lifetime handling.
        try:
            async with httpx.AsyncClient(timeout=300) as client:
                resp = await client.request(method=request.method, url=target_url,
                                            headers=headers, content=input_text)
        except httpx.RequestError as e:
            log.error("<<< ERROR MODEL=%(model)s REASON=%(reason)s",
                      {"model": model_name, "reason": str(e)})
            raise HTTPException(502, f"Upstream request failed: {e}") from e

        content_type = resp.headers.get("content-type", "")

        if "text/event-stream" in content_type:
            output_chunks: list[bytes] = []

            async def stream_with_capture():
                async for chunk in resp.aiter_bytes():
                    output_chunks.append(chunk)
                    yield chunk
                output_text = b"".join(output_chunks).decode("utf-8", errors="replace")
                # Log the completed stream in the same BODY= layout as
                # non-streaming responses so offline analysis can read it.
                log.info("<<< OUTPUT MODEL=%(model)s STATUS=%(status)d BODY=%(body)s",
                         {"model": model_name, "status": resp.status_code, "body": output_text})
                record_report(now, model_name, req_json, output_text, resp.status_code)

            log.info("<<< OUTPUT MODEL=%(model)s STATUS=%(status)d STREAMING=true",
                     {"model": model_name, "status": resp.status_code})
            return StreamingResponse(
                stream_with_capture(), status_code=resp.status_code,
                media_type="text/event-stream",
                headers={k: v for k, v in resp.headers.items()
                         if k.lower() not in dropped_headers},
            )

        output_text = resp.text
        log.info("<<< OUTPUT MODEL=%(model)s STATUS=%(status)d BODY=%(body)s",
                 {"model": model_name, "status": resp.status_code, "body": output_text})
        record_report(now, model_name, req_json, output_text, resp.status_code)
        # ``or None`` avoids emitting an empty Content-Type header.
        return Response(content=output_text, status_code=resp.status_code,
                        media_type=content_type or None)

    return app
@@ -0,0 +1,166 @@
1
+ Metadata-Version: 2.4
2
+ Name: llmproxy-withlog
3
+ Version: 1.0.0
4
+ Summary: OpenAI-compatible LLM reverse proxy with real-time conversation analytics
5
+ Author-email: william <william@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/hkjgvugkjh/llmproxy
8
+ Project-URL: Repository, https://github.com/hkjgvugkjh/llmproxy
9
+ Project-URL: Issues, https://github.com/hkjgvugkjh/llmproxy/issues
10
+ Keywords: llm,proxy,openai,api,analytics,chatbot
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Internet :: WWW/HTTP :: HTTP Servers
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ Requires-Dist: fastapi>=0.110
24
+ Requires-Dist: httpx>=0.27
25
+ Requires-Dist: uvicorn[standard]>=0.29
26
+ Provides-Extra: dev
27
+ Requires-Dist: build>=1.0; extra == "dev"
28
+ Requires-Dist: twine>=5.0; extra == "dev"
29
+
30
+ # LLM Proxy
31
+
32
+ OpenAI 兼容的 LLM 反向代理,支持多模型路由、请求日志记录和实时对话分析报告。
33
+
34
+ ## 功能特性
35
+
36
+ - **多模型路由**:通过 `config.ini` 配置多个上游 LLM 客户端,统一用 OpenAI 格式调用
37
+ - **请求日志**:每次请求/响应自动记录到 `logs/YYYYMMDD.log`
38
+ - **实时分析报告**:每个请求完成后自动追加到 `reports/YYYYMMDD-analysis.txt`,包含:
39
+ - 用户输入(仅 user 角色)
40
+ - LLM 完整输出(流式响应自动合并 SSE chunk)
41
+ - 工具调用详情
42
+ - Token 用量
43
+ - **流式响应支持**:边转发边收集,不改变流式行为
44
+ - **并发安全**:多请求并发写入报告时自动加锁
45
+ - **独立日志分析工具**:`analyze_logs.py` 支持对历史日志做离线分析和统计
46
+
47
+ ## 环境要求
48
+
49
+ - Python 3.10+
50
+ - 依赖:`fastapi` `httpx` `uvicorn`
51
+
52
+ ## 快速开始
53
+
54
+ ### 1. 安装依赖
55
+
56
+ ```bash
57
+ pip install -r requirements.txt
58
+ ```
59
+
60
+ ### 2. 配置
61
+
62
+ 编辑 `config.ini`,填入你的上游 LLM 信息:
63
+
64
+ ```ini
65
+ [models]
66
+ my-model = YOUR_API_KEY|https://api.openai.com/v1|gpt-4o
67
+
68
+ [proxy]
69
+ host = 0.0.0.0
70
+ port = 8000
71
+
72
+ [auth]
73
+ # 可选,留空则不校验;注释必须单独成行(configparser 默认不支持行内注释,否则注释会被当作密钥的一部分)
+ proxy_api_key = my-secret-key
74
+ ```
75
+
76
+ ### 3. 启动
77
+
78
+ ```bash
79
+ # 前台启动
80
+ python3 llm_proxy.py
81
+
82
+ # 后台启动
83
+ ./start.sh
84
+
85
+ # systemd 服务
86
+ sudo cp llmproxy.service /etc/systemd/system/
87
+ sudo systemctl enable --now llmproxy
88
+ ```
89
+
90
+ ### 4. 调用示例
91
+
92
+ ```bash
93
+ # 列出可用模型
94
+ curl -s http://localhost:8000/v1/models \
95
+ -H "Authorization: Bearer my-secret-key" | jq
96
+
97
+ # 对话
98
+ curl -s http://localhost:8000/v1/chat/completions \
99
+ -H "Authorization: Bearer my-secret-key" \
100
+ -H "Content-Type: application/json" \
101
+ -d '{"model":"my-model","messages":[{"role":"user","content":"你好"}]}'
102
+ ```
103
+
104
+ ## API 端点
105
+
106
+ | 方法 | 路径 | 说明 |
107
+ |------|------|------|
108
+ | GET | `/v1/version` | 版本信息 |
109
+ | GET | `/v1/props` | 配置信息(模型列表等) |
110
+ | GET | `/v1/models` | 模型列表(OpenAI 格式) |
111
+ | ALL | `/v1/{path}` | 透传到上游(支持 POST/GET/PUT/DELETE/PATCH) |
112
+
113
+ ## 文件结构
114
+
115
+ ```
116
+ llmproxy/
117
+ ├── llm_proxy.py # 主程序(代理 + 实时报告)
118
+ ├── analyze_logs.py # 离线日志分析工具
119
+ ├── daily_analyze.sh # 每日分析定时脚本
120
+ ├── config.ini # 配置文件(需自行填写)
121
+ ├── requirements.txt # Python 依赖
122
+ ├── start.sh # 后台启动脚本
123
+ ├── llmproxy.service # systemd 服务文件
124
+ ├── README.md # 本文件
125
+ ├── logs/ # 请求日志(自动生成)
126
+ │ └── YYYYMMDD.log
127
+ └── reports/ # 对话分析报告(自动生成)
128
+ └── YYYYMMDD-analysis.txt
129
+ ```
130
+
131
+ ## 日志分析工具
132
+
133
+ `analyze_logs.py` 支持对历史日志做离线分析:
134
+
135
+ ```bash
136
+ # 分析指定文件,输出文本报告
137
+ python3 analyze_logs.py logs/20260523.log --only-user
138
+
139
+ # 输出 JSON 格式
140
+ python3 analyze_logs.py logs/20260523.log --format json -o result.json
141
+
142
+ # 仅统计信息
143
+ python3 analyze_logs.py logs/20260523.log --stats
144
+
145
+ # 分析所有日志
146
+ python3 analyze_logs.py --all --stats
147
+ ```
148
+
149
+ ## 报告格式示例
150
+
151
+ ```
152
+ ──────────────────────────────────────────────────────────────────────
153
+ 时间: 2026-05-23 15:00:00 模型: my-model 状态: 200 finish: stop
154
+
155
+ [用户输入]
156
+ 你好,请介绍一下你自己
157
+
158
+ [LLM 输出]
159
+ 你好!我是一个 AI 助手,可以帮你完成各种任务...
160
+
161
+ [Token] prompt=128 completion=64 total=192
162
+ ```
163
+
164
+ ## License
165
+
166
+ MIT
@@ -0,0 +1,10 @@
1
+ llmproxy/__init__.py,sha256=mQU3InoddrvbgN25ZNipN7M_tQgBtECwmor4fWjmzDE,103
2
+ llmproxy/__main__.py,sha256=J4sjpF3UFAL_-7fnknWRrQxqwBYMK3WpWqeHt5KqiTY,105
3
+ llmproxy/analyzer.py,sha256=i80cxKAVvmXCY0v5EgA_ZLn6_hLqoH99RgBixoE04rE,12161
4
+ llmproxy/cli.py,sha256=bgLndus-fhYxMyR8iJa5Ym3SAmN9S_S48fUZFe90eJ4,1012
5
+ llmproxy/server.py,sha256=GvQDQ9aamatOYx3m5n8G4-Okq68wRd5ygkzn7P1XXCQ,11223
6
+ llmproxy_withlog-1.0.0.dist-info/METADATA,sha256=e88LjkA-g6Ag416wvo4Im1gQvrHkK_Id1Sid-0VThmM,4915
7
+ llmproxy_withlog-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
8
+ llmproxy_withlog-1.0.0.dist-info/entry_points.txt,sha256=fDGSPH4tcc0o-IfOe0HzNxvNKblMDL5C-87MTsJtJdw,47
9
+ llmproxy_withlog-1.0.0.dist-info/top_level.txt,sha256=4cSfjBrXRgldvKNBqhAGPfmwvXGCrIag84jA8yzUars,9
10
+ llmproxy_withlog-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ llmproxy = llmproxy.cli:main
@@ -0,0 +1 @@
1
+ llmproxy