autoglm-gui 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
AutoGLM_GUI/server.py CHANGED
@@ -1,620 +1,5 @@
1
1
  """AutoGLM-GUI Backend API Server."""
2
2
 
3
- import asyncio
4
- import json
5
- import os
6
- from importlib.metadata import version as get_version
7
- from importlib.resources import files
8
- from pathlib import Path
3
+ from AutoGLM_GUI.api import app
9
4
 
10
- from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
11
- from fastapi.middleware.cors import CORSMiddleware
12
- from fastapi.responses import FileResponse, StreamingResponse
13
- from fastapi.staticfiles import StaticFiles
14
- from phone_agent import PhoneAgent
15
- from phone_agent.agent import AgentConfig
16
- from phone_agent.model import ModelConfig
17
- from pydantic import BaseModel, Field
18
-
19
- from AutoGLM_GUI.adb_plus import capture_screenshot
20
- from AutoGLM_GUI.scrcpy_stream import ScrcpyStreamer
21
-
22
# Global scrcpy streamer instance and the lock that guards it
scrcpy_streamer: ScrcpyStreamer | None = None
scrcpy_lock = asyncio.Lock()

# Resolve the installed package version
try:
    __version__ = get_version("autoglm-gui")
except Exception:
    # Version lookup failed; fall back to a placeholder version string
    __version__ = "dev"

app = FastAPI(title="AutoGLM-GUI API", version=__version__)

# CORS configuration (needed for the development environment)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global singleton agent, plus the configs it was last built from
agent: PhoneAgent | None = None
last_model_config: ModelConfig | None = None
last_agent_config: AgentConfig | None = None

# Default configuration (read from environment variables first; supports reload mode)
DEFAULT_BASE_URL: str = os.getenv("AUTOGLM_BASE_URL", "")
DEFAULT_MODEL_NAME: str = os.getenv("AUTOGLM_MODEL_NAME", "autoglm-phone-9b")
DEFAULT_API_KEY: str = os.getenv("AUTOGLM_API_KEY", "EMPTY")
52
-
53
-
54
def _non_blocking_takeover(message: str) -> None:
    """Print a takeover request to stdout instead of waiting on console input.

    Args:
        message: Takeover text supplied by the agent.
    """
    line = "[Takeover] {}".format(message)
    print(line)
57
-
58
-
59
# Request/response models
class APIModelConfig(BaseModel):
    """Model settings accepted by ``/api/init``.

    ``init_agent`` copies these values into a ``ModelConfig``; ``base_url``,
    ``api_key`` and ``model_name`` fall back to the module-level defaults
    when they are missing or empty.
    """

    base_url: str | None = None
    api_key: str | None = None
    model_name: str | None = None
    max_tokens: int = 3000
    temperature: float = 0.0
    top_p: float = 0.85
    frequency_penalty: float = 0.2
68
-
69
-
70
class APIAgentConfig(BaseModel):
    """Agent settings accepted by ``/api/init``.

    ``init_agent`` passes every field through unchanged to ``AgentConfig``.
    """

    max_steps: int = 100
    device_id: str | None = None
    lang: str = "cn"
    system_prompt: str | None = None
    verbose: bool = True
76
-
77
-
78
class InitRequest(BaseModel):
    """Body of ``/api/init``.

    Both sections are optional; ``init_agent`` substitutes a
    default-constructed config for whichever one is omitted.
    """

    # Populated from the "model_config" key of the request body
    model: APIModelConfig | None = Field(default=None, alias="model_config")
    # Populated from the "agent_config" key of the request body
    agent: APIAgentConfig | None = Field(default=None, alias="agent_config")
81
-
82
-
83
class ChatRequest(BaseModel):
    """Body of ``/api/chat`` and ``/api/chat/stream``."""

    # Task text handed to the agent
    message: str
85
-
86
-
87
class ChatResponse(BaseModel):
    """Result of a blocking ``/api/chat`` call."""

    # Value returned by agent.run(), or the exception text on failure
    result: str
    # Agent step count after the run; 0 when the run failed
    steps: int
    # False when the run raised an exception
    success: bool
91
-
92
-
93
class StatusResponse(BaseModel):
    """Payload of ``/api/status``."""

    # Package version string (module-level __version__)
    version: str
    # True once a global agent instance exists
    initialized: bool
    # Current agent step count; 0 when no agent exists
    step_count: int
97
-
98
-
99
class ScreenshotRequest(BaseModel):
    """Body of ``/api/screenshot``."""

    # Forwarded to capture_screenshot(); None when the caller omits it
    device_id: str | None = None
101
-
102
-
103
class ScreenshotResponse(BaseModel):
    """Payload of ``/api/screenshot``.

    On failure the endpoint returns an empty image, zero dimensions and the
    exception text in ``error``.
    """

    success: bool
    image: str  # base64 encoded PNG
    width: int
    height: int
    is_sensitive: bool
    error: str | None = None
110
-
111
-
112
class TapRequest(BaseModel):
    """Body of ``/api/control/tap``; fields are forwarded to ``phone_agent.adb.tap``."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0
117
-
118
-
119
class TapResponse(BaseModel):
    """Payload of ``/api/control/tap``."""

    success: bool
    # Exception text when the tap failed, otherwise None
    error: str | None = None
122
-
123
-
124
class SwipeRequest(BaseModel):
    """Body of ``/api/control/swipe``; fields are forwarded to ``phone_agent.adb.swipe``."""

    start_x: int
    start_y: int
    end_x: int
    end_y: int
    # Passed through as-is; None when the caller omits it
    duration_ms: int | None = None
    device_id: str | None = None
    delay: float = 0.0
132
-
133
-
134
class SwipeResponse(BaseModel):
    """Payload of ``/api/control/swipe``."""

    success: bool
    # Exception text when the swipe failed, otherwise None
    error: str | None = None
137
-
138
-
139
class TouchDownRequest(BaseModel):
    """Body of ``/api/control/touch/down``; fields are forwarded to ``adb_plus.touch_down``."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0
144
-
145
-
146
class TouchDownResponse(BaseModel):
    """Payload of ``/api/control/touch/down``."""

    success: bool
    # Exception text when the event failed, otherwise None
    error: str | None = None
149
-
150
-
151
class TouchMoveRequest(BaseModel):
    """Body of ``/api/control/touch/move``; fields are forwarded to ``adb_plus.touch_move``."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0
156
-
157
-
158
class TouchMoveResponse(BaseModel):
    """Payload of ``/api/control/touch/move``."""

    success: bool
    # Exception text when the event failed, otherwise None
    error: str | None = None
161
-
162
-
163
class TouchUpRequest(BaseModel):
    """Body of ``/api/control/touch/up``; fields are forwarded to ``adb_plus.touch_up``."""

    x: int
    y: int
    device_id: str | None = None
    delay: float = 0.0
168
-
169
-
170
class TouchUpResponse(BaseModel):
    """Payload of ``/api/control/touch/up``."""

    success: bool
    # Exception text when the event failed, otherwise None
    error: str | None = None
173
-
174
-
175
# API endpoints
@app.post("/api/init")
def init_agent(request: InitRequest) -> dict:
    """Create the global PhoneAgent from the request and remember its configs.

    Missing sections of the request are replaced by default-constructed
    configs; ``base_url``, ``api_key`` and ``model_name`` fall back to the
    module-level defaults when empty.

    Raises:
        HTTPException: 400 when no base URL is available from either the
            request or the environment defaults.
    """
    global agent, last_model_config, last_agent_config

    # Substitute empty config objects for omitted sections
    model_req = request.model or APIModelConfig()
    agent_req = request.agent or APIAgentConfig()

    # Request values win; otherwise use the defaults
    resolved_url = model_req.base_url or DEFAULT_BASE_URL
    resolved_key = model_req.api_key or DEFAULT_API_KEY
    resolved_name = model_req.model_name or DEFAULT_MODEL_NAME

    if not resolved_url:
        raise HTTPException(
            status_code=400, detail="base_url is required (in model_config or env)"
        )

    built_model_config = ModelConfig(
        base_url=resolved_url,
        api_key=resolved_key,
        model_name=resolved_name,
        max_tokens=model_req.max_tokens,
        temperature=model_req.temperature,
        top_p=model_req.top_p,
        frequency_penalty=model_req.frequency_penalty,
    )
    built_agent_config = AgentConfig(
        max_steps=agent_req.max_steps,
        device_id=agent_req.device_id,
        lang=agent_req.lang,
        system_prompt=agent_req.system_prompt,
        verbose=agent_req.verbose,
    )

    agent = PhoneAgent(
        model_config=built_model_config,
        agent_config=built_agent_config,
        takeover_callback=_non_blocking_takeover,
    )

    # Keep the latest configs so a reset can rebuild the agent automatically
    last_model_config, last_agent_config = built_model_config, built_agent_config

    return {"success": True, "message": "Agent initialized"}
224
-
225
-
226
@app.post("/api/chat", response_model=ChatResponse)
def chat(request: ChatRequest) -> ChatResponse:
    """Run a task on the agent to completion and return its outcome.

    Raises:
        HTTPException: 400 when the agent has not been initialized.
    """
    global agent

    if agent is None:
        raise HTTPException(
            status_code=400, detail="Agent not initialized. Call /api/init first."
        )

    try:
        outcome = agent.run(request.message)
        # Read the step count before reset() runs
        steps_taken = agent.step_count
        agent.reset()
        return ChatResponse(result=outcome, steps=steps_taken, success=True)
    except Exception as exc:
        # Failures are reported in the response body rather than as an HTTP error
        return ChatResponse(result=str(exc), steps=0, success=False)
244
-
245
-
246
@app.post("/api/chat/stream")
def chat_stream(request: ChatRequest):
    """Run a task on the agent and push progress as Server-Sent Events.

    Emits one ``step`` event per agent step, then a ``done`` event when the
    agent finishes or the step limit is reached, or an ``error`` event when
    a step raises.

    Raises:
        HTTPException: 400 when the agent has not been initialized.
    """
    # Bind the agent once: /api/init and /api/reset rebind the global, and the
    # stream must keep talking to (and finally reset) the agent it started with.
    active_agent = agent

    if active_agent is None:
        raise HTTPException(
            status_code=400, detail="Agent not initialized. Call /api/init first."
        )

    def sse(event: str, payload: dict) -> str:
        """Format one SSE frame (event line, data line, blank line)."""
        return f"event: {event}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n"

    def event_generator():
        """Yield SSE frames while driving the agent step by step."""
        try:
            # The first step carries the task; later steps continue it
            step_result = active_agent.step(request.message)
            while True:
                yield sse(
                    "step",
                    {
                        "type": "step",
                        "step": active_agent.step_count,
                        "thinking": step_result.thinking,
                        "action": step_result.action,
                        "success": step_result.success,
                        "finished": step_result.finished,
                    },
                )

                if step_result.finished:
                    yield sse(
                        "done",
                        {
                            "type": "done",
                            "message": step_result.message,
                            "steps": active_agent.step_count,
                            "success": step_result.success,
                        },
                    )
                    break

                if active_agent.step_count >= active_agent.agent_config.max_steps:
                    yield sse(
                        "done",
                        {
                            "type": "done",
                            "message": "Max steps reached",
                            "steps": active_agent.step_count,
                            "success": step_result.success,
                        },
                    )
                    break

                step_result = active_agent.step()

        except Exception as e:
            # Report the failure to the client as an error event
            yield sse("error", {"type": "error", "message": str(e)})
        finally:
            # Always reset, including after an error or a client disconnect
            # (generator close), so the next task does not start from the
            # leftover state of this one.
            try:
                active_agent.reset()
            except Exception as reset_error:
                print(f"[chat/stream] Failed to reset agent: {reset_error}")

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # disable nginx buffering
        },
    )
320
-
321
-
322
@app.get("/api/status", response_model=StatusResponse)
def get_status() -> StatusResponse:
    """Report the package version and whether an agent currently exists."""
    global agent

    has_agent = agent is not None
    if agent:
        current_steps = agent.step_count
    else:
        current_steps = 0

    return StatusResponse(
        version=__version__,
        initialized=has_agent,
        step_count=current_steps,
    )
332
-
333
-
334
@app.post("/api/reset")
def reset_agent() -> dict:
    """Reset the agent, rebuilding it from the last configs when available.

    Returns:
        A dict with ``success``, ``message`` and ``reinitialized``; the last
        is True when a fresh agent was built from the remembered configs.
    """
    global agent, last_model_config, last_agent_config

    # Clear the state of the current instance first
    if agent is not None:
        agent.reset()

    # Rebuild only when both configs were recorded; otherwise drop the agent
    can_rebuild = bool(last_model_config and last_agent_config)
    if can_rebuild:
        agent = PhoneAgent(
            model_config=last_model_config,
            agent_config=last_agent_config,
            takeover_callback=_non_blocking_takeover,
        )
    else:
        agent = None

    return {
        "success": True,
        "message": "Agent reset",
        "reinitialized": can_rebuild,
    }
361
-
362
-
363
@app.post("/api/video/reset")
async def reset_video_stream() -> dict:
    """Stop and discard the global scrcpy streamer, if there is one."""
    global scrcpy_streamer

    async with scrcpy_lock:
        # Nothing to tear down
        if scrcpy_streamer is None:
            return {"success": True, "message": "No active video stream"}

        print("[video/reset] Stopping existing streamer...")
        scrcpy_streamer.stop()
        scrcpy_streamer = None
        print("[video/reset] Streamer reset complete")
        return {"success": True, "message": "Video stream reset"}
377
-
378
-
379
@app.post("/api/screenshot", response_model=ScreenshotResponse)
def take_screenshot(request: ScreenshotRequest) -> ScreenshotResponse:
    """Capture a device screenshot.

    Failures are reported in the response body (``success=False`` plus the
    exception text) rather than raised.
    """
    try:
        shot = capture_screenshot(device_id=request.device_id)
        captured = {
            "success": True,
            "image": shot.base64_data,
            "width": shot.width,
            "height": shot.height,
            "is_sensitive": shot.is_sensitive,
        }
        return ScreenshotResponse(**captured)
    except Exception as exc:
        failed = {
            "success": False,
            "image": "",
            "width": 0,
            "height": 0,
            "is_sensitive": False,
            "error": str(exc),
        }
        return ScreenshotResponse(**failed)
400
-
401
-
402
@app.post("/api/control/tap", response_model=TapResponse)
def control_tap(request: TapRequest) -> TapResponse:
    """Tap the device at the requested coordinates.

    Any failure, including a failed import of the tap helper, is returned as
    ``success=False`` with the exception text.
    """
    try:
        from phone_agent.adb import tap

        tap_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        tap(**tap_args)

        return TapResponse(success=True)
    except Exception as exc:
        return TapResponse(success=False, error=str(exc))
418
-
419
-
420
@app.post("/api/control/swipe", response_model=SwipeResponse)
def control_swipe(request: SwipeRequest) -> SwipeResponse:
    """Swipe on the device from the start point to the end point.

    Any failure, including a failed import of the swipe helper, is returned
    as ``success=False`` with the exception text.
    """
    try:
        from phone_agent.adb import swipe

        swipe_args = {
            "start_x": request.start_x,
            "start_y": request.start_y,
            "end_x": request.end_x,
            "end_y": request.end_y,
            "duration_ms": request.duration_ms,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        swipe(**swipe_args)

        return SwipeResponse(success=True)
    except Exception as exc:
        return SwipeResponse(success=False, error=str(exc))
439
-
440
-
441
@app.post("/api/control/touch/down", response_model=TouchDownResponse)
def control_touch_down(request: TouchDownRequest) -> TouchDownResponse:
    """Send a touch DOWN event at the requested coordinates.

    Any failure, including a failed import of the helper, is returned as
    ``success=False`` with the exception text.
    """
    try:
        from AutoGLM_GUI.adb_plus import touch_down

        event_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        touch_down(**event_args)

        return TouchDownResponse(success=True)
    except Exception as exc:
        return TouchDownResponse(success=False, error=str(exc))
457
-
458
-
459
@app.post("/api/control/touch/move", response_model=TouchMoveResponse)
def control_touch_move(request: TouchMoveRequest) -> TouchMoveResponse:
    """Send a touch MOVE event at the requested coordinates.

    Any failure, including a failed import of the helper, is returned as
    ``success=False`` with the exception text.
    """
    try:
        from AutoGLM_GUI.adb_plus import touch_move

        event_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        touch_move(**event_args)

        return TouchMoveResponse(success=True)
    except Exception as exc:
        return TouchMoveResponse(success=False, error=str(exc))
475
-
476
-
477
@app.post("/api/control/touch/up", response_model=TouchUpResponse)
def control_touch_up(request: TouchUpRequest) -> TouchUpResponse:
    """Send a touch UP event at the requested coordinates.

    Any failure, including a failed import of the helper, is returned as
    ``success=False`` with the exception text.
    """
    try:
        from AutoGLM_GUI.adb_plus import touch_up

        event_args = {
            "x": request.x,
            "y": request.y,
            "device_id": request.device_id,
            "delay": request.delay,
        }
        touch_up(**event_args)

        return TouchUpResponse(success=True)
    except Exception as exc:
        return TouchUpResponse(success=False, error=str(exc))
493
-
494
-
495
@app.websocket("/api/video/stream")
async def video_stream_ws(websocket: WebSocket):
    """Stream real-time H.264 video from scrcpy server via WebSocket.

    Accepts the connection, creates or reuses the global streamer under
    ``scrcpy_lock``, then forwards chunks from ``read_h264_chunk()`` as
    binary frames until the client disconnects or an error occurs. Errors
    are sent to the client as a JSON ``{"error": ...}`` message on a
    best-effort basis, and a failed stream tears down the global streamer.
    """
    global scrcpy_streamer

    await websocket.accept()
    print("[video/stream] WebSocket connection accepted")

    # Use global lock to prevent concurrent streamer initialization
    async with scrcpy_lock:
        # Reuse existing streamer if available
        if scrcpy_streamer is None:
            print("[video/stream] Creating new streamer instance...")
            scrcpy_streamer = ScrcpyStreamer(max_size=1280, bit_rate=4_000_000)

            try:
                print("[video/stream] Starting scrcpy server...")
                await scrcpy_streamer.start()
                print("[video/stream] Scrcpy server started successfully")
            except Exception as e:
                import traceback
                print(f"[video/stream] Failed to start streamer: {e}")
                print(f"[video/stream] Traceback:\n{traceback.format_exc()}")
                # Discard the half-started streamer so the next client retries from scratch
                scrcpy_streamer.stop()
                scrcpy_streamer = None
                # Best effort: the socket may already be closed
                try:
                    await websocket.send_json({"error": str(e)})
                except Exception:
                    pass
                return
        else:
            print("[video/stream] Reusing existing streamer instance")

            # Send ONLY SPS/PPS (not IDR) to initialize decoder
            # Client will then wait for next live IDR frame (max 1s with i-frame-interval=1)
            # This avoids issues with potentially corrupted cached IDR frames
            if scrcpy_streamer.cached_sps and scrcpy_streamer.cached_pps:
                init_data = scrcpy_streamer.cached_sps + scrcpy_streamer.cached_pps
                await websocket.send_bytes(init_data)
                print(f"[video/stream] ✓ Sent SPS/PPS ({len(init_data)} bytes), client will wait for live IDR")
            else:
                print("[video/stream] ⚠ Warning: No cached SPS/PPS available")

    # Stream H.264 data to client
    # NOTE(review): the loop reads the global streamer without holding
    # scrcpy_lock, while /api/video/reset can set it to None — confirm the
    # generic handler below is the intended way to absorb that.
    stream_failed = False
    try:
        chunk_count = 0
        while True:
            try:
                h264_chunk = await scrcpy_streamer.read_h264_chunk()
                await websocket.send_bytes(h264_chunk)
                chunk_count += 1
                # Progress log every 100 chunks
                if chunk_count % 100 == 0:
                    print(f"[video/stream] Sent {chunk_count} chunks")
            except ConnectionError as e:
                print(f"[video/stream] Connection error after {chunk_count} chunks: {e}")
                stream_failed = True
                # Don't send error if WebSocket already disconnected
                try:
                    await websocket.send_json({"error": f"Stream error: {str(e)}"})
                except Exception:
                    pass
                break

    except WebSocketDisconnect:
        # A client disconnect is not a stream failure; the streamer is kept
        print("[video/stream] Client disconnected")
    except Exception as e:
        import traceback
        print(f"[video/stream] Error: {e}")
        print(f"[video/stream] Traceback:\n{traceback.format_exc()}")
        stream_failed = True
        # Best effort: the socket may already be closed
        try:
            await websocket.send_json({"error": str(e)})
        except Exception:
            pass

    # Reset global streamer if stream failed
    if stream_failed:
        async with scrcpy_lock:
            print("[video/stream] Stream failed, resetting global streamer...")
            if scrcpy_streamer is not None:
                scrcpy_streamer.stop()
                scrcpy_streamer = None

    print("[video/stream] Client stream ended")
580
-
581
-
582
- # 静态文件托管 - 使用包内资源定位
583
- def _get_static_dir() -> Path | None:
584
- """获取静态文件目录路径。"""
585
- try:
586
- # 尝试从包内资源获取
587
- static_dir = files("AutoGLM_GUI").joinpath("static")
588
- if hasattr(static_dir, "_path"):
589
- # Traversable 对象
590
- path = Path(str(static_dir))
591
- if path.exists():
592
- return path
593
- # 直接转换为 Path
594
- path = Path(str(static_dir))
595
- if path.exists():
596
- return path
597
- except (TypeError, FileNotFoundError):
598
- pass
599
-
600
- return None
601
-
602
-
603
STATIC_DIR = _get_static_dir()

if STATIC_DIR is not None and STATIC_DIR.exists():
    # Serve the bundled static assets
    assets_dir = STATIC_DIR / "assets"
    if assets_dir.exists():
        app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")

    # Canonical root used to confine requested paths to the static directory
    _STATIC_ROOT = STATIC_DIR.resolve()

    # Every non-API route returns index.html (supports client-side routing)
    @app.get("/{full_path:path}")
    async def serve_spa(full_path: str) -> FileResponse:
        """Serve the SPA for all non-API routes.

        A request that names an existing file inside the static directory
        gets that file; everything else gets ``index.html``.
        """
        # full_path is client-controlled: ".." segments or an absolute path
        # would otherwise escape the static directory, so resolve it and
        # require the result to stay under the static root.
        candidate = (_STATIC_ROOT / full_path).resolve()
        if candidate.is_relative_to(_STATIC_ROOT) and candidate.is_file():
            return FileResponse(candidate)
        # Otherwise return index.html (supports client-side routing)
        return FileResponse(STATIC_DIR / "index.html")
5
+ __all__ = ["app"]
AutoGLM_GUI/state.py ADDED
@@ -0,0 +1,33 @@
1
+ """Shared runtime state for the AutoGLM-GUI API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import os
7
+ from typing import TYPE_CHECKING
8
+
9
+ from phone_agent.agent import AgentConfig
10
+ from phone_agent.model import ModelConfig
11
+
12
+ if TYPE_CHECKING:
13
+ from AutoGLM_GUI.scrcpy_stream import ScrcpyStreamer
14
+ from phone_agent import PhoneAgent
15
+
16
# Agent instances keyed by device_id
# (annotation is quoted because PhoneAgent is imported only under TYPE_CHECKING)
agents: dict[str, "PhoneAgent"] = {}
# Cached configs to rebuild agents on reset; each value is a
# (model config, agent config) pair
agent_configs: dict[str, tuple[ModelConfig, AgentConfig]] = {}

# Scrcpy streaming per device: one streamer and one lock per key
scrcpy_streamers: dict[str, "ScrcpyStreamer"] = {}
scrcpy_locks: dict[str, asyncio.Lock] = {}

# Defaults pulled from env (used when request omits config)
# The base URL has no built-in fallback and defaults to an empty string
DEFAULT_BASE_URL: str = os.getenv("AUTOGLM_BASE_URL", "")
DEFAULT_MODEL_NAME: str = os.getenv("AUTOGLM_MODEL_NAME", "autoglm-phone-9b")
DEFAULT_API_KEY: str = os.getenv("AUTOGLM_API_KEY", "EMPTY")
29
+
30
+
31
def non_blocking_takeover(message: str) -> None:
    """Print a takeover request to stdout instead of waiting on console input.

    Args:
        message: Takeover text supplied by the agent.
    """
    line = "[Takeover] {}".format(message)
    print(line)
@@ -1 +1 @@
1
import { j as o } from "./index-BynheeWl.js";

// Renders a padded container (class "p-2") holding an <h3> with the text
// "About"; exported under the name `component`.
function t() {
  return o.jsx("div", {
    className: "p-2",
    children: o.jsx("h3", { children: "About" })
  });
}

export { t as component };
1
import { j as o } from "./index-C8KPPfxe.js";

// Renders a padded container (class "p-2") holding an <h3> with the text
// "About"; exported under the name `component`.
function t() {
  return o.jsx("div", {
    className: "p-2",
    children: o.jsx("h3", { children: "About" })
  });
}

export { t as component };