@hienlh/ppm 0.13.95 → 0.13.97

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/assets/skills/ppm/SKILL.md +1 -1
  3. package/assets/skills/ppm/references/http-api.md +2 -1
  4. package/dist/web/assets/{audio-preview-_926SILu.js → audio-preview-C5XtLYr0.js} +1 -1
  5. package/dist/web/assets/chat-tab-Crh2a5WT.js +16 -0
  6. package/dist/web/assets/{code-editor-CgX34_CM.js → code-editor-D0n5yRzn.js} +2 -2
  7. package/dist/web/assets/{conflict-editor-4d7ifSFr.js → conflict-editor-C7tPFwQu.js} +1 -1
  8. package/dist/web/assets/{database-viewer-BRGf672-.js → database-viewer-CENJQA63.js} +1 -1
  9. package/dist/web/assets/{diff-viewer-C8Dx_mMP.js → diff-viewer-DYskJYPt.js} +1 -1
  10. package/dist/web/assets/{docx-preview-CTC4n52W.js → docx-preview-Cs1Vck_b.js} +1 -1
  11. package/dist/web/assets/{extension-webview-C2-MlEV1.js → extension-webview-DDNsAryv.js} +1 -1
  12. package/dist/web/assets/{git-log-panel-D6XL2Qfe.js → git-log-panel-Bw50iGkP.js} +1 -1
  13. package/dist/web/assets/{glide-data-grid-W196CMwG.js → glide-data-grid-D-kV0skS.js} +1 -1
  14. package/dist/web/assets/{image-preview-B4vAybDG.js → image-preview-ICmsfJXP.js} +1 -1
  15. package/dist/web/assets/index-D7YWNgnj.css +2 -0
  16. package/dist/web/assets/{index-B8jn9Try.js → index-lVDR594A.js} +3 -3
  17. package/dist/web/assets/keybindings-store-zLledTJ_.js +1 -0
  18. package/dist/web/assets/{markdown-renderer-tbhXgrmJ.js → markdown-renderer-VOyp6B1p.js} +1 -1
  19. package/dist/web/assets/notification-store-XwGVhPdW.js +1 -0
  20. package/dist/web/assets/{pdf-preview-CEE9y9ai.js → pdf-preview-DTlEFagS.js} +1 -1
  21. package/dist/web/assets/{port-forwarding-tab-CL03gwO3.js → port-forwarding-tab-Coe4rUGI.js} +1 -1
  22. package/dist/web/assets/{postgres-viewer-Cca5RWLN.js → postgres-viewer-C9G-BZE8.js} +1 -1
  23. package/dist/web/assets/{settings-tab-BIhxSzkH.js → settings-tab-BTEIHM07.js} +1 -1
  24. package/dist/web/assets/{sql-query-editor-CMXFZyid.js → sql-query-editor-COQcgsYM.js} +1 -1
  25. package/dist/web/assets/{sqlite-viewer-C43nch9A.js → sqlite-viewer-D0pkAQQa.js} +1 -1
  26. package/dist/web/assets/{system-monitor-tab-DR3Ny9fs.js → system-monitor-tab-VgYnDn6v.js} +1 -1
  27. package/dist/web/assets/{terminal-tab-BKZgoFBm.js → terminal-tab-D08UOpkI.js} +1 -1
  28. package/dist/web/assets/{video-preview-2xKLGBUs.js → video-preview-D5ufy0_E.js} +1 -1
  29. package/dist/web/index.html +2 -2
  30. package/dist/web/sw.js +1 -1
  31. package/docs/journals/260602-proxy-request-logging-stats.md +86 -0
  32. package/docs/system-architecture.md +1 -1
  33. package/package.json +1 -1
  34. package/src/providers/claude-agent-sdk.ts +84 -2
  35. package/src/server/index.ts +136 -9
  36. package/src/server/routes/proxy.ts +25 -3
  37. package/src/server/routes/upgrade.ts +2 -1
  38. package/src/services/account-selector.service.ts +12 -0
  39. package/src/services/db.service.ts +83 -1
  40. package/src/services/proxy.service.ts +74 -8
  41. package/src/services/supervisor.ts +102 -48
  42. package/src/web/components/chat/chat-tab.tsx +4 -0
  43. package/src/web/components/chat/message-list.tsx +44 -3
  44. package/src/web/hooks/use-chat.ts +16 -0
  45. package/dist/web/assets/chat-tab-CZ4JB8bF.js +0 -16
  46. package/dist/web/assets/index-CKeYG-TK.css +0 -2
  47. package/dist/web/assets/keybindings-store-D3ajyN3W.js +0 -1
  48. package/dist/web/assets/notification-store-CuF7CL5K.js +0 -1
@@ -0,0 +1,86 @@
1
+ # Proxy Request Logging & Stats
2
+
3
+ **Date**: 2026-06-02
4
+ **Severity**: High
5
+ **Component**: OAuth Proxy Bridge, SQLite Config
6
+ **Status**: Resolved
7
+ **Commit**: d5029ab
8
+
9
+ ## What Happened
10
+
11
+ Overnight, runaway Python benchmark scripts from vn-legal-rag drained the 5-hour quota of all 4 Claude accounts via PPM's proxy with ZERO traceability. OAuth proxy requests routed through the SDK bridge had no persistent logging — only ephemeral console output. No way to audit which caller, how many requests, or which accounts were consumed.
12
+
13
+ ## The Brutal Truth
14
+
15
+ This is infuriating because we had no observability into what broke our quota. A user can accidentally (or maliciously) drain accounts through the proxy and we'd only notice the dead quota. Multi-tenant proxy with no audit trail is irresponsible — shipping without this was a blind spot.
16
+
17
+ ## Technical Details
18
+
19
+ **Schema**: Migration v28 in `src/services/db.service.ts` creates `proxy_requests` table:
20
+ ```sql
21
+ CREATE TABLE proxy_requests (
22
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
23
+ endpoint TEXT NOT NULL,
24
+ model TEXT,
25
+ account_id TEXT,
26
+ account_label TEXT,
27
+ caller_ip TEXT,
28
+ caller_ua TEXT,
29
+ status TEXT NOT NULL, -- 'success'|'error'|'rate_limited'
30
+ duration_ms INTEGER,
31
+ created_at TEXT DEFAULT (datetime('now'))
32
+ )
33
+ ```
34
+
35
+ **Logging coverage** (all 3 proxy paths):
36
+ - `proxy.service.ts` intercepts every request in `forward()` / `forwardOpenAi()` / `forwardDirect()` with `performance.now()` timing
37
+ - Early-return cases (no account) still logged with status + duration
38
+ - Try/catch wraps `insertProxyRequest()` internally — DB write failure never breaks a proxy request
39
+
40
+ **Retrieval**:
41
+ - `GET /proxy/stats` (proxy auth required) returns {lastHour, last24h, total, requestCount}
42
+ - `getProxyStats()` service method for programmatic access
43
+
44
+ **Retention**:
45
+ - 30-day cleanup job runs on server startup + daily setInterval
46
+ - `cleanupOldProxyRequests(days=30)` removes expired rows
47
+
48
+ ## What We Tried
49
+
50
+ Initial code review flagged a critical issue: unwrapped `throw` in the logging path could break a previously-working request AND trigger double-insert in the catch block. Fixed by wrapping `insertProxyRequest()` in an internal try/catch so logging failure is safe.
51
+
52
+ Also applied: cosmetic accuracy update to `CURRENT_SCHEMA_VERSION` (26→28), which was out of sync with actual migration count. Dead constant, zero functional impact, but worth fixing for readability.
53
+
54
+ ## Root Cause Analysis
55
+
56
+ Multi-tenant proxy with opaque requests is a liability without persistent audit logs. We shipped observability-blind and only noticed the impact after quota exhaustion. The runaway script was the catalyst, but the real failure was: no way to answer "who used what" or "which account did this drain?"
57
+
58
+ Subagent (docs-manager) claimed `CURRENT_SCHEMA_VERSION` was a "critical bug preventing table creation" — verified FALSE against actual code. Migrations key off `PRAGMA user_version`, not the constant. Lesson: don't trust subagent severity framing without code verification.
59
+
60
+ ## Lessons Learned
61
+
62
+ 1. **Observability is not optional for shared resource proxies.** Log at the service layer (not inside bridge files) — single DRY point covering all code paths.
63
+ 2. **Logging must be failure-safe.** DB write errors can NEVER break the request being logged. Wrap at the service layer and silently degrade.
64
+ 3. **Metadata-only logging respects privacy by design.** No message content, no tokens — forensic accountability, not surveillance.
65
+ 4. **Verify "critical bugs" from subagents.** Dead constants and unused variables aren't bugs. Check the actual code path before trusting severity claims.
66
+ 5. **Caller IP is advisory, not authoritative.** x-forwarded-for is spoofable without a trusted reverse proxy in front. Use for forensics, not access control.
67
+
68
+ ## Next Steps
69
+
70
+ 1. Monitor proxy stats for anomalies — set up alerts if request count spikes (owner: ops, timeline: this week)
71
+ 2. Document proxy auth/trust model (owner: tech lead, timeline: pending — currently assumes trusted reverse proxy context)
72
+ 3. Future: rotate daily stats to cold storage (SQLite → object store) for long-term audit trails (timeline: v0.15)
73
+
74
+ ## Unresolved Questions
75
+
76
+ - Is the proxy ever fronted by a trusted reverse proxy? If not, caller_ip forensics are unreliable.
77
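+ - Should proxy stats be exposed to callers without the proxy key (read-only dashboard)? Currently `GET /proxy/stats` is gated only by the proxy auth key — any key holder can read it; there is no separate admin role.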
78
+ - Should we alert on quota drain events (e.g., 50+ requests in last 10 min)? Not implemented yet.
79
+
80
+ ---
81
+
82
+ **Files modified**: src/services/db.service.ts, src/services/proxy.service.ts, src/server/routes/proxy.ts, src/server/index.ts
83
+ **Tests**: 14/14 passing (tests/integration/proxy-requests-table.test.ts)
84
+ **Code review score**: 8/10 (approved)
85
+
86
+ **Status:** DONE
@@ -650,7 +650,7 @@ ppm jira track <issue-key> — Manually track ticket (insert res
650
650
  - Enforce security (no parent directory access)
651
651
 
652
652
  **Key Patterns:**
653
- - SQLite: WAL mode, foreign keys, lazy init, schema v19 (18 tables: config, connections, accounts, usage_history, session_logs, push_subscriptions, session_map, table_metadata, workspace_state, extension_storage, mcp_servers, clawbot_sessions, clawbot_memories, clawbot_paired_chats, jira_config, jira_watchers, jira_watch_results, bot_tasks)
653
+ - SQLite: WAL mode, foreign keys, lazy init, schema v28 (20+ tables: config, connections, accounts, usage_history, session_logs, push_subscriptions, session_map, table_metadata, workspace_state, extension_storage, mcp_servers, clawbot_sessions, clawbot_memories, clawbot_paired_chats, jira_config, jira_watchers, jira_watch_results, bot_tasks, proxy_requests, session_metadata)
654
654
  - Path validation: `projectPath/relativePath` only, reject `..`
655
655
  - Caching: Directory trees cached with TTL
656
656
  - Error handling: Descriptive messages (file not found, permission denied)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hienlh/ppm",
3
- "version": "0.13.95",
3
+ "version": "0.13.97",
4
4
  "description": "Personal Project Manager — mobile-first web IDE with AI assistance",
5
5
  "author": "hienlh",
6
6
  "license": "MIT",
@@ -77,6 +77,36 @@ function createMessageChannel(): {
77
77
  };
78
78
  }
79
79
 
80
/**
 * Parse a hard usage/session-limit reset hint from SDK error text.
 *
 * Recognises wording such as "resets 10:10am", "resets at 3pm" or
 * "resets 10:10am (Asia/Saigon)". A trailing timezone label is NOT interpreted:
 * the clock time is resolved in the server's local timezone, so `atMs` is a
 * best-effort estimate only.
 *
 * @param text Raw error/assistant text to scan for a reset hint.
 * @param now  Reference instant used to resolve the clock time (defaults to the
 *             current time; injectable so the result is deterministic in tests).
 * @returns `null` when no reset hint is present (caller treats that as a transient
 *          rate limit). Otherwise the human-readable reset text and `atMs`, the
 *          next occurrence of that clock time strictly after `now`. `atMs` is
 *          omitted when the matched digits are not a valid clock time.
 */
function parseUsageLimitReset(
  text: string,
  now: Date = new Date(),
): { text?: string; atMs?: number } | null {
  // The meridiem must end on a word boundary so "3 amount" is not read as "3am".
  const m = /resets?\s+(?:at\s+)?(\d{1,2})(?::(\d{2}))?(?:\s*(am|pm)\b)?/i.exec(text);
  if (!m) return null;
  const rawText = m[0].replace(/^resets?\s+(?:at\s+)?/i, "").trim();

  let hour = Number(m[1]);
  const minute = m[2] ? Number(m[2]) : 0;
  const meridiem = m[3]?.toLowerCase();

  // With am/pm the hour must be 1–12; without it, 0–23. Anything else is not a
  // real clock time, so report the text but do not invent a timestamp.
  const hourIsValid = meridiem ? hour >= 1 && hour <= 12 : hour <= 23;
  if (!hourIsValid || minute > 59) {
    return { text: rawText || undefined };
  }
  if (meridiem === "pm" && hour < 12) hour += 12;
  if (meridiem === "am" && hour === 12) hour = 0;

  const reset = new Date(now.getTime());
  reset.setHours(hour, minute, 0, 0);
  // If the computed time already passed today, it must mean the next occurrence.
  if (reset.getTime() <= now.getTime()) reset.setDate(reset.getDate() + 1);

  return { text: rawText || undefined, atMs: reset.getTime() };
}
109
+
80
110
  /** Build a MessageParam with optional image content blocks */
81
111
  function buildMessageParam(
82
112
  text: string,
@@ -962,6 +992,8 @@ export class ClaudeAgentSdkProvider implements AIProvider {
962
992
  let rateLimitRetryCount = 0;
963
993
  let authRetryCount = 0;
964
994
  let hadAnyEvents = false;
995
+ // Accounts that hit a hard usage/session limit this turn — never retried again here.
996
+ const usageLimitedAccounts = new Set<string>();
965
997
  retryLoop: while (true) {
966
998
  // Reset streaming state on retry — clears stale content from failed attempts
967
999
  // (e.g. "Failed to authenticate. API Error: 401..." text that was already streamed)
@@ -1180,6 +1212,9 @@ export class ClaudeAgentSdkProvider implements AIProvider {
1180
1212
  if (!parentId && (msg as any).uuid) lastAssistantUuid = (msg as any).uuid;
1181
1213
  // SDK assistant messages can carry an error field for auth/billing/rate-limit failures
1182
1214
  let assistantError = (msg as any).error as string | undefined;
1215
+ // Human-readable reset time + parsed timestamp for a hard usage/session limit
1216
+ let usageLimitResetText: string | undefined;
1217
+ let usageLimitResetAtMs: number | undefined;
1183
1218
 
1184
1219
  // SDK sometimes returns auth errors as text content without setting error field.
1185
1220
  // Detect 401 pattern in text: "Failed to authenticate. API Error: 401 ..."
@@ -1189,8 +1224,19 @@ export class ClaudeAgentSdkProvider implements AIProvider {
1189
1224
  assistantError = "authentication_failed";
1190
1225
  console.warn(`[sdk] session=${sessionId} detected 401 in assistant text content — treating as auth error`);
1191
1226
  } else if (textContent && /hit your (?:[\w-]+\s+)*limit/i.test(textContent)) {
1192
- assistantError = "rate_limit";
1193
- console.warn(`[sdk] session=${sessionId} detected quota limit in assistant text content — treating as rate_limit`);
1227
+ // A hard usage/session limit carries a reset time ("...resets 10:10am").
1228
+ // Treat those as usage_limit (switch accounts, don't backoff-loop); only
1229
+ // wording without a reset hint falls through to transient rate_limit.
1230
+ const reset = parseUsageLimitReset(textContent);
1231
+ if (reset) {
1232
+ assistantError = "usage_limit";
1233
+ usageLimitResetText = reset.text;
1234
+ usageLimitResetAtMs = reset.atMs;
1235
+ console.warn(`[sdk] session=${sessionId} detected usage/session limit (resets ${reset.text ?? "?"}) — will switch account, no backoff loop`);
1236
+ } else {
1237
+ assistantError = "rate_limit";
1238
+ console.warn(`[sdk] session=${sessionId} detected quota limit in assistant text content — treating as rate_limit`);
1239
+ }
1194
1240
  } else if (textContent && /API Error:\s*5\d{2}\b/i.test(textContent)) {
1195
1241
  // 5xx (e.g. 529 Overloaded) — match the explicit "API Error: 5xx" text only.
1196
1242
  // Treat as server_error so it enters the retry branch and the raw error text is
@@ -1240,6 +1286,42 @@ export class ClaudeAgentSdkProvider implements AIProvider {
1240
1286
  break;
1241
1287
  }
1242
1288
 
1289
+ // Hard usage/session limit — never retry the same account (futile until reset).
1290
+ // Switch to a fresh account if one exists; otherwise stop with one clear error.
1291
+ if (assistantError === "usage_limit") {
1292
+ if (account) {
1293
+ usageLimitedAccounts.add(account.id);
1294
+ accountSelector.onUsageLimit(account.id, usageLimitResetAtMs);
1295
+ }
1296
+ const nextAccount = accountSelector.next(usageLimitedAccounts);
1297
+ if (nextAccount) {
1298
+ account = nextAccount;
1299
+ const label = nextAccount.label ?? nextAccount.email ?? "Unknown";
1300
+ console.warn(`[sdk] session=${sessionId} usage limit — switching to fresh account ${nextAccount.id} (${label}), no backoff`);
1301
+ yield { type: "account_retry" as const, reason: `Usage limit reached — switching account`, accountId: nextAccount.id, accountLabel: label };
1302
+ // Rebuild query with the fresh account env, no backoff delay.
1303
+ const retryU = buildRetryMsg();
1304
+ closeCurrentStream();
1305
+ const ulRetryEnv = this.buildQueryEnv(meta.projectPath, account);
1306
+ const { generator: ulRetryGen, controller: ulRetryCtrl } = createMessageChannel();
1307
+ ulRetryCtrl.push(retryU.msg);
1308
+ const retryOpts = { ...queryOptions, sessionId: undefined, resume: sessionId, env: ulRetryEnv };
1309
+ const rq = query({
1310
+ prompt: ulRetryGen,
1311
+ options: { ...retryOpts, ...(permissionHooks && { hooks: permissionHooks }), canUseTool } as any,
1312
+ });
1313
+ this.streamingSessions.set(sessionId, { meta, query: rq, controller: ulRetryCtrl, lastUserContent: retryU.lastUserContent, lastUserImages: retryU.lastUserImages });
1314
+ this.activeQueries.set(sessionId, rq);
1315
+ eventSource = rq;
1316
+ continue retryLoop;
1317
+ }
1318
+ // No fresh account left — stop. One clear error, no retry loop.
1319
+ const resetSuffix = usageLimitResetText ? ` Resets ${usageLimitResetText}.` : "";
1320
+ console.warn(`[sdk] session=${sessionId} usage limit — no fresh account available, stopping`);
1321
+ yield { type: "error", message: `All accounts have hit their usage limit.${resetSuffix} Add another account in Settings → Accounts or wait for the reset.` };
1322
+ break;
1323
+ }
1324
+
1243
1325
  // Rate limit — auto-retry with exponential backoff, switching account if possible
1244
1326
  if ((assistantError === "rate_limit" || assistantError === "server_error") && rateLimitRetryCount < MAX_RATE_LIMIT_RETRIES) {
1245
1327
  const backoff = RATE_LIMIT_BACKOFF_MS[rateLimitRetryCount] ?? 60_000;
@@ -228,7 +228,7 @@ export async function startServer(options: {
228
228
 
229
229
  // Load config
230
230
  configService.load();
231
- const port = parseInt(options.port ?? String(configService.get("port")), 10);
231
+ let port = parseInt(options.port ?? String(configService.get("port")), 10);
232
232
  const host = configService.get("host");
233
233
 
234
234
  await setupLogFile();
@@ -336,6 +336,22 @@ export async function startServer(options: {
336
336
  .once("listening", () => tester.close(() => resolve(false)))
337
337
  .listen(port, host);
338
338
  });
339
+
340
// On Windows, detect zombie sockets: a port still held by a process that died in a crash.
// Returns the dead PID if the port is a zombie, 0 if the owning process is alive, and -1 if
// that can't be determined (non-Windows platform, netstat failure, or no LISTENING row for
// exactly this port).
const findZombiePortHolder = (): number => {
  if (process.platform !== "win32") return -1;
  try {
    const { execSync } = require("node:child_process") as typeof import("node:child_process");
    const out = execSync(`netstat -ano | findstr "0.0.0.0:${port}.*LISTENING"`, { encoding: "utf-8", timeout: 5000 });
    // findstr matches by prefix (port 80 also hits 8080), so require the local-address
    // column to end in exactly ":<port>" before trusting the PID column.
    const exactRow = new RegExp(`^\\s*TCP\\s+\\S+:${port}\\s+\\S+\\s+LISTENING\\s+(\\d+)`, "im").exec(out);
    if (!exactRow?.[1]) return -1;
    const ownerPid = parseInt(exactRow[1], 10);
    try {
      // Signal 0 delivers nothing; it only probes whether the PID exists.
      process.kill(ownerPid, 0);
      return 0;
    } catch (probeErr) {
      // EPERM means the process exists but we are not allowed to signal it
      // (e.g. a system service) — that is a live owner, not a zombie.
      if ((probeErr as NodeJS.ErrnoException).code === "EPERM") return 0;
      return ownerPid;
    }
  } catch { return -1; }
};
354
+
339
355
  let portInUse = await checkPort();
340
356
  if (portInUse) {
341
357
  // Retry — port may still be releasing after supervisor self-replace
@@ -346,9 +362,33 @@ export async function startServer(options: {
346
362
  if (!portInUse) break;
347
363
  }
348
364
  if (portInUse) {
349
- console.error(`\n ✗ Port ${port} is already in use.`);
350
- console.error(` Run 'ppm stop' first or use a different port with --port.\n`);
351
- process.exit(1);
365
+ const zombiePid = findZombiePortHolder();
366
+ if (zombiePid > 0) {
367
+ // Zombie socket from a dead process — Windows won't release it.
368
+ // Auto-find a free port nearby so the user isn't stuck.
369
+ console.warn(` ⚠ Port ${port} held by dead process (PID: ${zombiePid}) — zombie socket.`);
370
+ const origPort = port;
371
+ for (let candidate = port + 1; candidate <= port + 20; candidate++) {
372
+ const candidateInUse = await new Promise<boolean>((resolve) => {
373
+ const net = require("node:net") as typeof import("node:net");
374
+ const tester = net.createServer()
375
+ .once("error", (err: NodeJS.ErrnoException) => resolve(err.code === "EADDRINUSE"))
376
+ .once("listening", () => tester.close(() => resolve(false)))
377
+ .listen(candidate, host);
378
+ });
379
+ if (!candidateInUse) { port = candidate; break; }
380
+ }
381
+ if (port === origPort) {
382
+ console.error(`\n ✗ Port ${port} is blocked by a zombie socket and no nearby port is free.`);
383
+ console.error(` Run PowerShell as Admin: netsh int tcp reset (then restart)\n`);
384
+ process.exit(1);
385
+ }
386
+ console.warn(` Auto-selected port ${port} instead.`);
387
+ } else {
388
+ console.error(`\n ✗ Port ${port} is already in use.`);
389
+ console.error(` Run 'ppm stop' first or use a different port with --port.\n`);
390
+ process.exit(1);
391
+ }
352
392
  }
353
393
  }
354
394
 
@@ -585,8 +625,58 @@ if (process.argv.includes("__serve__")) {
585
625
  }
586
626
  } catch { /* status.json missing or no shareUrl — normal */ }
587
627
 
588
- Bun.serve({
589
- port,
628
// Auto-cleanup old proxy request logs (30-day retention): on startup + daily.
// Retention is best-effort housekeeping: a DB failure is logged and swallowed so it can
// neither abort server startup nor escape the timer callback as an uncaught exception.
{
  const { cleanupOldProxyRequests } = await import("../services/db.service.ts");
  const RETENTION_DAYS = 30;
  const pruneProxyLogs = (): void => {
    try {
      const deleted = cleanupOldProxyRequests(RETENTION_DAYS);
      if (deleted > 0) console.log(`[proxy] cleaned up ${deleted} proxy request logs older than ${RETENTION_DAYS} days`);
    } catch (e) {
      console.error(`[proxy] failed to clean up old proxy request logs:`, e instanceof Error ? e.message : e);
    }
  };
  pruneProxyLogs();
  setInterval(pruneProxyLogs, 24 * 60 * 60 * 1000);
}
635
+
636
+ // On Windows, check for zombie sockets before binding.
637
+ // After an upgrade, the old server's socket can stay in LISTENING state
638
+ // because SIGTERM maps to TerminateProcess (graceful handler never fires).
639
+ let actualPort = port;
640
+ if (process.platform === "win32") {
641
+ const portInUse = await new Promise<boolean>((resolve) => {
642
+ const net = require("node:net") as typeof import("node:net");
643
+ const tester = net.createServer()
644
+ .once("error", (e: NodeJS.ErrnoException) => resolve(e.code === "EADDRINUSE"))
645
+ .once("listening", () => tester.close(() => resolve(false)))
646
+ .listen(port, host);
647
+ });
648
+ if (portInUse) {
649
+ try {
650
+ const { execSync } = require("node:child_process") as typeof import("node:child_process");
651
+ const out = execSync(`netstat -ano | findstr "0.0.0.0:${port}.*LISTENING"`, { encoding: "utf-8", timeout: 5000 });
652
+ const match = out.trim().match(/LISTENING\s+(\d+)/);
653
+ if (match?.[1]) {
654
+ const ownerPid = parseInt(match[1], 10);
655
+ let isZombie = false;
656
+ try { process.kill(ownerPid, 0); } catch { isZombie = true; }
657
+ if (isZombie) {
658
+ console.warn(`[serve] Port ${port} held by dead process (PID: ${ownerPid}) — zombie socket`);
659
+ for (let candidate = port + 1; candidate <= port + 20; candidate++) {
660
+ const busy = await new Promise<boolean>((resolve) => {
661
+ const net = require("node:net") as typeof import("node:net");
662
+ const tester = net.createServer()
663
+ .once("error", (e: NodeJS.ErrnoException) => resolve(e.code === "EADDRINUSE"))
664
+ .once("listening", () => tester.close(() => resolve(false)))
665
+ .listen(candidate, host);
666
+ });
667
+ if (!busy) { actualPort = candidate; break; }
668
+ }
669
+ if (actualPort !== port) {
670
+ console.warn(`[serve] Auto-selected port ${actualPort} instead`);
671
+ }
672
+ }
673
+ }
674
+ } catch {}
675
+ }
676
+ }
677
+
678
+ const server = Bun.serve({
679
+ port: actualPort,
590
680
  hostname: host,
591
681
  fetch(req, server) {
592
682
  const url = new URL(req.url);
@@ -687,10 +777,47 @@ if (process.argv.includes("__serve__")) {
687
777
  jiraWatcherService.startAll().catch((e) => {
688
778
  console.error("[jira] Failed to start watchers:", (e as Error).message);
689
779
  });
690
- process.on("SIGTERM", () => jiraWatcherService.stopAll());
691
- process.on("SIGINT", () => jiraWatcherService.stopAll());
692
780
  })
693
781
  .catch(() => {});
694
782
 
695
- console.log(`Server child ready on port ${port}`);
783
+ // If we auto-selected a different port, update status.json so supervisor
784
+ // health checks and tunnel proxy point at the correct port.
785
+ if (actualPort !== port) {
786
+ try {
787
+ const { resolve: r } = await import("node:path");
788
+ const { readFileSync: rf, writeFileSync: wf, renameSync: rn } = await import("node:fs");
789
+ const { getPpmDir: gd } = await import("../services/ppm-dir.ts");
790
+ const sf = r(gd(), "status.json");
791
+ const st = JSON.parse(rf(sf, "utf-8"));
792
+ st.port = actualPort;
793
+ const tmp = sf + ".tmp." + process.pid;
794
+ wf(tmp, JSON.stringify(st));
795
+ rn(tmp, sf);
796
+ } catch {}
797
+ }
798
+
799
+ // Graceful shutdown: close the listening socket so the port is released
800
+ const gracefulShutdown = () => {
801
+ try { server.stop(true); } catch {}
802
+ process.exit(0);
803
+ };
804
+ process.on("SIGTERM", gracefulShutdown);
805
+ process.on("SIGINT", gracefulShutdown);
806
+
807
+ // On Windows, SIGTERM maps to TerminateProcess — graceful handlers never fire.
808
+ // Poll for a shutdown file written by the supervisor instead.
809
+ if (process.platform === "win32") {
810
+ const { getPpmDir: gd } = await import("../services/ppm-dir.ts");
811
+ const { resolve: r } = await import("node:path");
812
+ const { existsSync: ex, unlinkSync: ul } = await import("node:fs");
813
+ const shutdownFile = r(gd(), ".server-shutdown");
814
+ setInterval(() => {
815
+ if (ex(shutdownFile)) {
816
+ try { ul(shutdownFile); } catch {}
817
+ gracefulShutdown();
818
+ }
819
+ }, 200);
820
+ }
821
+
822
+ console.log(`Server child ready on port ${actualPort}`);
696
823
  }
@@ -1,5 +1,7 @@
1
1
  import { Hono } from "hono";
2
+ import type { Context } from "hono";
2
3
  import { proxyService } from "../../services/proxy.service.ts";
4
+ import { getProxyStats } from "../../services/db.service.ts";
3
5
  import { ok, err } from "../../types/api.ts";
4
6
 
5
7
  /**
@@ -22,6 +24,16 @@ function validateProxyAuth(authHeader: string | undefined): boolean {
22
24
  return token === key;
23
25
  }
24
26
 
27
/**
 * Collect caller metadata (IP and user agent) from the request headers so a
 * proxy request can be attributed in the log.
 *
 * The IP is the first non-empty entry of `x-forwarded-for`, else `x-real-ip`,
 * else the literal "unknown". The user agent falls back to "unknown" as well.
 * Both values come straight from client-supplied headers.
 */
function getCallerMeta(c: Context): { callerIp?: string; callerUa?: string } {
  const forwardedFor = c.req.header("x-forwarded-for");
  let callerIp = forwardedFor?.split(",")[0]?.trim();
  if (!callerIp) callerIp = c.req.header("x-real-ip");
  if (!callerIp) callerIp = "unknown";

  const userAgent = c.req.header("user-agent");
  const callerUa = userAgent ? userAgent : "unknown";

  return { callerIp, callerUa };
}
36
+
25
37
  /** CORS preflight for external tools */
26
38
  proxyRoutes.options("/*", (c) => {
27
39
  return new Response(null, {
@@ -54,7 +66,7 @@ proxyRoutes.post("/v1/messages", async (c) => {
54
66
  if (val) headers[key] = val;
55
67
  }
56
68
 
57
- return proxyService.forward("/v1/messages", "POST", headers, body);
69
+ return proxyService.forward("/v1/messages", "POST", headers, body, getCallerMeta(c));
58
70
  });
59
71
 
60
72
  /** POST /proxy/v1/chat/completions — OpenAI-compatible chat completions proxy */
@@ -69,7 +81,7 @@ proxyRoutes.post("/v1/chat/completions", async (c) => {
69
81
  }
70
82
 
71
83
  const body = await c.req.text();
72
- return proxyService.forwardOpenAi(body);
84
+ return proxyService.forwardOpenAi(body, getCallerMeta(c));
73
85
  });
74
86
 
75
87
  /** POST /proxy/v1/messages/count_tokens — token counting proxy */
@@ -90,5 +102,15 @@ proxyRoutes.post("/v1/messages/count_tokens", async (c) => {
90
102
  if (val) headers[key] = val;
91
103
  }
92
104
 
93
- return proxyService.forward("/v1/messages/count_tokens", "POST", headers, body);
105
+ return proxyService.forward("/v1/messages/count_tokens", "POST", headers, body, getCallerMeta(c));
106
+ });
107
+
108
/**
 * GET /proxy/stats — aggregated proxy request stats.
 * Requires the proxy auth key (via `authorization` or `x-api-key`); responds 401 otherwise.
 * The payload is the persisted stats plus the service's `requestCount`.
 */
proxyRoutes.get("/stats", (c) => {
  const credential = c.req.header("authorization") || c.req.header("x-api-key");
  const authorized = validateProxyAuth(credential);
  if (authorized) {
    const persisted = getProxyStats();
    const requestCount = proxyService.getRequestCount();
    return c.json({ ...persisted, requestCount });
  }
  return c.json({ error: "Invalid proxy auth key" }, 401);
});
@@ -44,11 +44,12 @@ upgradeRoutes.post("/apply", async (c) => {
44
44
  // Signal supervisor to self-replace
45
45
  const signal = signalSupervisorUpgrade();
46
46
  if (!signal.sent) {
47
+ console.warn(`[upgrade] Supervisor signal failed: ${signal.error ?? "unknown"}`);
47
48
  return c.json(ok({
48
49
  success: true,
49
50
  newVersion: result.newVersion,
50
51
  restart: false,
51
- message: "Upgraded. Restart manually with ppm restart",
52
+ message: `Upgraded to v${result.newVersion}. Restart manually: ppm restart (signal failed: ${signal.error ?? "unknown"})`,
52
53
  }));
53
54
  }
54
55
 
@@ -188,6 +188,18 @@ class AccountSelectorService {
188
188
  console.log(`[accounts] ${accountId} rate limited — cooldown ${Math.round(backoffMs / 1000)}s (retry #${retries})`);
189
189
  }
190
190
 
191
/**
 * Put an account on cooldown after it hits a hard usage/session limit.
 *
 * The cooldown lasts until `resetAtMs` when that is a future timestamp, and
 * otherwise for a one-hour fallback. `retryCounts` is deliberately left alone:
 * a quota ceiling is not a transient failure and carries no escalating penalty.
 *
 * @param accountId Account that hit the limit.
 * @param resetAtMs Optional epoch-ms timestamp at which the limit resets.
 */
onUsageLimit(accountId: string, resetAtMs?: number): void {
  const ONE_HOUR_MS = 60 * 60_000;

  let cooldownUntilMs: number;
  if (resetAtMs && resetAtMs > Date.now()) {
    cooldownUntilMs = resetAtMs;
  } else {
    cooldownUntilMs = Date.now() + ONE_HOUR_MS;
  }
  accountService.setCooldown(accountId, cooldownUntilMs);

  const remainingMs = cooldownUntilMs - Date.now();
  const mins = Math.round(remainingMs / 60_000);
  console.log(`[accounts] ${accountId} usage limit — cooldown ${mins}m (until reset)`);
}
202
+
191
203
  /** Called when auth error (401 / authentication_failed) — cooldown with longer backoff */
192
204
  onAuthError(accountId: string): void {
193
205
  const retries = (this.retryCounts.get(accountId) ?? 0) + 1;
@@ -3,7 +3,7 @@ import { resolve } from "node:path";
3
3
  import { mkdirSync, existsSync } from "node:fs";
4
4
  import { encrypt, decrypt } from "../lib/account-crypto.ts";
5
5
  import { getPpmDir } from "./ppm-dir.ts";
6
- const CURRENT_SCHEMA_VERSION = 26;
6
+ const CURRENT_SCHEMA_VERSION = 28;
7
7
 
8
8
  let db: Database | null = null;
9
9
  let dbProfile: string | null = null;
@@ -650,6 +650,26 @@ function runMigrations(database: Database): void {
650
650
  try { database.exec("ALTER TABLE session_metadata ADD COLUMN model TEXT"); } catch {}
651
651
  database.exec("PRAGMA user_version = 27;");
652
652
  }
653
+
654
+ if (current < 28) {
655
+ database.exec(`
656
+ CREATE TABLE IF NOT EXISTS proxy_requests (
657
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
658
+ endpoint TEXT NOT NULL,
659
+ model TEXT,
660
+ account_id TEXT,
661
+ account_label TEXT,
662
+ caller_ip TEXT,
663
+ caller_ua TEXT,
664
+ status TEXT NOT NULL,
665
+ duration_ms INTEGER,
666
+ created_at TEXT DEFAULT (datetime('now'))
667
+ );
668
+ CREATE INDEX IF NOT EXISTS idx_proxy_req_created ON proxy_requests(created_at);
669
+ CREATE INDEX IF NOT EXISTS idx_proxy_req_caller ON proxy_requests(caller_ip);
670
+ PRAGMA user_version = 28;
671
+ `);
672
+ }
653
673
  }
654
674
 
655
675
  // ---------------------------------------------------------------------------
@@ -982,6 +1002,68 @@ export function getUsageSince(since: string): UsageRow[] {
982
1002
  ).all(since) as UsageRow[];
983
1003
  }
984
1004
 
1005
+ // ---------------------------------------------------------------------------
1006
+ // Proxy request logging helpers
1007
+ // ---------------------------------------------------------------------------
1008
+
1009
/** Outcome recorded for a logged proxy request. */
export type ProxyRequestStatus = "success" | "error" | "rate_limited";

/**
 * Append one row to `proxy_requests`.
 *
 * Best-effort by design: any failure while writing is caught and reported on
 * stderr, so logging can never break the proxy request that is being logged.
 * Optional fields that are absent are stored as NULL.
 *
 * @param record Metadata of the proxied request (no message content).
 */
export function insertProxyRequest(record: {
  endpoint: string;
  model?: string;
  accountId?: string;
  accountLabel?: string;
  callerIp?: string;
  callerUa?: string;
  status: ProxyRequestStatus;
  durationMs?: number;
}): void {
  try {
    const statement = getDb().query(
      "INSERT INTO proxy_requests (endpoint, model, account_id, account_label, caller_ip, caller_ua, status, duration_ms) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
    );
    // Bindings in column order; missing optionals become NULL.
    const bindings = [
      record.endpoint,
      record.model ?? null,
      record.accountId ?? null,
      record.accountLabel ?? null,
      record.callerIp ?? null,
      record.callerUa ?? null,
      record.status,
      record.durationMs ?? null,
    ];
    statement.run(...bindings);
  } catch (e) {
    const { message } = e as Error;
    console.error(`[proxy] failed to log proxy request:`, message);
  }
}
1034
+
1035
/**
 * Delete proxy request log rows older than the retention window.
 *
 * The cutoff is computed inside SQLite with `datetime('now', '-N days')` so it has
 * the same "YYYY-MM-DD HH:MM:SS" text format as `created_at` (whose column default
 * is `datetime('now')`). Comparing against a JavaScript ISO string
 * ("YYYY-MM-DDTHH:MM:SS.sssZ") is a lexical comparison in which " " sorts before
 * "T", so every row from the cutoff date would be deleted regardless of its time.
 *
 * @param days Retention window in days; rows older than this are removed.
 * @returns Number of rows deleted. A non-finite or negative `days` deletes nothing.
 */
export function cleanupOldProxyRequests(days = 30): number {
  // Guard the modifier: a bad value must not throw or produce an unintended cutoff.
  if (!Number.isFinite(days) || days < 0) return 0;
  const result = getDb().run(
    "DELETE FROM proxy_requests WHERE created_at < datetime('now', ?)",
    [`-${days} days`],
  );
  return result.changes;
}
1043
+
1044
/** One aggregated row of proxy usage: a (model, account, caller) group and its request count. */
export interface ProxyStatsBucket {
  model: string | null;
  account_label: string | null;
  caller_ip: string | null;
  count: number;
}

/**
 * Summarise logged proxy traffic.
 *
 * @returns Request counts grouped by model, account label and caller IP for the
 *          last hour and the last 24 hours (largest groups first), plus the
 *          total number of rows currently in `proxy_requests`.
 */
export function getProxyStats(): { lastHour: ProxyStatsBucket[]; last24h: ProxyStatsBucket[]; total: number } {
  // Runs one grouped-count query and returns its rows.
  const groupedCounts = (sql: string): ProxyStatsBucket[] =>
    getDb().query(sql).all() as ProxyStatsBucket[];

  const lastHour = groupedCounts(
    "SELECT model, account_label, caller_ip, COUNT(*) as count FROM proxy_requests WHERE created_at >= datetime('now', '-1 hour') GROUP BY model, account_label, caller_ip ORDER BY count DESC",
  );
  const last24h = groupedCounts(
    "SELECT model, account_label, caller_ip, COUNT(*) as count FROM proxy_requests WHERE created_at >= datetime('now', '-24 hours') GROUP BY model, account_label, caller_ip ORDER BY count DESC",
  );

  const { count: total } = getDb().query(
    "SELECT COUNT(*) as count FROM proxy_requests",
  ).get() as { count: number };

  return { lastHour, last24h, total };
}
1066
+
985
1067
  export function getDbFilePath(): string {
986
1068
  return getDbPath();
987
1069
  }