@hienlh/ppm 0.9.52 → 0.9.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/docs/project-changelog.md +31 -1
- package/docs/project-roadmap.md +7 -6
- package/docs/system-architecture.md +72 -4
- package/package.json +1 -1
- package/src/cli/commands/restart.ts +29 -0
- package/src/cli/commands/stop.ts +67 -6
- package/src/index.ts +10 -1
- package/src/server/index.ts +131 -19
- package/src/services/autostart-generator.ts +8 -6
- package/src/services/supervisor-state.ts +100 -0
- package/src/services/supervisor-stopped-page.ts +73 -0
- package/src/services/supervisor.ts +144 -50
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.53] - 2026-04-07
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- **Supervisor Always Alive**: `ppm stop` now does a soft stop — kills server only, supervisor stays alive with Cloud WS + tunnel. Use `ppm stop --kill` or `ppm down` for full shutdown.
|
|
7
|
+
- **`ppm down` command**: Alias for `ppm stop --kill` (full shutdown).
|
|
8
|
+
- **`ppm stop --kill` flag**: Full shutdown that kills supervisor + server + tunnel.
|
|
9
|
+
- **Stopped page**: When server is stopped, tunnel URL serves a minimal HTML status page + 503 on `/api/health`.
|
|
10
|
+
- **Supervisor detection**: `ppm start` detects existing supervisor and resumes/upgrades instead of spawning a new one.
|
|
11
|
+
- **Cloud WS commands**: `start` (resume from stopped), `shutdown` (full kill), `stop` (now soft stop).
|
|
12
|
+
- **Exception handlers**: Supervisor catches `uncaughtException`/`unhandledRejection` — never crashes.
|
|
13
|
+
- **Lockfile**: Prevents concurrent `ppm start` races (`~/.ppm/.start-lock`).
|
|
14
|
+
- **Windows command file polling**: Supervisor polls command file every 1s on Windows (no SIGUSR2).
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
- **BREAKING**: `ppm stop` default behavior changed from full shutdown to soft stop.
|
|
18
|
+
- **Autostart**: Generates `__supervise__` instead of `__serve__`. Existing users must run `ppm autostart disable && ppm autostart enable` to regenerate.
|
|
19
|
+
- **Supervisor modularized**: Split into `supervisor.ts` (orchestrator), `supervisor-state.ts` (state machine + IPC), `supervisor-stopped-page.ts` (stopped HTML server).
|
|
20
|
+
|
|
3
21
|
## [0.9.52] - 2026-04-07
|
|
4
22
|
|
|
5
23
|
### Added
|
|
@@ -2,7 +2,37 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to PPM are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/).
|
|
4
4
|
|
|
5
|
-
**Current Version:** v0.9.
|
|
5
|
+
**Current Version:** v0.9.10
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## [0.9.11] — 2026-04-07
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **Supervisor Always Alive Feature** — Distinguish between soft stop (server shutdown) and full shutdown (supervisor shutdown)
|
|
13
|
+
- `ppm stop` now performs SOFT STOP: kills server only, supervisor remains alive with Cloud WS + tunnel connectivity
|
|
14
|
+
- `ppm stop --kill` or `ppm down` performs FULL SHUTDOWN: kills everything (old `ppm stop` behavior)
|
|
15
|
+
- Supervisor now has new `stopped` state (in addition to running, paused, upgrading)
|
|
16
|
+
- When stopped, minimal HTML page served on the port (503 status on /api/health)
|
|
17
|
+
- `ppm start` detects existing supervisor and handles resume/upgrade scenarios
|
|
18
|
+
- Autostart now uses `__supervise__` instead of `__serve__` for consistency
|
|
19
|
+
- Cloud WS has new commands: `start`, `shutdown` (stop is now soft stop, separate from shutdown)
|
|
20
|
+
- Supervisor has uncaughtException/unhandledRejection handlers (never crashes)
|
|
21
|
+
- Supervisor logic modularized into 3 files: supervisor.ts (orchestrator), supervisor-state.ts (state machine), supervisor-stopped-page.ts (503 page)
|
|
22
|
+
|
|
23
|
+
### Technical Details
|
|
24
|
+
- **Files Created:**
|
|
25
|
+
- `src/services/supervisor-state.ts` — State machine, IPC command file handling
|
|
26
|
+
- `src/services/supervisor-stopped-page.ts` — Minimal 503 HTML response
|
|
27
|
+
- Enhanced `src/services/supervisor.ts` — Orchestrator with stopped state support
|
|
28
|
+
- **Files Modified:**
|
|
29
|
+
- `src/cli/commands/stop.ts` — Added --kill flag, soft stop default, ppm down alias
|
|
30
|
+
- `src/server/index.ts` — Resume detection for existing supervisor (`ppm start`)
|
|
31
|
+
- `src/services/autostart-generator.ts` — Uses __supervise__ entry point
|
|
32
|
+
- Cloud WS endpoints updated with new commands
|
|
33
|
+
- **Type Changes:** SupervisorState = "running" | "paused" | "stopped" | "upgrading"
|
|
34
|
+
- **API Changes:** GET /api/health returns 503 when server stopped (supervisor still running)
|
|
35
|
+
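- **Breaking Changes:** `ppm stop` now performs a soft stop by default (supervisor stays alive); use `ppm stop --kill` or `ppm down` for the previous full-shutdown behavior. If no supervisor is recorded, `ppm stop` falls back to a full stop.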
|
|
6
36
|
|
|
7
37
|
---
|
|
8
38
|
|
package/docs/project-roadmap.md
CHANGED
|
@@ -38,12 +38,13 @@ PPM is the **lightest path from phone to code** — a self-hosted, BYOK, multi-d
|
|
|
38
38
|
|
|
39
39
|
**Theme:** Multi-device access + AI chat improvements. Solve the "I can't reach my PPM from my phone" problem.
|
|
40
40
|
|
|
41
|
-
| Feature | Priority | Description |
|
|
42
|
-
|
|
43
|
-
| **PPM Cloud** | Critical | Separate cloud service for device registry + tunnel URL sync. Google OAuth login. CLI `ppm cloud link` syncs tunnel URL. Open cloud dashboard on any device → see machines → tap to connect. NO code/data through cloud — only URLs + metadata. |
|
|
44
|
-
| **Auto-start** | High | PPM starts on boot. macOS launchd, Linux systemd, Windows Task Scheduler. CLI: `ppm autostart enable/disable`. Required for "always accessible" story. |
|
|
45
|
-
| **Auto-upgrade** | High | Supervisor checks npm registry every 15min. UI banner shows when update available. One-click upgrade via API or CLI. Supervisor self-replaces after install (no OS autostart dependency).
|
|
46
|
-
| **
|
|
41
|
+
| Feature | Priority | Status | Description |
|
|
42
|
+
|---------|----------|--------|-------------|
|
|
43
|
+
| **PPM Cloud** | Critical | — | Separate cloud service for device registry + tunnel URL sync. Google OAuth login. CLI `ppm cloud link` syncs tunnel URL. Open cloud dashboard on any device → see machines → tap to connect. NO code/data through cloud — only URLs + metadata. |
|
|
44
|
+
| **Auto-start** | High | — | PPM starts on boot. macOS launchd, Linux systemd, Windows Task Scheduler. CLI: `ppm autostart enable/disable`. Required for "always accessible" story. |
|
|
45
|
+
| **Auto-upgrade** | High | ✅ Done | Supervisor checks npm registry every 15min. UI banner shows when update available. One-click upgrade via API or CLI. Supervisor self-replaces after install (no OS autostart dependency). **Completed in v0.8.54** |
|
|
46
|
+
| **Supervisor Always Alive** | High | ✅ Done | Soft stop (server shutdown, supervisor stays) vs full shutdown. New `stopped` state. Cloud WS + tunnel stay active when stopped. `ppm start` resumes without supervisor restart. Modularized: supervisor.ts, supervisor-state.ts, supervisor-stopped-page.ts. **Completed in v0.9.11** |
|
|
47
|
+
| **AI Chat enhancements** | High | — | Tool allow/deny config per session. Chat modes (plan/code/ask). Model selector (opus/sonnet/haiku). Effort level. Max turns. System prompt customization. Better streaming UX (collapsible tool calls). |
|
|
47
48
|
|
|
48
49
|
**PPM Cloud — scope guard:**
|
|
49
50
|
- Cloud is OPTIONAL convenience, never a dependency. PPM works 100% without it.
|
|
@@ -1628,13 +1628,81 @@ $ ppm upgrade
|
|
|
1628
1628
|
→ Works in headless environments (no OS autostart dependency)
|
|
1629
1629
|
|
|
1630
1630
|
$ ppm stop
|
|
1631
|
-
→
|
|
1632
|
-
→
|
|
1633
|
-
→
|
|
1631
|
+
→ SOFT STOP: kills server only, supervisor stays alive with Cloud WS + tunnel
|
|
1632
|
+
→ Supervisor transitions to "stopped" state
|
|
1633
|
+
→ Minimal HTML page served on port (503 status on /api/health)
|
|
1634
|
+
→ Tunnel and Cloud connectivity remain active
|
|
1635
|
+
→ `ppm start` resumes without restarting supervisor process
|
|
1636
|
+
|
|
1637
|
+
$ ppm stop --kill OR ppm down
|
|
1638
|
+
→ FULL SHUTDOWN: kills everything (supervisor + server + tunnel)
|
|
1639
|
+
→ Supervisor transitions to "upgrading" then terminates
|
|
1634
1640
|
→ Cleans up status.json and ppm.pid
|
|
1635
|
-
→ Graceful
|
|
1641
|
+
→ Graceful cleanup (close WS, cleanup PTY, stop tunnel)
|
|
1636
1642
|
```
|
|
1637
1643
|
|
|
1644
|
+
### Supervisor Architecture (v0.9.11+)
|
|
1645
|
+
|
|
1646
|
+
The supervisor is a long-lived parent process that manages server + tunnel children with resilience and state management.
|
|
1647
|
+
|
|
1648
|
+
**Architecture:**
|
|
1649
|
+
```
|
|
1650
|
+
Supervisor Process (parent)
|
|
1651
|
+
├── Server Child (Hono HTTP server)
|
|
1652
|
+
│ ├── Health checks every 30s (/api/health)
|
|
1653
|
+
│ ├── Auto-restart on crash (exponential backoff, max 10 restarts)
|
|
1654
|
+
│ └── If in "stopped" state, serves minimal 503 page instead of restarting
|
|
1655
|
+
│
|
|
1656
|
+
├── Tunnel Child (Cloudflare Quick Tunnel, if --share)
|
|
1657
|
+
│ ├── URL probe every 2min
|
|
1658
|
+
│ ├── Auto-reconnect on failure
|
|
1659
|
+
│ └── URL persisted to status.json
|
|
1660
|
+
│
|
|
1661
|
+
├── State Machine: "running" | "paused" | "stopped" | "upgrading"
|
|
1662
|
+
│ ├── running — Server spawned, tunnel optional, serving requests
|
|
1663
|
+
│ ├── paused — Supervisor paused (resume via signal)
|
|
1664
|
+
│ ├── stopped — Server stopped (soft stop), tunnel alive, Cloud WS active
|
|
1665
|
+
│ └── upgrading — Self-replace in progress
|
|
1666
|
+
│
|
|
1667
|
+
├── Upgrade Check (every 15min)
|
|
1668
|
+
│ └── npm registry poll → availableVersion written to status.json
|
|
1669
|
+
│
|
|
1670
|
+
├── Stopped Page Server
|
|
1671
|
+
│ ├── Lightweight HTTP handler on same port as server
|
|
1672
|
+
│ ├── Returns 503 on /api/health
|
|
1673
|
+
│ └── Tunnels Cloud WS calls through to PPM Cloud
|
|
1674
|
+
│
|
|
1675
|
+
└── Error Resilience
|
|
1676
|
+
├── uncaughtException → log + continue (supervisor stays alive)
|
|
1677
|
+
├── unhandledRejection → log + continue
|
|
1678
|
+
└── Signal handlers: SIGTERM (full shutdown), SIGUSR1 (self-replace), SIGUSR2 (restart skip backoff)
|
|
1679
|
+
```
|
|
1680
|
+
|
|
1681
|
+
**Soft Stop vs Full Shutdown:**
|
|
1682
|
+
| Command | Server | Supervisor | Tunnel | Use Case |
|
|
1683
|
+
|---------|--------|------------|--------|----------|
|
|
1684
|
+
| `ppm stop` | Killed | Stays alive | Stays alive | Restart later with `ppm start` |
|
|
1685
|
+
| `ppm stop --kill` | Killed | Killed | Killed | Full cleanup, exit |
|
|
1686
|
+
| `ppm down` | Killed | Killed | Killed | Full cleanup, exit |
|
|
1687
|
+
|
|
1688
|
+
**State Persistence:**
|
|
1689
|
+
- Status file: `~/.ppm/status.json` — PID, port, host, shareUrl, supervisorPid, availableVersion, state
|
|
1690
|
+
- Lock file: `~/.ppm/.start-lock` — Prevent concurrent starts
|
|
1691
|
+
- Command file: `~/.ppm/.supervisor-cmd` — IPC for soft_stop, resume, self_replace
|
|
1692
|
+
|
|
1693
|
+
**Stopped Page Implementation:**
|
|
1694
|
+
- Minimal HTTP server on same port as main server
|
|
1695
|
+
- Serves `503 Service Unavailable` on /api/health
|
|
1696
|
+
- Proxies Cloud WS calls to PPM Cloud (if tunnel configured)
|
|
1697
|
+
- Allows `ppm start` to resume without supervisor restart
|
|
1698
|
+
|
|
1699
|
+
**Files (Modular Design):**
|
|
1700
|
+
- `src/services/supervisor.ts` — Main orchestrator (spawn, health checks, upgrade checks)
|
|
1701
|
+
- `src/services/supervisor-state.ts` — State machine, IPC command handling, signal routing
|
|
1702
|
+
- `src/services/supervisor-stopped-page.ts` — Minimal 503 page + Cloud WS proxy
|
|
1703
|
+
|
|
1704
|
+
---
|
|
1705
|
+
|
|
1638
1706
|
### Future: Multi-Machine (Not in v2)
|
|
1639
1707
|
Would require:
|
|
1640
1708
|
- Central state server (Redis/Postgres)
|
package/package.json
CHANGED
|
@@ -42,6 +42,35 @@ export async function restartServer(options: { config?: string; force?: boolean
|
|
|
42
42
|
process.exit(1);
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
+
// Stopped state: treat restart as resume (send resume command)
|
|
46
|
+
if (state === "stopped") {
|
|
47
|
+
console.log("\n Server is stopped. Resuming via supervisor...\n");
|
|
48
|
+
const cmdFile = resolve(PPM_DIR, ".supervisor-cmd");
|
|
49
|
+
writeFileSync(cmdFile, JSON.stringify({ action: "resume" }));
|
|
50
|
+
// Signal supervisor (Windows: polling picks up command file)
|
|
51
|
+
if (process.platform !== "win32") {
|
|
52
|
+
try { process.kill(supervisorPid, "SIGUSR2"); } catch (e) {
|
|
53
|
+
console.error(` ✗ Failed to signal supervisor: ${e}`);
|
|
54
|
+
process.exit(1);
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
// Wait for state to change back to running
|
|
58
|
+
const rStart = Date.now();
|
|
59
|
+
while (Date.now() - rStart < 15_000) {
|
|
60
|
+
await Bun.sleep(500);
|
|
61
|
+
try {
|
|
62
|
+
const newStatus = JSON.parse(readFileSync(STATUS_FILE, "utf-8"));
|
|
63
|
+
if (newStatus.state === "running" && newStatus.pid) {
|
|
64
|
+
console.log(` ✓ Server resumed (PID: ${newStatus.pid})`);
|
|
65
|
+
if (newStatus.shareUrl) console.log(` ➜ Share: ${newStatus.shareUrl}`);
|
|
66
|
+
process.exit(0);
|
|
67
|
+
}
|
|
68
|
+
} catch {}
|
|
69
|
+
}
|
|
70
|
+
console.error(" ⚠ Resume timed out. Check: ppm logs");
|
|
71
|
+
process.exit(1);
|
|
72
|
+
}
|
|
73
|
+
|
|
45
74
|
const oldServerPid = status.pid as number | undefined;
|
|
46
75
|
console.log("\n Restarting PPM server via supervisor...");
|
|
47
76
|
console.log(" If you're using PPM terminal, wait a few seconds for auto-reconnect.\n");
|
package/src/cli/commands/stop.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { resolve } from "node:path";
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
|
-
import { readFileSync, unlinkSync, existsSync } from "node:fs";
|
|
3
|
+
import { readFileSync, writeFileSync, unlinkSync, existsSync } from "node:fs";
|
|
4
4
|
|
|
5
5
|
const PPM_DIR = process.env.PPM_HOME || resolve(homedir(), ".ppm");
|
|
6
6
|
const PID_FILE = resolve(PPM_DIR, "ppm.pid");
|
|
7
7
|
const STATUS_FILE = resolve(PPM_DIR, "status.json");
|
|
8
|
+
const CMD_FILE = resolve(PPM_DIR, ".supervisor-cmd");
|
|
8
9
|
|
|
9
10
|
function killPid(pid: number, label: string): boolean {
|
|
10
11
|
try {
|
|
@@ -51,7 +52,7 @@ function killAllByName(name: string): number {
|
|
|
51
52
|
return killed;
|
|
52
53
|
}
|
|
53
54
|
|
|
54
|
-
export async function stopServer(options?: { all?: boolean }) {
|
|
55
|
+
export async function stopServer(options?: { all?: boolean; kill?: boolean }) {
|
|
55
56
|
if (options?.all) {
|
|
56
57
|
console.log(" Stopping all PPM and cloudflared processes...\n");
|
|
57
58
|
const cfKilled = killAllByName("cloudflared");
|
|
@@ -76,14 +77,76 @@ export async function stopServer(options?: { all?: boolean }) {
|
|
|
76
77
|
return;
|
|
77
78
|
}
|
|
78
79
|
|
|
80
|
+
// Full shutdown: --kill flag or `ppm down`
|
|
81
|
+
if (options?.kill) {
|
|
82
|
+
return hardStop();
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Default: soft stop — kill server only, supervisor stays alive
|
|
86
|
+
return softStopCmd();
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/**
 * Soft stop: ask the running supervisor to kill only the server child.
 *
 * Writes `{ action: "soft_stop" }` to the supervisor command file, signals the
 * supervisor with SIGUSR2 (skipped on Windows, where the supervisor polls the
 * command file), then polls status.json for up to 5s until `state` becomes
 * "stopped".
 *
 * Falls back to `hardStop()` when status.json records no usable supervisor PID,
 * and to `cleanup()` when the recorded supervisor process is no longer alive.
 */
async function softStopCmd() {
  let status: Record<string, unknown> | null = null;
  if (existsSync(STATUS_FILE)) {
    try { status = JSON.parse(readFileSync(STATUS_FILE, "utf-8")); } catch {}
  }

  // status.json is untyped JSON: only accept a positive numeric PID, so a
  // malformed value is never handed to process.kill().
  const recordedPid = status?.supervisorPid;
  const supervisorPid =
    typeof recordedPid === "number" && recordedPid > 0 ? recordedPid : null;

  if (supervisorPid === null) {
    // No supervisor — fall back to hard stop (legacy)
    return hardStop();
  }

  // Signal 0 only probes for existence; it throws when the process is gone.
  try { process.kill(supervisorPid, 0); } catch {
    console.log("Supervisor not running. Cleaning up.");
    cleanup();
    return;
  }

  // Already stopped?
  if (status?.state === "stopped") {
    console.log("PPM server is already stopped. Supervisor still alive.");
    console.log("Use 'ppm stop --kill' or 'ppm down' to fully shut down.");
    return;
  }

  // Write soft stop command file + signal supervisor (Windows: polling picks it up)
  writeFileSync(CMD_FILE, JSON.stringify({ action: "soft_stop" }));
  if (process.platform !== "win32") {
    try { process.kill(supervisorPid, "SIGUSR2"); } catch (e) {
      // The request was never delivered: remove the command file so that a
      // later signal or poll does not execute a stale soft_stop.
      try { unlinkSync(CMD_FILE); } catch {}
      console.error(` Failed to signal supervisor: ${e}`);
      return;
    }
  }

  // Wait for state to change to "stopped" in status.json
  const deadline = Date.now() + 5000;
  while (Date.now() < deadline) {
    await Bun.sleep(500);
    try {
      const data = JSON.parse(readFileSync(STATUS_FILE, "utf-8"));
      if (data.state === "stopped") {
        console.log("PPM server stopped. Supervisor still alive (Cloud WS + tunnel).");
        console.log("Use 'ppm start' to restart or 'ppm stop --kill' to fully shut down.");
        return;
      }
    } catch {
      // status.json missing or mid-write — retry on the next tick
    }
  }
  // Timed out without confirmation; the request may still be processed.
  console.log("PPM server stop requested.");
}
|
|
141
|
+
|
|
142
|
+
/** Hard stop: SIGTERM supervisor → everything dies (current behavior) */
|
|
143
|
+
async function hardStop() {
|
|
79
144
|
let status: { pid?: number; tunnelPid?: number; supervisorPid?: number } | null = null;
|
|
80
145
|
|
|
81
|
-
// Read status.json
|
|
82
146
|
if (existsSync(STATUS_FILE)) {
|
|
83
147
|
try { status = JSON.parse(readFileSync(STATUS_FILE, "utf-8")); } catch {}
|
|
84
148
|
}
|
|
85
149
|
|
|
86
|
-
// Fallback to ppm.pid (now stores supervisor PID)
|
|
87
150
|
const pidFromFile = existsSync(PID_FILE)
|
|
88
151
|
? parseInt(readFileSync(PID_FILE, "utf-8").trim(), 10)
|
|
89
152
|
: NaN;
|
|
@@ -102,10 +165,8 @@ export async function stopServer(options?: { all?: boolean }) {
|
|
|
102
165
|
// Kill supervisor first — its SIGTERM handler kills server + tunnel children
|
|
103
166
|
if (supervisorPid) {
|
|
104
167
|
killPid(supervisorPid, "supervisor");
|
|
105
|
-
// Give supervisor 2s to gracefully kill children
|
|
106
168
|
await Bun.sleep(2000);
|
|
107
169
|
} else if (fallbackPid) {
|
|
108
|
-
// Legacy: ppm.pid might be server PID (pre-supervisor) or supervisor PID
|
|
109
170
|
killPid(fallbackPid, "supervisor/server (pidfile)");
|
|
110
171
|
await Bun.sleep(1000);
|
|
111
172
|
}
|
package/src/index.ts
CHANGED
|
@@ -39,13 +39,22 @@ program
|
|
|
39
39
|
|
|
40
40
|
program
|
|
41
41
|
.command("stop")
|
|
42
|
-
.description("Stop the PPM
|
|
42
|
+
.description("Stop the PPM server (supervisor stays alive)")
|
|
43
43
|
.option("-a, --all", "Kill all PPM and cloudflared processes (including untracked)")
|
|
44
|
+
.option("--kill", "Full shutdown (kills supervisor too)")
|
|
44
45
|
.action(async (options) => {
|
|
45
46
|
const { stopServer } = await import("./cli/commands/stop.ts");
|
|
46
47
|
await stopServer(options);
|
|
47
48
|
});
|
|
48
49
|
|
|
50
|
+
program
|
|
51
|
+
.command("down")
|
|
52
|
+
.description("Fully shut down PPM (supervisor + server + tunnel)")
|
|
53
|
+
.action(async () => {
|
|
54
|
+
const { stopServer } = await import("./cli/commands/stop.ts");
|
|
55
|
+
await stopServer({ kill: true });
|
|
56
|
+
});
|
|
57
|
+
|
|
49
58
|
program
|
|
50
59
|
.command("restart")
|
|
51
60
|
.description("Restart the server (keeps tunnel alive)")
|
package/src/server/index.ts
CHANGED
|
@@ -169,6 +169,49 @@ app.route("/api/cloud", cloudRoutes);
|
|
|
169
169
|
// Static files / SPA fallback (non-API routes)
|
|
170
170
|
app.route("/", staticRoutes);
|
|
171
171
|
|
|
172
|
+
// ─── Helpers for supervisor detection ───────────────────────────────────
|
|
173
|
+
/**
 * Poll status.json once per second, for up to 30s, until it reports a
 * supervisor PID different from `oldPid` with state "running".
 *
 * Never returns normally: exits the process with code 0 once the new
 * supervisor is seen, or with code 1 after the timeout.
 */
async function waitForNewSupervisor(statusFile: string, oldPid: number) {
  const fs = await import("node:fs");
  const deadline = Date.now() + 30_000;

  while (Date.now() < deadline) {
    await Bun.sleep(1000);
    try {
      const snapshot = JSON.parse(fs.readFileSync(statusFile, "utf-8"));
      const newPid = snapshot.supervisorPid;
      const replaced = Boolean(newPid) && newPid !== oldPid;
      if (replaced && snapshot.state === "running") {
        console.log(` Upgrade complete (new PID: ${newPid})`);
        process.exit(0);
      }
    } catch {
      // status.json unreadable or mid-write — try again on the next tick
    }
  }

  console.error(" Upgrade timed out (30s). Check: ppm logs");
  process.exit(1);
}
|
|
189
|
+
|
|
190
|
+
/**
 * Poll status.json every 500ms, for up to 10s, until it reports state
 * "running" with a server PID and `/api/health` on 127.0.0.1:`port` answers
 * with an OK status (2s timeout per probe).
 *
 * Never returns normally: exits with code 0 both when the server is confirmed
 * ready and when the wait times out (the resume request has been sent either
 * way).
 */
async function waitForServerReady(statusFile: string, port: number) {
  const fs = await import("node:fs");
  const healthUrl = `http://127.0.0.1:${port}/api/health`;
  const deadline = Date.now() + 10_000;

  while (Date.now() < deadline) {
    await Bun.sleep(500);
    try {
      const snapshot = JSON.parse(fs.readFileSync(statusFile, "utf-8"));
      if (snapshot.state !== "running" || !snapshot.pid) continue;

      // Verify server is responding
      const res = await fetch(healthUrl, { signal: AbortSignal.timeout(2000) });
      if (!res.ok) continue;

      console.log(` Server is ready (PID: ${snapshot.pid}).`);
      process.exit(0);
    } catch {
      // unreadable status file or failed health probe — keep polling
    }
  }

  console.log(" Resume signal sent. Check: ppm status");
  process.exit(0);
}
|
|
214
|
+
|
|
172
215
|
export async function startServer(options: {
|
|
173
216
|
port?: string;
|
|
174
217
|
share?: boolean;
|
|
@@ -189,36 +232,105 @@ export async function startServer(options: {
|
|
|
189
232
|
const { bootstrapProviders } = await import("../providers/registry.ts");
|
|
190
233
|
await bootstrapProviders();
|
|
191
234
|
|
|
192
|
-
// Check if port is already in use before spawning supervisor
|
|
193
|
-
const portInUse = await new Promise<boolean>((resolve) => {
|
|
194
|
-
const net = require("node:net") as typeof import("node:net");
|
|
195
|
-
const tester = net.createServer()
|
|
196
|
-
.once("error", (err: NodeJS.ErrnoException) => {
|
|
197
|
-
resolve(err.code === "EADDRINUSE");
|
|
198
|
-
})
|
|
199
|
-
.once("listening", () => {
|
|
200
|
-
tester.close(() => resolve(false));
|
|
201
|
-
})
|
|
202
|
-
.listen(port, host);
|
|
203
|
-
});
|
|
204
|
-
if (portInUse) {
|
|
205
|
-
console.error(`\n ✗ Port ${port} is already in use.`);
|
|
206
|
-
console.error(` Run 'ppm stop' first or use a different port with --port.\n`);
|
|
207
|
-
process.exit(1);
|
|
208
|
-
}
|
|
209
|
-
|
|
210
235
|
{
|
|
211
236
|
const { resolve } = await import("node:path");
|
|
212
237
|
const { homedir } = await import("node:os");
|
|
213
238
|
const { writeFileSync, readFileSync, mkdirSync, existsSync, openSync } = await import("node:fs");
|
|
214
239
|
const { isCompiledBinary } = await import("../services/autostart-generator.ts");
|
|
240
|
+
const { writeCmd, acquireLock, releaseLock } = await import("../services/supervisor-state.ts");
|
|
215
241
|
|
|
216
242
|
const ppmDir = process.env.PPM_HOME || resolve(homedir(), ".ppm");
|
|
217
243
|
if (!existsSync(ppmDir)) mkdirSync(ppmDir, { recursive: true });
|
|
218
244
|
const pidFile = resolve(ppmDir, "ppm.pid");
|
|
219
245
|
const statusFile = resolve(ppmDir, "status.json");
|
|
220
246
|
|
|
221
|
-
//
|
|
247
|
+
// Prevent concurrent ppm start races
|
|
248
|
+
if (!acquireLock()) {
|
|
249
|
+
console.log("\n Another 'ppm start' is already in progress. Exiting.\n");
|
|
250
|
+
process.exit(1);
|
|
251
|
+
}
|
|
252
|
+
// Release lock on exit (normal or error)
|
|
253
|
+
process.on("exit", releaseLock);
|
|
254
|
+
|
|
255
|
+
// ── Check for existing supervisor ──────────────────────────────────
|
|
256
|
+
if (existsSync(statusFile)) {
|
|
257
|
+
try {
|
|
258
|
+
const status = JSON.parse(readFileSync(statusFile, "utf-8"));
|
|
259
|
+
const supervisorPid = status.supervisorPid as number;
|
|
260
|
+
|
|
261
|
+
if (supervisorPid) {
|
|
262
|
+
try {
|
|
263
|
+
process.kill(supervisorPid, 0); // throws if dead
|
|
264
|
+
|
|
265
|
+
// Supervisor is alive — handle based on state
|
|
266
|
+
const state = status.state as string;
|
|
267
|
+
const runningVersion = status.serverVersion as string;
|
|
268
|
+
|
|
269
|
+
if (state === "stopped") {
|
|
270
|
+
console.log(" Supervisor is alive (stopped state). Resuming server...");
|
|
271
|
+
if (runningVersion !== VERSION) {
|
|
272
|
+
console.log(` Upgrading: ${runningVersion} -> ${VERSION}`);
|
|
273
|
+
process.kill(supervisorPid, "SIGUSR1");
|
|
274
|
+
await waitForNewSupervisor(statusFile, supervisorPid);
|
|
275
|
+
} else {
|
|
276
|
+
writeCmd("resume");
|
|
277
|
+
process.kill(supervisorPid, "SIGUSR2");
|
|
278
|
+
await waitForServerReady(statusFile, port);
|
|
279
|
+
}
|
|
280
|
+
return;
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
if (state === "running") {
|
|
284
|
+
if (runningVersion !== VERSION) {
|
|
285
|
+
console.log(` Supervisor running (v${runningVersion}). Upgrading to v${VERSION}...`);
|
|
286
|
+
process.kill(supervisorPid, "SIGUSR1");
|
|
287
|
+
await waitForNewSupervisor(statusFile, supervisorPid);
|
|
288
|
+
} else {
|
|
289
|
+
console.log(`\n PPM is already running (PID: ${supervisorPid}).`);
|
|
290
|
+
console.log(` Use 'ppm restart' to reload or 'ppm stop' first.\n`);
|
|
291
|
+
process.exit(0);
|
|
292
|
+
}
|
|
293
|
+
return;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
if (state === "paused") {
|
|
297
|
+
console.log(" Supervisor is paused (max restarts). Sending resume...");
|
|
298
|
+
writeCmd("resume");
|
|
299
|
+
process.kill(supervisorPid, "SIGUSR2");
|
|
300
|
+
await waitForServerReady(statusFile, port);
|
|
301
|
+
return;
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (state === "upgrading") {
|
|
305
|
+
console.log(" Supervisor is currently upgrading. Please wait...");
|
|
306
|
+
process.exit(0);
|
|
307
|
+
}
|
|
308
|
+
} catch {
|
|
309
|
+
// Supervisor PID is dead, continue with fresh start
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
} catch {}
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
// ── Check port availability ────────────────────────────────────────
|
|
316
|
+
const portInUse = await new Promise<boolean>((resolve) => {
|
|
317
|
+
const net = require("node:net") as typeof import("node:net");
|
|
318
|
+
const tester = net.createServer()
|
|
319
|
+
.once("error", (err: NodeJS.ErrnoException) => {
|
|
320
|
+
resolve(err.code === "EADDRINUSE");
|
|
321
|
+
})
|
|
322
|
+
.once("listening", () => {
|
|
323
|
+
tester.close(() => resolve(false));
|
|
324
|
+
})
|
|
325
|
+
.listen(port, host);
|
|
326
|
+
});
|
|
327
|
+
if (portInUse) {
|
|
328
|
+
console.error(`\n ✗ Port ${port} is already in use.`);
|
|
329
|
+
console.error(` Run 'ppm stop' first or use a different port with --port.\n`);
|
|
330
|
+
process.exit(1);
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
// Kill any leftover processes from previous run (stale status.json)
|
|
222
334
|
if (existsSync(statusFile)) {
|
|
223
335
|
try {
|
|
224
336
|
const prev = JSON.parse(readFileSync(statusFile, "utf-8"));
|
|
@@ -39,24 +39,26 @@ export function resolveBunPath(): string {
|
|
|
39
39
|
throw new Error("Could not resolve bun binary. Install Bun or add it to PATH.");
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
-
/** Build the command array for the PPM
|
|
42
|
+
/** Build the command array for the PPM supervisor process */
|
|
43
43
|
export function buildExecCommand(config: AutoStartConfig): string[] {
|
|
44
44
|
if (isCompiledBinary()) {
|
|
45
|
-
// Compiled binary: just run self with
|
|
46
|
-
const args = [process.execPath, "
|
|
45
|
+
// Compiled binary: just run self with __supervise__ args
|
|
46
|
+
const args = [process.execPath, "__supervise__", String(config.port), config.host];
|
|
47
47
|
if (config.configPath) args.push(config.configPath);
|
|
48
48
|
if (config.profile) args.push(config.profile);
|
|
49
|
+
if (config.share) args.push("--share");
|
|
49
50
|
return args;
|
|
50
51
|
}
|
|
51
52
|
|
|
52
|
-
// Bun runtime: bun run <script>
|
|
53
|
+
// Bun runtime: bun run <script> __supervise__ <port> <host> [config] [profile]
|
|
53
54
|
const bunPath = resolveBunPath();
|
|
54
|
-
const scriptPath = resolve(import.meta.dir, "
|
|
55
|
-
const args = [bunPath, "run", scriptPath, "
|
|
55
|
+
const scriptPath = resolve(import.meta.dir, "supervisor.ts");
|
|
56
|
+
const args = [bunPath, "run", scriptPath, "__supervise__", String(config.port), config.host];
|
|
56
57
|
if (config.configPath) args.push(config.configPath);
|
|
57
58
|
else args.push(""); // placeholder
|
|
58
59
|
if (config.profile) args.push(config.profile);
|
|
59
60
|
else args.push(""); // placeholder
|
|
61
|
+
if (config.share) args.push("--share");
|
|
60
62
|
return args;
|
|
61
63
|
}
|
|
62
64
|
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Supervisor state machine — state transitions, IPC command file, signal handling.
|
|
3
|
+
* Extracted from supervisor.ts to keep the orchestrator lean.
|
|
4
|
+
*/
|
|
5
|
+
import { resolve } from "node:path";
|
|
6
|
+
import { homedir } from "node:os";
|
|
7
|
+
import {
|
|
8
|
+
readFileSync, writeFileSync, existsSync, unlinkSync, renameSync, openSync, closeSync,
|
|
9
|
+
} from "node:fs";
|
|
10
|
+
import { constants } from "node:fs";
|
|
11
|
+
|
|
12
|
+
const PPM_DIR = resolve(process.env.PPM_HOME || resolve(homedir(), ".ppm"));
|
|
13
|
+
export const CMD_FILE = resolve(PPM_DIR, ".supervisor-cmd");
|
|
14
|
+
export const STATUS_FILE = resolve(PPM_DIR, "status.json");
|
|
15
|
+
export const PID_FILE = resolve(PPM_DIR, "ppm.pid");
|
|
16
|
+
export const LOCK_FILE = resolve(PPM_DIR, ".start-lock");
|
|
17
|
+
|
|
18
|
+
// ─── State ─────────────────────────────────────────────────────────────
|
|
19
|
+
/** Lifecycle states the supervisor can be in. */
export type SupervisorState = "running" | "paused" | "stopped" | "upgrading";

/** Current in-memory state; a supervisor starts out "running". */
let _state: SupervisorState = "running";

/** Resolvers of every caller currently blocked in waitForResume(). */
let _resumeWaiters: Array<() => void> = [];

/** Returns the current supervisor state. */
export function getState(): SupervisorState {
  return _state;
}

/** Replaces the current supervisor state. */
export function setState(s: SupervisorState) {
  _state = s;
}

/**
 * Returns a promise that settles on the next triggerResume() call.
 *
 * Each call registers its own waiter, so concurrent callers are all released
 * together and an earlier pending promise is never orphaned by a later call.
 */
export function waitForResume(): Promise<void> {
  return new Promise<void>((res) => {
    _resumeWaiters.push(res);
  });
}

/**
 * Releases every pending waitForResume() promise.
 * A no-op when nobody is waiting; the trigger is not remembered for future waiters.
 */
export function triggerResume(): void {
  // Swap the list out first so a waiter that re-registers while being woken
  // is queued for the next trigger instead of this one.
  const pending = _resumeWaiters;
  _resumeWaiters = [];
  for (const wake of pending) wake();
}
|
|
38
|
+
|
|
39
|
+
// ─── Status file helpers ───────────────────────────────────────────────
|
|
40
|
+
/**
 * Reads status.json and returns its contents.
 *
 * Returns an empty object when the file is missing, unreadable, not valid
 * JSON, or valid JSON that is not a plain object (e.g. `null` or an array),
 * so callers can always index the result safely.
 */
export function readStatus(): Record<string, unknown> {
  try {
    if (!existsSync(STATUS_FILE)) return {};
    const parsed: unknown = JSON.parse(readFileSync(STATUS_FILE, "utf-8"));
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    // best-effort read: fall through to the empty default
  }
  return {};
}
|
|
46
|
+
|
|
47
|
+
/**
 * Shallow-merges `patch` into the current status.json contents and writes the
 * result back. Best-effort: failures are swallowed and never thrown.
 *
 * The new contents are written to a temporary sibling file and renamed over
 * status.json, so processes polling the file do not observe a half-written
 * document. If that path fails, it falls back to writing the file in place.
 */
export function updateStatus(patch: Record<string, unknown>) {
  try {
    const payload = JSON.stringify({ ...readStatus(), ...patch });
    const tmpFile = `${STATUS_FILE}.${process.pid}.tmp`;
    try {
      writeFileSync(tmpFile, payload);
      renameSync(tmpFile, STATUS_FILE);
    } catch {
      // Temp-file route unavailable (e.g. rename refused): clean up and fall
      // back to a direct write so the update is not lost.
      try { unlinkSync(tmpFile); } catch {}
      writeFileSync(STATUS_FILE, payload);
    }
  } catch {
    // status updates are best-effort
  }
}
|
|
53
|
+
|
|
54
|
+
// ─── Command file protocol ─────────────────────────────────────────────
|
|
55
|
+
export type CmdAction = "soft_stop" | "resume";
|
|
56
|
+
|
|
57
|
+
/**
 * Atomically claims and reads the pending supervisor command, if any.
 *
 * The command file is renamed to `<CMD_FILE>.claimed` first; when two handlers
 * race, only one rename succeeds and the other gets ENOENT. The claimed file
 * is always removed afterwards, whether or not it could be parsed.
 *
 * @returns the parsed command, or `null` when there is no command file, it
 *   was claimed by another handler, or its contents are not an object with a
 *   string `action`.
 */
export function readAndDeleteCmd(): { action: CmdAction } | null {
  const claimed = CMD_FILE + ".claimed";
  try {
    renameSync(CMD_FILE, claimed); // atomic claim — second caller gets ENOENT
    const parsed: unknown = JSON.parse(readFileSync(claimed, "utf-8"));
    const hasAction =
      typeof parsed === "object" &&
      parsed !== null &&
      typeof (parsed as { action?: unknown }).action === "string";
    // Only the shape is checked here; the set of accepted action names is
    // left to the caller.
    return hasAction ? (parsed as { action: CmdAction }) : null;
  } catch {
    // No command file, already claimed by another handler, or invalid JSON
    return null;
  } finally {
    // Cleanup must not discard a command that was read successfully.
    try { unlinkSync(claimed); } catch {}
  }
}
|
|
71
|
+
|
|
72
|
+
/**
 * Queues `action` for the supervisor by publishing the command file.
 *
 * The payload is staged in a sibling temp file and then renamed onto
 * CMD_FILE. Writing CMD_FILE in place would let a reader that polls for the
 * file claim it between creation and the write, and then parse an empty or
 * truncated payload; with the rename the file only ever appears fully written.
 *
 * @param action Command to deliver.
 * @throws Whatever the underlying write/rename throws; the temp file is
 *         removed before the error is rethrown.
 */
export function writeCmd(action: CmdAction) {
  // PID in the name keeps concurrent writers from sharing a staging file.
  const staged = `${CMD_FILE}.${process.pid}.tmp`;
  try {
    writeFileSync(staged, JSON.stringify({ action }));
    renameSync(staged, CMD_FILE);
  } catch (err) {
    try { unlinkSync(staged); } catch {}
    throw err;
  }
}
|
|
75
|
+
|
|
76
|
+
// ─── Lockfile ──────────────────────────────────────────────────────────
|
|
77
|
+
/**
 * Creates the lock file exclusively and records this process's PID in it.
 * Throws if the file already exists or cannot be written; the descriptor is
 * always closed, even when the write fails.
 */
function _createLockFile(): void {
  const fd = openSync(LOCK_FILE, "wx"); // "wx": fail if the file exists (atomic)
  try {
    writeFileSync(fd, String(process.pid));
  } finally {
    closeSync(fd);
  }
}

/** Reports whether `pid` names a live process, using a signal-0 probe. */
function _lockHolderAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but belongs to someone else — still alive.
    return typeof err === "object" && err !== null
      && (err as { code?: unknown }).code === "EPERM";
  }
}

/**
 * Tries to take the start lock for this process.
 *
 * A lock whose recorded holder is no longer alive — or whose content is not a
 * positive PID — is considered stale: it is removed and re-created with the
 * same exclusive create, so two processes taking over a stale lock at the
 * same moment are less likely to both succeed than with a plain overwrite.
 *
 * @returns `true` when this process now holds the lock, `false` when a live
 *          process holds it or the lock file could not be read or created.
 */
export function acquireLock(): boolean {
  try {
    _createLockFile();
    return true;
  } catch {
    // Most likely the file already exists — inspect the holder below.
  }

  try {
    const pid = Number.parseInt(readFileSync(LOCK_FILE, "utf-8").trim(), 10);
    // pid <= 0 would make the probe target a process group, not a holder.
    if (Number.isInteger(pid) && pid > 0 && _lockHolderAlive(pid)) return false;

    // Stale lock — drop it and claim it with an exclusive create.
    try { unlinkSync(LOCK_FILE); } catch {}
    _createLockFile();
    return true;
  } catch {
    return false;
  }
}
|
|
97
|
+
|
|
98
|
+
/**
 * Removes the start lock file.
 *
 * Failures (for example the file is already gone) are ignored, so this is
 * safe to call from cleanup paths.
 */
export function releaseLock() {
  try {
    unlinkSync(LOCK_FILE);
  } catch {
    // Nothing to release, or the file could not be removed — ignore.
  }
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal HTTP server that serves a "stopped" page when the PPM server child is down.
|
|
3
|
+
* Binds to the same port so the tunnel URL still works.
|
|
4
|
+
*/
|
|
5
|
+
import { appendFileSync } from "node:fs";
|
|
6
|
+
import { resolve } from "node:path";
|
|
7
|
+
import { homedir } from "node:os";
|
|
8
|
+
|
|
9
|
+
// Log file path: `ppm.log` inside $PPM_HOME when that variable is set to a
// non-empty value, otherwise inside `.ppm` under the user's home directory.
const LOG_FILE = resolve(process.env.PPM_HOME || resolve(homedir(), ".ppm"), "ppm.log");
|
|
10
|
+
|
|
11
|
+
/**
 * Appends one timestamped line, tagged `[stopped-page]`, to the log file.
 * Write failures are ignored — logging must never break the caller.
 *
 * @param level Severity label placed in the line (e.g. "INFO", "WARN").
 * @param msg   Message text.
 */
function log(level: string, msg: string) {
  const stamp = new Date().toISOString();
  const line = `[${stamp}] [${level}] [stopped-page] ${msg}\n`;
  try {
    appendFileSync(LOG_FILE, line);
  } catch {
    // Best-effort logging only.
  }
}
|
|
15
|
+
|
|
16
|
+
/**
 * Static HTML document served for every path except `/api/health` while the
 * PPM server child is stopped. Self-contained: inline CSS, no scripts.
 */
const STOPPED_HTML = `<!DOCTYPE html>
<html><head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>PPM - Stopped</title>
<style>
body { font-family: system-ui; display: flex; justify-content: center;
align-items: center; min-height: 100vh; margin: 0;
background: #1a1a2e; color: #e0e0e0; }
.card { text-align: center; padding: 2rem; }
h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
p { color: #888; font-size: 0.9rem; }
.dot { display: inline-block; width: 10px; height: 10px;
border-radius: 50%; background: #f59e0b; margin-right: 8px; }
</style>
</head><body>
<div class="card">
<h1><span class="dot"></span>PPM Server Stopped</h1>
<p>The server is stopped but the supervisor is still running.</p>
<p>Use <code>ppm start</code> or Cloud dashboard to restart.</p>
</div>
</body></html>`;
|
|
38
|
+
|
|
39
|
+
// Handle of the stopped-page server while it is serving, or null when none is
// active. Assigned by startStoppedPage() and cleared by stopStoppedPage().
let stoppedServer: ReturnType<typeof Bun.serve> | null = null;
|
|
40
|
+
|
|
41
|
+
/**
 * Starts the placeholder HTTP server that answers while the PPM server is
 * stopped. Does nothing if the placeholder is already running.
 *
 * Routing: `/api/health` gets a 503 with the JSON body `{"status":"stopped"}`;
 * every other path gets the static stopped page as HTML.
 *
 * Failing to start the server is logged as a warning and not rethrown.
 *
 * @param port Port to listen on.
 * @param host Hostname/interface to bind to.
 */
export function startStoppedPage(port: number, host: string) {
  const alreadyServing = stoppedServer !== null;
  if (alreadyServing) return;

  const respond = (req: Request): Response => {
    const { pathname } = new URL(req.url);
    if (pathname !== "/api/health") {
      return new Response(STOPPED_HTML, {
        headers: { "Content-Type": "text/html" },
      });
    }
    // Health probes must see the server as unavailable.
    return new Response(JSON.stringify({ status: "stopped" }), {
      status: 503,
      headers: { "Content-Type": "application/json" },
    });
  };

  try {
    stoppedServer = Bun.serve({ port, hostname: host, fetch: respond });
    log("INFO", `Stopped page serving on port ${port}`);
  } catch (e) {
    log("WARN", `Failed to start stopped page: ${e}`);
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Shuts down the placeholder "stopped" server, if it is running.
 *
 * The server is stopped with `stop(true)` so that active connections are
 * closed as well. NOTE(review): the intent is that connections kept open by
 * the tunnel do not linger on the placeholder once the real server takes the
 * port back — confirm against the Bun version in use.
 */
export function stopStoppedPage() {
  if (!stoppedServer) return;

  stoppedServer.stop(true);
  stoppedServer = null;
  log("INFO", "Stopped page server shut down");
}
|
|
@@ -12,6 +12,13 @@ import {
|
|
|
12
12
|
unlinkSync,
|
|
13
13
|
} from "node:fs";
|
|
14
14
|
import { isCompiledBinary } from "./autostart-generator.ts";
|
|
15
|
+
import {
|
|
16
|
+
type SupervisorState,
|
|
17
|
+
getState, setState, waitForResume, triggerResume,
|
|
18
|
+
readAndDeleteCmd, readStatus, updateStatus,
|
|
19
|
+
STATUS_FILE, PID_FILE,
|
|
20
|
+
} from "./supervisor-state.ts";
|
|
21
|
+
import { startStoppedPage, stopStoppedPage } from "./supervisor-stopped-page.ts";
|
|
15
22
|
|
|
16
23
|
// ─── Constants ─────────────────────────────────────────────────────────
|
|
17
24
|
const MAX_RESTARTS = 10;
|
|
@@ -28,8 +35,6 @@ const UPGRADE_SKIP_INITIAL_MS = 300_000; // 5min delay before first check
|
|
|
28
35
|
const SELF_REPLACE_TIMEOUT_MS = 30_000; // 30s to wait for new supervisor
|
|
29
36
|
|
|
30
37
|
const PPM_DIR = resolve(process.env.PPM_HOME || resolve(homedir(), ".ppm"));
|
|
31
|
-
const STATUS_FILE = resolve(PPM_DIR, "status.json");
|
|
32
|
-
const PID_FILE = resolve(PPM_DIR, "ppm.pid");
|
|
33
38
|
const LOG_FILE = resolve(PPM_DIR, "ppm.log");
|
|
34
39
|
const RESTARTING_FLAG = resolve(PPM_DIR, ".restarting");
|
|
35
40
|
|
|
@@ -40,23 +45,10 @@ let tunnelUrl: string | null = null;
|
|
|
40
45
|
let adoptedTunnelPid: number | null = null; // PID of tunnel kept alive across upgrade
|
|
41
46
|
let shuttingDown = false;
|
|
42
47
|
|
|
43
|
-
|
|
44
|
-
let
|
|
45
|
-
|
|
46
|
-
let
|
|
47
|
-
|
|
48
|
-
function waitForResume(): Promise<void> {
|
|
49
|
-
return new Promise((resolve) => {
|
|
50
|
-
resumeResolve = resolve;
|
|
51
|
-
});
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
function triggerResume(): void {
|
|
55
|
-
if (resumeResolve) {
|
|
56
|
-
resumeResolve();
|
|
57
|
-
resumeResolve = null;
|
|
58
|
-
}
|
|
59
|
-
}
|
|
48
|
+
// Module-level refs for softStop (needs access to respawn args)
|
|
49
|
+
let _serverArgs: string[] = [];
|
|
50
|
+
let _logFd: number = -1;
|
|
51
|
+
let _opts: { port: number; host: string; share: boolean } = { port: 8080, host: "0.0.0.0", share: false };
|
|
60
52
|
|
|
61
53
|
let serverRestarts = 0;
|
|
62
54
|
let lastServerCrash = 0;
|
|
@@ -87,21 +79,6 @@ function log(level: string, msg: string) {
|
|
|
87
79
|
}
|
|
88
80
|
}
|
|
89
81
|
|
|
90
|
-
// ─── Status management ─────────────────────────────────────────────────
|
|
91
|
-
function readStatus(): Record<string, unknown> {
|
|
92
|
-
try {
|
|
93
|
-
if (existsSync(STATUS_FILE)) return JSON.parse(readFileSync(STATUS_FILE, "utf-8"));
|
|
94
|
-
} catch {}
|
|
95
|
-
return {};
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
function updateStatus(patch: Record<string, unknown>) {
|
|
99
|
-
try {
|
|
100
|
-
const data = { ...readStatus(), ...patch };
|
|
101
|
-
writeFileSync(STATUS_FILE, JSON.stringify(data));
|
|
102
|
-
} catch {}
|
|
103
|
-
}
|
|
104
|
-
|
|
105
82
|
// ─── Backoff calc ──────────────────────────────────────────────────────
|
|
106
83
|
function backoffDelay(restartCount: number): number {
|
|
107
84
|
return Math.min(BACKOFF_BASE_MS * 2 ** (restartCount - 1), BACKOFF_MAX_MS);
|
|
@@ -130,6 +107,12 @@ export async function spawnServer(
|
|
|
130
107
|
const exitCode = await serverChild.exited;
|
|
131
108
|
serverChild = null;
|
|
132
109
|
|
|
110
|
+
// Don't respawn if in stopped state (soft stop)
|
|
111
|
+
if (getState() === "stopped") {
|
|
112
|
+
log("INFO", "Server exited, supervisor in stopped state — not respawning");
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
|
|
133
116
|
if (exitCode === 0 && shuttingDown) {
|
|
134
117
|
log("INFO", `Server exited cleanly (code ${exitCode})`);
|
|
135
118
|
return;
|
|
@@ -158,7 +141,7 @@ export async function spawnServer(
|
|
|
158
141
|
if (serverRestarts > MAX_RESTARTS) {
|
|
159
142
|
log("WARN", `Server exceeded ${MAX_RESTARTS} restarts, pausing`);
|
|
160
143
|
notifyStateChange("running", "paused", "max_restarts_exceeded");
|
|
161
|
-
|
|
144
|
+
setState("paused");
|
|
162
145
|
updateStatus({
|
|
163
146
|
state: "paused",
|
|
164
147
|
pid: null,
|
|
@@ -170,7 +153,7 @@ export async function spawnServer(
|
|
|
170
153
|
await waitForResume();
|
|
171
154
|
// Resumed — reset and respawn
|
|
172
155
|
notifyStateChange("paused", "running", "user_resume");
|
|
173
|
-
|
|
156
|
+
setState("running");
|
|
174
157
|
serverRestarts = 0;
|
|
175
158
|
updateStatus({ state: "running", pausedAt: null, pauseReason: null });
|
|
176
159
|
log("INFO", "Resuming server after pause");
|
|
@@ -301,7 +284,7 @@ export async function spawnTunnel(port: number): Promise<void> {
|
|
|
301
284
|
// ─── Health checks ─────────────────────────────────────────────────────
|
|
302
285
|
function startServerHealthCheck(port: number) {
|
|
303
286
|
healthTimer = setInterval(async () => {
|
|
304
|
-
if (shuttingDown || !serverChild) return;
|
|
287
|
+
if (shuttingDown || !serverChild || getState() === "stopped") return;
|
|
305
288
|
try {
|
|
306
289
|
const res = await fetch(`http://127.0.0.1:${port}/api/health`, {
|
|
307
290
|
signal: AbortSignal.timeout(5000),
|
|
@@ -322,6 +305,8 @@ function startTunnelProbe(port: number) {
|
|
|
322
305
|
tunnelProbeTimer = setInterval(async () => {
|
|
323
306
|
if (shuttingDown || !tunnelUrl) { tunnelFailCount = 0; return; }
|
|
324
307
|
if (!tunnelChild && !adoptedTunnelPid) { tunnelFailCount = 0; return; }
|
|
308
|
+
// Don't probe when server is intentionally stopped (stopped page serves 503)
|
|
309
|
+
if (getState() === "stopped") { tunnelFailCount = 0; return; }
|
|
325
310
|
|
|
326
311
|
// Check if adopted tunnel process is still alive
|
|
327
312
|
if (adoptedTunnelPid && !tunnelChild) {
|
|
@@ -421,8 +406,8 @@ async function selfReplace(): Promise<{ success: boolean; error?: string }> {
|
|
|
421
406
|
try {
|
|
422
407
|
// Prevent spawnServer crash-restart loop from respawning killed children
|
|
423
408
|
shuttingDown = true;
|
|
424
|
-
notifyStateChange(
|
|
425
|
-
|
|
409
|
+
notifyStateChange(getState(), "upgrading", "self_replace");
|
|
410
|
+
setState("upgrading");
|
|
426
411
|
updateStatus({ state: "upgrading" });
|
|
427
412
|
|
|
428
413
|
// Set restarting flag so server child's stopTunnel() skips killing the tunnel
|
|
@@ -470,7 +455,7 @@ async function selfReplace(): Promise<{ success: boolean; error?: string }> {
|
|
|
470
455
|
try { unlinkSync(RESTARTING_FLAG); } catch {}
|
|
471
456
|
shuttingDown = false;
|
|
472
457
|
notifyStateChange("upgrading", "running", "upgrade_failed");
|
|
473
|
-
|
|
458
|
+
setState("running");
|
|
474
459
|
updateStatus({ state: "running" });
|
|
475
460
|
return { success: false, error: "New supervisor failed to start within 30s" };
|
|
476
461
|
} catch (e) {
|
|
@@ -478,7 +463,7 @@ async function selfReplace(): Promise<{ success: boolean; error?: string }> {
|
|
|
478
463
|
try { unlinkSync(RESTARTING_FLAG); } catch {}
|
|
479
464
|
shuttingDown = false;
|
|
480
465
|
notifyStateChange("upgrading", "running", "upgrade_failed");
|
|
481
|
-
|
|
466
|
+
setState("running");
|
|
482
467
|
updateStatus({ state: "running" });
|
|
483
468
|
return { success: false, error: (e as Error).message };
|
|
484
469
|
}
|
|
@@ -524,7 +509,7 @@ async function connectCloud(opts: { port: number }, serverArgs: string[], logFd:
|
|
|
524
509
|
return {
|
|
525
510
|
type: "heartbeat" as const,
|
|
526
511
|
tunnelUrl,
|
|
527
|
-
state:
|
|
512
|
+
state: getState(),
|
|
528
513
|
// Use server-reported version (source of truth) with supervisor fallback
|
|
529
514
|
appVersion: (status.serverVersion as string) || VERSION,
|
|
530
515
|
availableVersion: (status.availableVersion as string) || null,
|
|
@@ -560,12 +545,21 @@ async function connectCloud(opts: { port: number }, serverArgs: string[], logFd:
|
|
|
560
545
|
});
|
|
561
546
|
|
|
562
547
|
switch (cmd.action) {
|
|
548
|
+
case "start":
|
|
549
|
+
if (getState() === "stopped") {
|
|
550
|
+
triggerResume();
|
|
551
|
+
sendResult(true, undefined, { state: "running" });
|
|
552
|
+
} else {
|
|
553
|
+
sendResult(false, `Server already in ${getState()} state`);
|
|
554
|
+
}
|
|
555
|
+
break;
|
|
556
|
+
|
|
563
557
|
case "restart":
|
|
564
558
|
if (serverChild) {
|
|
565
559
|
serverRestartRequested = true;
|
|
566
560
|
try { serverChild.kill(); } catch {}
|
|
567
561
|
sendResult(true);
|
|
568
|
-
} else if (
|
|
562
|
+
} else if (getState() === "paused" || getState() === "stopped") {
|
|
569
563
|
triggerResume();
|
|
570
564
|
sendResult(true);
|
|
571
565
|
} else {
|
|
@@ -574,17 +568,25 @@ async function connectCloud(opts: { port: number }, serverArgs: string[], logFd:
|
|
|
574
568
|
break;
|
|
575
569
|
|
|
576
570
|
case "resume":
|
|
577
|
-
if (
|
|
571
|
+
if (getState() === "paused" || getState() === "stopped") {
|
|
578
572
|
triggerResume();
|
|
579
573
|
sendResult(true);
|
|
580
574
|
} else {
|
|
581
|
-
sendResult(false,
|
|
575
|
+
sendResult(false, `Not in paused/stopped state (current: ${getState()})`);
|
|
582
576
|
}
|
|
583
577
|
break;
|
|
584
578
|
|
|
585
579
|
case "stop":
|
|
580
|
+
if (getState() === "stopped") {
|
|
581
|
+
sendResult(false, "Already stopped");
|
|
582
|
+
} else {
|
|
583
|
+
sendResult(true);
|
|
584
|
+
softStop();
|
|
585
|
+
}
|
|
586
|
+
break;
|
|
587
|
+
|
|
588
|
+
case "shutdown":
|
|
586
589
|
sendResult(true);
|
|
587
|
-
// Delay exit to allow WS buffer to flush
|
|
588
590
|
setTimeout(() => {
|
|
589
591
|
shutdown();
|
|
590
592
|
process.exit(0);
|
|
@@ -593,10 +595,13 @@ async function connectCloud(opts: { port: number }, serverArgs: string[], logFd:
|
|
|
593
595
|
|
|
594
596
|
case "status":
|
|
595
597
|
sendResult(true, undefined, {
|
|
596
|
-
state:
|
|
598
|
+
state: getState(),
|
|
597
599
|
serverPid: serverChild?.pid ?? null,
|
|
598
600
|
tunnelUrl,
|
|
599
601
|
serverRestarts,
|
|
602
|
+
stoppedAt: getState() === "stopped"
|
|
603
|
+
? readStatus().stoppedAt
|
|
604
|
+
: null,
|
|
600
605
|
});
|
|
601
606
|
break;
|
|
602
607
|
|
|
@@ -609,6 +614,47 @@ async function connectCloud(opts: { port: number }, serverArgs: string[], logFd:
|
|
|
609
614
|
}
|
|
610
615
|
}
|
|
611
616
|
|
|
617
|
+
// ─── Soft stop (server only, supervisor stays alive) ──────────────────
|
|
618
|
+
/** True while a softStop() call is in flight (from request until just before respawn). */
let _softStopRunning = false;

/** Upper bound on how long softStop() waits for the killed server child to exit. */
const SOFT_STOP_EXIT_TIMEOUT_MS = 5_000;

/**
 * Stops the server child while keeping the supervisor itself running, then
 * blocks until a resume is triggered and starts the server again.
 *
 * While stopped, a placeholder page is served on the server's port. Calls made
 * while already stopped, or while another soft stop is in flight, return
 * immediately.
 *
 * Callers typically fire-and-forget this function: the returned promise only
 * settles after the server has been resumed.
 */
export async function softStop() {
  if (getState() === "stopped" || _softStopRunning) return;
  _softStopRunning = true;

  try {
    log("INFO", "Soft stop: killing server, supervisor stays alive");
    notifyStateChange(getState(), "stopped", "user_stop");
    setState("stopped");

    // Register the resume waiter BEFORE the first await. The state is already
    // "stopped", so a resume may be triggered at any moment; registering later
    // would silently drop one that arrives while we wait for the port.
    const resumed = waitForResume();

    // Kill server child
    const child = serverChild;
    serverChild = null;
    if (child) {
      try { child.kill(); } catch {}
    }

    // Stop health checks (no server to check)
    if (healthTimer) { clearInterval(healthTimer); healthTimer = null; }

    // Keep: tunnel, Cloud WS, upgrade checks, tunnel probe
    updateStatus({ state: "stopped", pid: null, stoppedAt: new Date().toISOString() });

    // Wait for the port to be released: always the 500ms grace period, and —
    // when a child was killed — until it has actually exited (bounded).
    await Promise.all([
      Bun.sleep(500),
      child
        ? Promise.race([
            child.exited.catch(() => undefined),
            Bun.sleep(SOFT_STOP_EXIT_TIMEOUT_MS),
          ])
        : undefined,
    ]);

    // Start stopped page on the server port so tunnel URL still works
    startStoppedPage(_opts.port, _opts.host);

    // Wait for resume signal
    await resumed;

    // Resumed — restart server
    stopStoppedPage();
    await Bun.sleep(200); // brief wait for port release
    notifyStateChange("stopped", "running", "user_start");
    setState("running");
    updateStatus({ state: "running", stoppedAt: null });
    startServerHealthCheck(_opts.port);
    log("INFO", "Resuming server from stopped state");
  } finally {
    // Always clear the guard; otherwise one failure would disable soft stop
    // for the rest of the supervisor's life.
    _softStopRunning = false;
  }

  // Not awaited: the server loop runs for as long as the server lives.
  spawnServer(_serverArgs, _logFd);
}
|
|
657
|
+
|
|
612
658
|
// ─── Shutdown ──────────────────────────────────────────────────────────
|
|
613
659
|
export function shutdown() {
|
|
614
660
|
if (shuttingDown) return;
|
|
@@ -653,6 +699,14 @@ export async function runSupervisor(opts: {
|
|
|
653
699
|
const logFd = openSync(LOG_FILE, "a");
|
|
654
700
|
log("INFO", `Supervisor started (PID: ${process.pid}, port: ${opts.port}, share: ${opts.share})`);
|
|
655
701
|
|
|
702
|
+
// Global exception handlers — supervisor must never crash
|
|
703
|
+
process.on("uncaughtException", (err) => {
|
|
704
|
+
log("ERROR", `Uncaught exception: ${err.stack || err.message}`);
|
|
705
|
+
});
|
|
706
|
+
process.on("unhandledRejection", (reason) => {
|
|
707
|
+
log("ERROR", `Unhandled rejection: ${reason}`);
|
|
708
|
+
});
|
|
709
|
+
|
|
656
710
|
// Write supervisor PID + clear stale availableVersion from previous run
|
|
657
711
|
writeFileSync(PID_FILE, String(process.pid));
|
|
658
712
|
updateStatus({
|
|
@@ -668,17 +722,45 @@ export async function runSupervisor(opts: {
|
|
|
668
722
|
// Strip trailing empty args
|
|
669
723
|
while (serverArgs.length > 0 && serverArgs[serverArgs.length - 1] === "") serverArgs.pop();
|
|
670
724
|
|
|
725
|
+
// Save module-level refs for softStop()
|
|
726
|
+
_serverArgs = serverArgs;
|
|
727
|
+
_logFd = logFd;
|
|
728
|
+
_opts = { port: opts.port, host: opts.host, share: opts.share };
|
|
729
|
+
|
|
671
730
|
// Signal handlers
|
|
672
731
|
process.on("SIGTERM", () => { shutdown(); process.exit(0); });
|
|
673
732
|
process.on("SIGINT", () => { shutdown(); process.exit(0); });
|
|
674
733
|
|
|
675
|
-
// SIGUSR2 =
|
|
734
|
+
// SIGUSR2 = command file dispatch OR graceful server restart
|
|
676
735
|
process.on("SIGUSR2", () => {
|
|
677
|
-
|
|
736
|
+
// Check for command file first (soft_stop, resume)
|
|
737
|
+
const cmd = readAndDeleteCmd();
|
|
738
|
+
if (cmd) {
|
|
739
|
+
if (cmd.action === "soft_stop") {
|
|
740
|
+
log("INFO", "SIGUSR2: soft_stop command received");
|
|
741
|
+
softStop();
|
|
742
|
+
return;
|
|
743
|
+
}
|
|
744
|
+
if (cmd.action === "resume") {
|
|
745
|
+
log("INFO", "SIGUSR2: resume command received");
|
|
746
|
+
if (getState() === "stopped" || getState() === "paused") {
|
|
747
|
+
triggerResume();
|
|
748
|
+
}
|
|
749
|
+
return;
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
// Default: restart server (existing behavior)
|
|
754
|
+
if (getState() === "paused") {
|
|
678
755
|
log("INFO", "SIGUSR2 received while paused, resuming server");
|
|
679
756
|
triggerResume();
|
|
680
757
|
return;
|
|
681
758
|
}
|
|
759
|
+
if (getState() === "stopped") {
|
|
760
|
+
log("INFO", "SIGUSR2 received while stopped, resuming server");
|
|
761
|
+
triggerResume();
|
|
762
|
+
return;
|
|
763
|
+
}
|
|
682
764
|
log("INFO", "SIGUSR2 received, restarting server only");
|
|
683
765
|
if (serverChild) {
|
|
684
766
|
serverRestartRequested = true; // flag so spawnServer skips backoff
|
|
@@ -707,6 +789,18 @@ export async function runSupervisor(opts: {
|
|
|
707
789
|
upgradeCheckTimer = setInterval(checkAvailableVersion, UPGRADE_CHECK_INTERVAL_MS);
|
|
708
790
|
}, UPGRADE_SKIP_INITIAL_MS);
|
|
709
791
|
|
|
792
|
+
// Windows: poll command file since SIGUSR2 is not available
|
|
793
|
+
if (process.platform === "win32") {
|
|
794
|
+
setInterval(() => {
|
|
795
|
+
const cmd = readAndDeleteCmd();
|
|
796
|
+
if (!cmd) return;
|
|
797
|
+
if (cmd.action === "soft_stop") { softStop(); }
|
|
798
|
+
else if (cmd.action === "resume") {
|
|
799
|
+
if (getState() === "stopped" || getState() === "paused") triggerResume();
|
|
800
|
+
}
|
|
801
|
+
}, 1000);
|
|
802
|
+
}
|
|
803
|
+
|
|
710
804
|
// Connect to Cloud via WebSocket (if device is linked)
|
|
711
805
|
connectCloud(opts, serverArgs, logFd);
|
|
712
806
|
|
|
@@ -725,7 +819,7 @@ export async function runSupervisor(opts: {
|
|
|
725
819
|
await Promise.all(promises);
|
|
726
820
|
|
|
727
821
|
// If upgrading, selfReplace handles process.exit — wait for it
|
|
728
|
-
if (
|
|
822
|
+
if (getState() === "upgrading") {
|
|
729
823
|
log("INFO", "Server loop exited during upgrade, waiting for selfReplace to finish");
|
|
730
824
|
await new Promise(() => {}); // selfReplace will call process.exit()
|
|
731
825
|
}
|