sensorium-mcp 2.17.25 → 2.17.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/dist/dashboard/routes/threads.d.ts.map +1 -1
  2. package/dist/dashboard/routes/threads.js +18 -5
  3. package/dist/dashboard/routes/threads.js.map +1 -1
  4. package/dist/data/memory/bootstrap.js +2 -2
  5. package/dist/data/memory/bootstrap.js.map +1 -1
  6. package/dist/data/memory/consolidation.d.ts.map +1 -1
  7. package/dist/data/memory/consolidation.js +75 -4
  8. package/dist/data/memory/consolidation.js.map +1 -1
  9. package/dist/data/memory/index.d.ts +1 -0
  10. package/dist/data/memory/index.d.ts.map +1 -1
  11. package/dist/data/memory/index.js +1 -0
  12. package/dist/data/memory/index.js.map +1 -1
  13. package/dist/data/memory/quality-scoring.d.ts +32 -0
  14. package/dist/data/memory/quality-scoring.d.ts.map +1 -0
  15. package/dist/data/memory/quality-scoring.js +182 -0
  16. package/dist/data/memory/quality-scoring.js.map +1 -0
  17. package/dist/data/memory/semantic.d.ts +12 -0
  18. package/dist/data/memory/semantic.d.ts.map +1 -1
  19. package/dist/data/memory/semantic.js +45 -2
  20. package/dist/data/memory/semantic.js.map +1 -1
  21. package/dist/data/memory/thread-registry.d.ts +7 -0
  22. package/dist/data/memory/thread-registry.d.ts.map +1 -1
  23. package/dist/data/memory/thread-registry.js +11 -1
  24. package/dist/data/memory/thread-registry.js.map +1 -1
  25. package/dist/index.js +17 -5
  26. package/dist/index.js.map +1 -1
  27. package/dist/tools/defs/memory-defs.d.ts.map +1 -1
  28. package/dist/tools/defs/memory-defs.js +19 -0
  29. package/dist/tools/defs/memory-defs.js.map +1 -1
  30. package/dist/tools/delegate-tool.d.ts.map +1 -1
  31. package/dist/tools/delegate-tool.js +6 -3
  32. package/dist/tools/delegate-tool.js.map +1 -1
  33. package/dist/tools/memory-tools.d.ts.map +1 -1
  34. package/dist/tools/memory-tools.js +15 -0
  35. package/dist/tools/memory-tools.js.map +1 -1
  36. package/dist/tools/thread-lifecycle.d.ts.map +1 -1
  37. package/dist/tools/thread-lifecycle.js +52 -32
  38. package/dist/tools/thread-lifecycle.js.map +1 -1
  39. package/package.json +10 -2
  40. package/scripts/install-supervisor.ps1 +67 -0
  41. package/scripts/install-supervisor.sh +43 -0
  42. package/scripts/start-supervisor.ps1 +46 -0
  43. package/scripts/start-supervisor.sh +20 -0
  44. package/supervisor/config.go +140 -0
  45. package/supervisor/go.mod +3 -0
  46. package/supervisor/health.go +390 -0
  47. package/supervisor/health_test.go +93 -0
  48. package/supervisor/keeper.go +303 -0
  49. package/supervisor/keeper_test.go +27 -0
  50. package/supervisor/lock.go +56 -0
  51. package/supervisor/lock_test.go +54 -0
  52. package/supervisor/log.go +114 -0
  53. package/supervisor/log_test.go +45 -0
  54. package/supervisor/main.go +325 -0
  55. package/supervisor/notify.go +53 -0
  56. package/supervisor/process.go +222 -0
  57. package/supervisor/process_test.go +94 -0
  58. package/supervisor/process_unix.go +14 -0
  59. package/supervisor/process_windows.go +15 -0
  60. package/supervisor/updater.go +281 -0
  61. package/templates/coding-task.default.md +12 -0
  62. package/dist/claude-keeper.d.ts +0 -24
  63. package/dist/claude-keeper.d.ts.map +0 -1
  64. package/dist/claude-keeper.js +0 -374
  65. package/dist/claude-keeper.js.map +0 -1
  66. package/dist/watcher-service.d.ts +0 -2
  67. package/dist/watcher-service.d.ts.map +0 -1
  68. package/dist/watcher-service.js +0 -997
  69. package/dist/watcher-service.js.map +0 -1
@@ -0,0 +1,325 @@
1
+ package main
2
+
3
+ import (
4
+ "context"
5
+ "fmt"
6
+ "os"
7
+ "os/signal"
8
+ "sync"
9
+ "syscall"
10
+ "time"
11
+ )
12
+
13
+ // KeeperEntry tracks a running keeper and its settings.
14
+ type KeeperEntry struct {
15
+ keeper *Keeper
16
+ settings KeeperConfig
17
+ }
18
+
19
+ func main() {
20
+ cfg := LoadConfig()
21
+
22
+ if err := os.MkdirAll(cfg.DataDir, 0755); err != nil {
23
+ fmt.Fprintf(os.Stderr, "Cannot create data dir %s: %v\n", cfg.DataDir, err)
24
+ os.Exit(1)
25
+ }
26
+
27
+ log := NewLogger(cfg.Paths.WatcherLog)
28
+ defer log.Close()
29
+
30
+ log.Info("sensorium-supervisor starting (mode=%s, port=%d, dataDir=%s)", cfg.Mode, cfg.MCPHttpPort, cfg.DataDir)
31
+ log.Debug("Config: MCPStartCommand=%q, PollInterval=%v, MinUptime=%v, KeeperMaxRetries=%d", cfg.MCPStartCommand, cfg.PollInterval, cfg.MinUptime, cfg.KeeperMaxRetries)
32
+ log.Debug("Config: TelegramToken=%v, HealthFailThresh=%d, StuckThreshold=%v", cfg.TelegramToken != "", cfg.HealthFailThresh, cfg.StuckThreshold)
33
+
34
+ if err := os.MkdirAll(cfg.Paths.PIDsDir, 0755); err != nil {
35
+ log.Warn("Cannot create PIDs dir %s: %v", cfg.Paths.PIDsDir, err)
36
+ }
37
+ if err := os.MkdirAll(cfg.Paths.HeartbeatsDir, 0755); err != nil {
38
+ log.Warn("Cannot create heartbeats dir %s: %v", cfg.Paths.HeartbeatsDir, err)
39
+ }
40
+
41
+ // Acquire lock — prevent multiple instances
42
+ if !AcquireLock(cfg.Paths.WatcherLock, log) {
43
+ os.Exit(1)
44
+ }
45
+ defer ReleaseLock(cfg.Paths.WatcherLock)
46
+
47
+ if cfg.MCPHttpPort <= 0 {
48
+ log.Error("MCP_HTTP_PORT must be set (got %d)", cfg.MCPHttpPort)
49
+ os.Exit(1)
50
+ }
51
+
52
+ mcp := NewMCPClient(cfg.MCPHttpPort, cfg.MCPHttpSecret)
53
+ mcp.Log = log
54
+
55
+ // Clean stale PID files from previous runs
56
+ CleanStalePIDs(cfg.Paths.PIDsDir, log)
57
+
58
+ // Kill orphan process on our port
59
+ KillByPort(cfg.MCPHttpPort, log)
60
+
61
+ // Spawn MCP server
62
+ _, err := SpawnMCPServer(cfg, log)
63
+ if err != nil {
64
+ log.Error("Failed to start MCP server: %v", err)
65
+ os.Exit(1)
66
+ }
67
+
68
+ // Wait for server to be ready
69
+ ctx, rootCancel := context.WithCancel(context.Background())
70
+ defer rootCancel()
71
+
72
+ if mcp.WaitForReady(ctx, 3*time.Second, cfg.KeeperReadyTimeout) {
73
+ log.Info("MCP server is ready")
74
+ } else {
75
+ log.Warn("MCP server did not become ready in %v — proceeding anyway", cfg.KeeperReadyTimeout)
76
+ }
77
+
78
+ // Start keeper management
79
+ var mu sync.Mutex
80
+ keepers := make(map[int]*KeeperEntry)
81
+
82
+ onDeath := func(threadID int, sessionName string) {
83
+ log.Warn("Thread %d ('%s') died", threadID, sessionName)
84
+ NotifyOperator(cfg, log, fmt.Sprintf("💀 <b>%s</b> session died — restarting…", sessionName), threadID)
85
+ }
86
+
87
+ syncKeepers := func() {
88
+ if cfg.MCPHttpPort <= 0 {
89
+ log.Debug("syncKeepers: skipped (no port configured)")
90
+ return
91
+ }
92
+
93
+ log.Debug("syncKeepers: fetching keeper settings...")
94
+ settings, err := fetchKeeperSettings(ctx, mcp, log)
95
+ if err != nil {
96
+ log.Warn("Failed to fetch keeper settings: %v", err)
97
+ return
98
+ }
99
+ log.Debug("syncKeepers: got %d keeper configs", len(settings))
100
+
101
+ mu.Lock()
102
+ defer mu.Unlock()
103
+
104
+ // Find keepers to remove (no longer in settings)
105
+ wanted := make(map[int]bool)
106
+ for _, s := range settings {
107
+ wanted[s.ThreadID] = true
108
+ }
109
+ for tid, entry := range keepers {
110
+ if !wanted[tid] {
111
+ log.Info("Stopping keeper for removed thread %d", tid)
112
+ entry.keeper.Stop()
113
+ delete(keepers, tid)
114
+ }
115
+ }
116
+
117
+ // Start or update keepers
118
+ for _, s := range settings {
119
+ existing, exists := keepers[s.ThreadID]
120
+ if exists && settingsChanged(existing.settings, s) {
121
+ log.Info("Settings changed for thread %d — restarting keeper", s.ThreadID)
122
+ existing.keeper.Stop()
123
+ delete(keepers, s.ThreadID)
124
+ exists = false
125
+ }
126
+ if !exists {
127
+ k := NewKeeper(s, cfg, mcp, log, onDeath)
128
+ k.Start()
129
+ keepers[s.ThreadID] = &KeeperEntry{keeper: k, settings: s}
130
+ log.Info("Started keeper for thread %d ('%s')", s.ThreadID, s.SessionName)
131
+ }
132
+ }
133
+ }
134
+
135
+ // Initial sync
136
+ log.Info("Running initial keeper sync")
137
+ syncKeepers()
138
+
139
+ // Keeper settings poller (every 2 min)
140
+ keeperPollerDone := make(chan struct{})
141
+ go func() {
142
+ defer close(keeperPollerDone)
143
+ ticker := time.NewTicker(2 * time.Minute)
144
+ defer ticker.Stop()
145
+ for {
146
+ select {
147
+ case <-ctx.Done():
148
+ return
149
+ case <-ticker.C:
150
+ log.Debug("Keeper settings poll triggered")
151
+ syncKeepers()
152
+ }
153
+ }
154
+ }()
155
+
156
+ // Start updater
157
+ log.Info("Starting auto-updater")
158
+ updater := NewUpdater(cfg, mcp, log)
159
+ updater.Start()
160
+
161
+ // Health check loop for the server process itself
162
+ healthDone := make(chan struct{})
163
+ go func() {
164
+ defer close(healthDone)
165
+ consecutiveFails := 0
166
+ ticker := time.NewTicker(60 * time.Second)
167
+ defer ticker.Stop()
168
+ for {
169
+ select {
170
+ case <-ctx.Done():
171
+ return
172
+ case <-ticker.C:
173
+ if mcp.IsServerReady(ctx) {
174
+ if consecutiveFails > 0 {
175
+ log.Info("Server health check recovered (was at %d fails)", consecutiveFails)
176
+ }
177
+ consecutiveFails = 0
178
+ } else {
179
+ consecutiveFails++
180
+ log.Warn("Server health check failed (%d/%d)", consecutiveFails, cfg.HealthFailThresh)
181
+ if consecutiveFails >= cfg.HealthFailThresh {
182
+ log.Error("Server unresponsive after %d consecutive failures — restarting", consecutiveFails)
183
+ NotifyOperator(cfg, log, "⚠️ Supervisor: server process not running — restarting...", 0)
184
+
185
+ // Kill and respawn
186
+ pid, pidErr := ReadPIDFile(cfg.Paths.ServerPID)
187
+ if pidErr != nil {
188
+ log.Warn("Could not read server PID file: %v", pidErr)
189
+ }
190
+ if pid > 0 {
191
+ _ = KillProcess(pid, log)
192
+ }
193
+ KillByPort(cfg.MCPHttpPort, log)
194
+
195
+ if _, err := SpawnMCPServer(cfg, log); err != nil {
196
+ log.Error("Failed to respawn server: %v", err)
197
+ }
198
+ consecutiveFails = 0
199
+ }
200
+ }
201
+ }
202
+ }
203
+ }()
204
+
205
+ // Wait for shutdown signal
206
+ sigCh := make(chan os.Signal, 1)
207
+ signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
208
+
209
+ log.Info("All subsystems started — supervisor is running (PID %d)", os.Getpid())
210
+
211
+ sig := <-sigCh
212
+ log.Info("Received %s — shutting down", sig)
213
+ rootCancel()
214
+
215
+ // Stop keepers (with 10s timeout)
216
+ shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
217
+ defer shutdownCancel()
218
+
219
+ mu.Lock()
220
+ var wg sync.WaitGroup
221
+ for _, entry := range keepers {
222
+ wg.Add(1)
223
+ go func(k *Keeper) {
224
+ defer wg.Done()
225
+ k.Stop()
226
+ }(entry.keeper)
227
+ }
228
+ mu.Unlock()
229
+
230
+ doneCh := make(chan struct{})
231
+ go func() { wg.Wait(); close(doneCh) }()
232
+ select {
233
+ case <-doneCh:
234
+ log.Info("All keepers stopped")
235
+ case <-shutdownCtx.Done():
236
+ log.Warn("Keeper shutdown timed out after 10s")
237
+ }
238
+
239
+ // Stop updater
240
+ updater.Stop()
241
+
242
+ // Wait for background goroutines
243
+ <-keeperPollerDone
244
+ <-healthDone
245
+
246
+ // Kill server process
247
+ pid, err := ReadPIDFile(cfg.Paths.ServerPID)
248
+ if err == nil && pid > 0 {
249
+ log.Info("Stopping MCP server (PID %d)", pid)
250
+ _ = KillProcess(pid, log)
251
+ }
252
+
253
+ log.Info("Supervisor stopped cleanly")
254
+ }
255
+
256
+ // fetchKeeperSettings reads the root threads from the MCP server,
257
+ // filtering for those with keepAlive=true.
258
+ func fetchKeeperSettings(ctx context.Context, mcp *MCPClient, log *Logger) ([]KeeperConfig, error) {
259
+ roots, err := mcp.GetRootThreads(ctx)
260
+ if err != nil {
261
+ return nil, err
262
+ }
263
+
264
+ var result []KeeperConfig
265
+ for _, r := range roots {
266
+ keepAlive, _ := r["keepAlive"].(bool)
267
+ if !keepAlive {
268
+ continue
269
+ }
270
+
271
+ // Skip non-active roots (archived, expired, exited)
272
+ if status, _ := r["status"].(string); status != "" && status != "active" {
273
+ continue
274
+ }
275
+
276
+ tidFloat, _ := r["threadId"].(float64) // JSON numbers decode as float64
277
+ tid := int(tidFloat)
278
+ if tid <= 0 {
279
+ continue
280
+ }
281
+
282
+ client := "claude"
283
+ if c, ok := r["client"].(string); ok && c != "" {
284
+ client = c
285
+ }
286
+
287
+ sessionName := ""
288
+ if n, ok := r["name"].(string); ok {
289
+ sessionName = n
290
+ }
291
+
292
+ maxRetries := 5
293
+ if mr, ok := r["maxRetries"].(float64); ok {
294
+ maxRetries = int(mr)
295
+ }
296
+
297
+ cooldownMs := 300_000
298
+ if cd, ok := r["cooldownMs"].(float64); ok {
299
+ cooldownMs = int(cd)
300
+ }
301
+
302
+ workDir := ""
303
+ if wd, ok := r["workingDirectory"].(string); ok {
304
+ workDir = wd
305
+ }
306
+
307
+ result = append(result, KeeperConfig{
308
+ ThreadID: tid,
309
+ SessionName: sessionName,
310
+ Client: client,
311
+ WorkingDirectory: workDir,
312
+ MaxRetries: maxRetries,
313
+ CooldownMs: cooldownMs,
314
+ })
315
+ }
316
+ return result, nil
317
+ }
318
+
319
+ func settingsChanged(a, b KeeperConfig) bool {
320
+ return a.MaxRetries != b.MaxRetries ||
321
+ a.CooldownMs != b.CooldownMs ||
322
+ a.Client != b.Client ||
323
+ a.SessionName != b.SessionName ||
324
+ a.WorkingDirectory != b.WorkingDirectory
325
+ }
@@ -0,0 +1,53 @@
1
+ package main
2
+
3
+ import (
4
+ "context"
5
+ "fmt"
6
+ "net/http"
7
+ "net/url"
8
+ "strings"
9
+ "time"
10
+ )
11
+
12
+ // NotifyOperator sends a message via Telegram to the operator.
13
+ // Silently fails if credentials are not configured.
14
+ func NotifyOperator(cfg Config, log *Logger, text string, threadID int) {
15
+ if cfg.TelegramToken == "" || cfg.TelegramChatID == "" {
16
+ log.Debug("NotifyOperator: skipped (no Telegram credentials)")
17
+ return
18
+ }
19
+
20
+ log.Debug("NotifyOperator: sending to chat %s (threadID=%d)", cfg.TelegramChatID, threadID)
21
+
22
+ ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
23
+ defer cancel()
24
+
25
+ apiURL := fmt.Sprintf("https://api.telegram.org/bot%s/sendMessage", cfg.TelegramToken)
26
+
27
+ form := url.Values{}
28
+ form.Set("chat_id", cfg.TelegramChatID)
29
+ form.Set("text", text)
30
+ form.Set("parse_mode", "HTML")
31
+ if threadID > 0 {
32
+ form.Set("message_thread_id", fmt.Sprintf("%d", threadID))
33
+ }
34
+
35
+ req, err := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, strings.NewReader(form.Encode()))
36
+ if err != nil {
37
+ log.Warn("Telegram notify: %v", err)
38
+ return
39
+ }
40
+ req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
41
+
42
+ resp, err := http.DefaultClient.Do(req)
43
+ if err != nil {
44
+ log.Warn("Telegram notify: %v", err)
45
+ return
46
+ }
47
+ defer resp.Body.Close()
48
+ if resp.StatusCode >= 400 {
49
+ log.Warn("Telegram notify: HTTP %d", resp.StatusCode)
50
+ } else {
51
+ log.Debug("Telegram notify: sent OK (HTTP %d)", resp.StatusCode)
52
+ }
53
+ }
@@ -0,0 +1,222 @@
1
+ package main
2
+
3
import (
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"syscall"
	"time"
)
15
+
16
+ // SpawnMCPServer starts the MCP server as a detached child process.
17
+ // Returns the PID of the spawned process.
18
+ func SpawnMCPServer(cfg Config, log *Logger) (int, error) {
19
+ parts := strings.Fields(cfg.MCPStartCommand)
20
+ if len(parts) == 0 {
21
+ return 0, errors.New("empty MCP_START_COMMAND")
22
+ }
23
+
24
+ cmd := exec.Command(parts[0], parts[1:]...)
25
+ cmd.Env = os.Environ()
26
+ cmd.Stdin = nil
27
+ cmd.Stdout = nil
28
+ cmd.Stderr = nil
29
+ setSysProcAttr(cmd)
30
+
31
+ log.Info("Starting MCP server: %s", cfg.MCPStartCommand)
32
+
33
+ if err := cmd.Start(); err != nil {
34
+ return 0, fmt.Errorf("spawn MCP server: %w", err)
35
+ }
36
+
37
+ pid := cmd.Process.Pid
38
+ log.Info("MCP server started with PID %d", pid)
39
+
40
+ // Don't wait — detached process
41
+ go func() { _ = cmd.Wait() }()
42
+
43
+ if err := writePIDFile(cfg.Paths.ServerPID, pid); err != nil {
44
+ log.Warn("Failed to write server PID file: %v", err)
45
+ }
46
+
47
+ return pid, nil
48
+ }
49
+
50
+ // KillProcess kills a process by PID. On Windows, uses taskkill /F /T for tree kill.
51
+ func KillProcess(pid int, log *Logger) error {
52
+ if !IsProcessAlive(pid) {
53
+ log.Debug("KillProcess: PID %d already dead", pid)
54
+ return nil
55
+ }
56
+
57
+ log.Debug("KillProcess: killing PID %d", pid)
58
+
59
+ if runtime.GOOS == "windows" {
60
+ // taskkill /F /T kills the tree
61
+ out, err := exec.Command("taskkill", "/F", "/T", "/PID", strconv.Itoa(pid)).CombinedOutput()
62
+ if err != nil {
63
+ return fmt.Errorf("taskkill PID %d: %w (%s)", pid, err, strings.TrimSpace(string(out)))
64
+ }
65
+ log.Info("Killed process tree PID %d", pid)
66
+ return nil
67
+ }
68
+
69
+ // Unix: SIGTERM, wait 2s, then SIGKILL
70
+ proc, err := os.FindProcess(pid)
71
+ if err != nil {
72
+ log.Debug("KillProcess: FindProcess(%d) failed: %v", pid, err)
73
+ return err
74
+ }
75
+ if err := proc.Signal(os.Interrupt); err != nil {
76
+ // Already dead
77
+ return nil
78
+ }
79
+ time.Sleep(2 * time.Second)
80
+ if IsProcessAlive(pid) {
81
+ _ = proc.Kill()
82
+ log.Info("Force-killed PID %d", pid)
83
+ } else {
84
+ log.Info("Process PID %d terminated gracefully", pid)
85
+ }
86
+ return nil
87
+ }
88
+
89
+ // KillByPort finds a process listening on the given port and kills it (Windows-only orphan cleanup).
90
+ func KillByPort(port int, log *Logger) {
91
+ if runtime.GOOS != "windows" || port <= 0 || port > 65535 {
92
+ return
93
+ }
94
+ log.Debug("KillByPort: checking for processes on port %d", port)
95
+ out, err := exec.Command("cmd", "/c", fmt.Sprintf("netstat -aon | findstr \":%d.*LISTENING\"", port)).CombinedOutput()
96
+ if err != nil {
97
+ log.Debug("KillByPort: no listeners on port %d", port)
98
+ return
99
+ }
100
+ for _, line := range strings.Split(string(out), "\n") {
101
+ fields := strings.Fields(strings.TrimSpace(line))
102
+ if len(fields) >= 5 {
103
+ pid, err := strconv.Atoi(fields[len(fields)-1])
104
+ if err == nil && pid > 0 {
105
+ log.Info("Found orphan PID %d on port %d — killing", pid, port)
106
+ _ = KillProcess(pid, log)
107
+ }
108
+ }
109
+ }
110
+ }
111
+
112
// IsProcessAlive reports whether a process with the given PID exists.
// Non-positive PIDs are never alive.
//
// On Windows it queries tasklist filtered by PID. Elsewhere it sends signal 0,
// which performs the kernel's existence and permission checks without
// delivering a signal; a permission error (EPERM) still means the process
// exists, so it is reported as alive.
func IsProcessAlive(pid int) bool {
	if pid <= 0 {
		return false
	}
	if runtime.GOOS == "windows" {
		out, err := exec.Command("tasklist", "/FI", fmt.Sprintf("PID eq %d", pid), "/NH").CombinedOutput()
		if err != nil {
			return false
		}
		return strings.Contains(string(out), strconv.Itoa(pid))
	}
	proc, err := os.FindProcess(pid)
	if err != nil {
		return false
	}
	// The signal must be a syscall.Signal: os.Process.Signal rejects a nil
	// os.Signal with "unsupported signal type", which would make every
	// process look dead.
	err = proc.Signal(syscall.Signal(0))
	return err == nil || errors.Is(err, syscall.EPERM)
}
130
+
131
+ // --- PID File Helpers ---
132
+
133
// pidJSON is the on-disk JSON shape of a PID file: {"pid":123}.
type pidJSON struct {
	// PID is the process ID stored under the "pid" key.
	PID int `json:"pid"`
}
136
+
137
// ReadPIDFile loads a process ID from path. Two encodings are accepted: a
// JSON object of the form {"pid":123}, or a bare decimal integer.
// Surrounding whitespace is ignored. It returns the read error when the file
// cannot be read, and an error when the content yields no positive PID.
func ReadPIDFile(path string) (int, error) {
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		return 0, readErr
	}
	text := strings.TrimSpace(string(contents))

	// JSON form is tried first; a JSON PID that is not positive falls
	// through to the integer parser (and is rejected there).
	var decoded struct {
		PID int `json:"pid"`
	}
	if err := json.Unmarshal([]byte(text), &decoded); err == nil && decoded.PID > 0 {
		return decoded.PID, nil
	}

	// Plain-integer form.
	n, convErr := strconv.Atoi(text)
	switch {
	case convErr != nil:
		return 0, fmt.Errorf("invalid PID file content: %q", text)
	case n <= 0:
		return 0, fmt.Errorf("invalid PID: %d", n)
	}
	return n, nil
}
161
+
162
+ func writePIDFile(path string, pid int) error {
163
+ dir := filepath.Dir(path)
164
+ if err := os.MkdirAll(dir, 0755); err != nil {
165
+ return err
166
+ }
167
+ data, _ := json.Marshal(pidJSON{PID: pid})
168
+ return atomicWrite(path, data)
169
+ }
170
+
171
// atomicWrite writes data to a temporary file next to path and then renames
// it over path, so readers never observe a partially written file. The
// temporary name is "<path>.tmp.<supervisor PID>". If the write or the
// rename fails, the temporary file is removed and the error is returned.
func atomicWrite(path string, data []byte) error {
	tmp := fmt.Sprintf("%s.tmp.%d", path, os.Getpid())
	if err := os.WriteFile(tmp, data, 0644); err != nil {
		_ = os.Remove(tmp) // best effort: drop a possibly partial temp file
		return err
	}
	if err := os.Rename(tmp, path); err != nil {
		_ = os.Remove(tmp) // best effort: do not leave the temp file behind
		return err
	}
	return nil
}
179
+
180
+ // ListThreadPIDs returns a map of threadId → PID from the pids directory.
181
+ func ListThreadPIDs(pidsDir string) map[string]int {
182
+ result := make(map[string]int)
183
+ entries, err := os.ReadDir(pidsDir)
184
+ if err != nil {
185
+ // directory may not exist yet
186
+ return result
187
+ }
188
+ for _, e := range entries {
189
+ if e.IsDir() || !strings.HasSuffix(e.Name(), ".pid") {
190
+ continue
191
+ }
192
+ threadID := strings.TrimSuffix(e.Name(), ".pid")
193
+ pid, err := ReadPIDFile(filepath.Join(pidsDir, e.Name()))
194
+ if err != nil {
195
+ continue
196
+ }
197
+ result[threadID] = pid
198
+ }
199
+ return result
200
+ }
201
+
202
+ // CleanStalePIDs removes PID files for processes that are no longer running.
203
+ func CleanStalePIDs(pidsDir string, log *Logger) {
204
+ pids := ListThreadPIDs(pidsDir)
205
+ if len(pids) == 0 {
206
+ log.Debug("CleanStalePIDs: no PID files found in %s", pidsDir)
207
+ return
208
+ }
209
+ log.Debug("CleanStalePIDs: checking %d PID files", len(pids))
210
+ cleaned := 0
211
+ for threadID, pid := range pids {
212
+ if !IsProcessAlive(pid) {
213
+ path := filepath.Join(pidsDir, threadID+".pid")
214
+ log.Info("Removing stale PID file for thread %s (PID %d)", threadID, pid)
215
+ _ = os.Remove(path)
216
+ cleaned++
217
+ }
218
+ }
219
+ if cleaned > 0 {
220
+ log.Info("CleanStalePIDs: removed %d stale PID files", cleaned)
221
+ }
222
+ }
@@ -0,0 +1,94 @@
1
+ package main
2
+
3
+ import (
4
+ "os"
5
+ "path/filepath"
6
+ "testing"
7
+ )
8
+
9
+ func TestReadPIDFile_JSON(t *testing.T) {
10
+ dir := t.TempDir()
11
+ path := filepath.Join(dir, "test.pid")
12
+ os.WriteFile(path, []byte(`{"pid":12345}`), 0644)
13
+
14
+ pid, err := ReadPIDFile(path)
15
+ if err != nil {
16
+ t.Fatalf("unexpected error: %v", err)
17
+ }
18
+ if pid != 12345 {
19
+ t.Errorf("got %d, want 12345", pid)
20
+ }
21
+ }
22
+
23
+ func TestReadPIDFile_RawInt(t *testing.T) {
24
+ dir := t.TempDir()
25
+ path := filepath.Join(dir, "test.pid")
26
+ os.WriteFile(path, []byte("54321\n"), 0644)
27
+
28
+ pid, err := ReadPIDFile(path)
29
+ if err != nil {
30
+ t.Fatalf("unexpected error: %v", err)
31
+ }
32
+ if pid != 54321 {
33
+ t.Errorf("got %d, want 54321", pid)
34
+ }
35
+ }
36
+
37
+ func TestReadPIDFile_Invalid(t *testing.T) {
38
+ dir := t.TempDir()
39
+ path := filepath.Join(dir, "test.pid")
40
+ os.WriteFile(path, []byte("not-a-pid"), 0644)
41
+
42
+ _, err := ReadPIDFile(path)
43
+ if err == nil {
44
+ t.Fatal("expected error for invalid PID content")
45
+ }
46
+ }
47
+
48
+ func TestReadPIDFile_Missing(t *testing.T) {
49
+ _, err := ReadPIDFile(filepath.Join(t.TempDir(), "missing.pid"))
50
+ if err == nil {
51
+ t.Fatal("expected error for missing file")
52
+ }
53
+ }
54
+
55
+ func TestAtomicWrite(t *testing.T) {
56
+ dir := t.TempDir()
57
+ path := filepath.Join(dir, "data.txt")
58
+
59
+ if err := atomicWrite(path, []byte("hello")); err != nil {
60
+ t.Fatalf("atomicWrite failed: %v", err)
61
+ }
62
+ data, err := os.ReadFile(path)
63
+ if err != nil {
64
+ t.Fatalf("ReadFile failed: %v", err)
65
+ }
66
+ if string(data) != "hello" {
67
+ t.Errorf("got %q, want %q", string(data), "hello")
68
+ }
69
+ }
70
+
71
+ func TestListThreadPIDs(t *testing.T) {
72
+ dir := t.TempDir()
73
+ os.WriteFile(filepath.Join(dir, "1234.pid"), []byte(`{"pid":100}`), 0644)
74
+ os.WriteFile(filepath.Join(dir, "5678.pid"), []byte("200"), 0644)
75
+ os.WriteFile(filepath.Join(dir, "not-a-pid.txt"), []byte("300"), 0644)
76
+
77
+ result := ListThreadPIDs(dir)
78
+ if len(result) != 2 {
79
+ t.Fatalf("got %d entries, want 2", len(result))
80
+ }
81
+ if result["1234"] != 100 {
82
+ t.Errorf("result[1234] = %d, want 100", result["1234"])
83
+ }
84
+ if result["5678"] != 200 {
85
+ t.Errorf("result[5678] = %d, want 200", result["5678"])
86
+ }
87
+ }
88
+
89
+ func TestListThreadPIDs_MissingDir(t *testing.T) {
90
+ result := ListThreadPIDs(filepath.Join(t.TempDir(), "no-such-dir"))
91
+ if len(result) != 0 {
92
+ t.Errorf("expected empty map for missing directory, got %d entries", len(result))
93
+ }
94
+ }
@@ -0,0 +1,14 @@
1
+ //go:build !windows
2
+
3
+ package main
4
+
5
+ import (
6
+ "os/exec"
7
+ "syscall"
8
+ )
9
+
10
// setSysProcAttr configures cmd to start in a new session (Setsid), which
// detaches the child from the parent's session.
func setSysProcAttr(cmd *exec.Cmd) {
	attr := new(syscall.SysProcAttr)
	attr.Setsid = true
	cmd.SysProcAttr = attr
}