sensorium-mcp 2.17.25 → 2.17.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dashboard/routes/threads.d.ts.map +1 -1
- package/dist/dashboard/routes/threads.js +18 -5
- package/dist/dashboard/routes/threads.js.map +1 -1
- package/dist/data/memory/bootstrap.js +2 -2
- package/dist/data/memory/bootstrap.js.map +1 -1
- package/dist/data/memory/consolidation.d.ts.map +1 -1
- package/dist/data/memory/consolidation.js +75 -4
- package/dist/data/memory/consolidation.js.map +1 -1
- package/dist/data/memory/index.d.ts +1 -0
- package/dist/data/memory/index.d.ts.map +1 -1
- package/dist/data/memory/index.js +1 -0
- package/dist/data/memory/index.js.map +1 -1
- package/dist/data/memory/quality-scoring.d.ts +32 -0
- package/dist/data/memory/quality-scoring.d.ts.map +1 -0
- package/dist/data/memory/quality-scoring.js +182 -0
- package/dist/data/memory/quality-scoring.js.map +1 -0
- package/dist/data/memory/semantic.d.ts +12 -0
- package/dist/data/memory/semantic.d.ts.map +1 -1
- package/dist/data/memory/semantic.js +45 -2
- package/dist/data/memory/semantic.js.map +1 -1
- package/dist/data/memory/thread-registry.d.ts +7 -0
- package/dist/data/memory/thread-registry.d.ts.map +1 -1
- package/dist/data/memory/thread-registry.js +11 -1
- package/dist/data/memory/thread-registry.js.map +1 -1
- package/dist/index.js +17 -5
- package/dist/index.js.map +1 -1
- package/dist/tools/defs/memory-defs.d.ts.map +1 -1
- package/dist/tools/defs/memory-defs.js +19 -0
- package/dist/tools/defs/memory-defs.js.map +1 -1
- package/dist/tools/delegate-tool.d.ts.map +1 -1
- package/dist/tools/delegate-tool.js +6 -3
- package/dist/tools/delegate-tool.js.map +1 -1
- package/dist/tools/memory-tools.d.ts.map +1 -1
- package/dist/tools/memory-tools.js +15 -0
- package/dist/tools/memory-tools.js.map +1 -1
- package/dist/tools/thread-lifecycle.d.ts.map +1 -1
- package/dist/tools/thread-lifecycle.js +52 -32
- package/dist/tools/thread-lifecycle.js.map +1 -1
- package/package.json +10 -2
- package/scripts/install-supervisor.ps1 +67 -0
- package/scripts/install-supervisor.sh +43 -0
- package/scripts/start-supervisor.ps1 +46 -0
- package/scripts/start-supervisor.sh +20 -0
- package/supervisor/config.go +140 -0
- package/supervisor/go.mod +3 -0
- package/supervisor/health.go +390 -0
- package/supervisor/health_test.go +93 -0
- package/supervisor/keeper.go +303 -0
- package/supervisor/keeper_test.go +27 -0
- package/supervisor/lock.go +56 -0
- package/supervisor/lock_test.go +54 -0
- package/supervisor/log.go +114 -0
- package/supervisor/log_test.go +45 -0
- package/supervisor/main.go +325 -0
- package/supervisor/notify.go +53 -0
- package/supervisor/process.go +222 -0
- package/supervisor/process_test.go +94 -0
- package/supervisor/process_unix.go +14 -0
- package/supervisor/process_windows.go +15 -0
- package/supervisor/updater.go +281 -0
- package/templates/coding-task.default.md +12 -0
- package/dist/claude-keeper.d.ts +0 -24
- package/dist/claude-keeper.d.ts.map +0 -1
- package/dist/claude-keeper.js +0 -374
- package/dist/claude-keeper.js.map +0 -1
- package/dist/watcher-service.d.ts +0 -2
- package/dist/watcher-service.d.ts.map +0 -1
- package/dist/watcher-service.js +0 -997
- package/dist/watcher-service.js.map +0 -1

package/supervisor/main.go
@@ -0,0 +1,325 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// KeeperEntry tracks a running keeper and its settings.
+type KeeperEntry struct {
+	keeper   *Keeper
+	settings KeeperConfig
+}
+
+func main() {
+	cfg := LoadConfig()
+
+	if err := os.MkdirAll(cfg.DataDir, 0755); err != nil {
+		fmt.Fprintf(os.Stderr, "Cannot create data dir %s: %v\n", cfg.DataDir, err)
+		os.Exit(1)
+	}
+
+	log := NewLogger(cfg.Paths.WatcherLog)
+	defer log.Close()
+
+	log.Info("sensorium-supervisor starting (mode=%s, port=%d, dataDir=%s)", cfg.Mode, cfg.MCPHttpPort, cfg.DataDir)
+	log.Debug("Config: MCPStartCommand=%q, PollInterval=%v, MinUptime=%v, KeeperMaxRetries=%d", cfg.MCPStartCommand, cfg.PollInterval, cfg.MinUptime, cfg.KeeperMaxRetries)
+	log.Debug("Config: TelegramToken=%v, HealthFailThresh=%d, StuckThreshold=%v", cfg.TelegramToken != "", cfg.HealthFailThresh, cfg.StuckThreshold)
+
+	if err := os.MkdirAll(cfg.Paths.PIDsDir, 0755); err != nil {
+		log.Warn("Cannot create PIDs dir %s: %v", cfg.Paths.PIDsDir, err)
+	}
+	if err := os.MkdirAll(cfg.Paths.HeartbeatsDir, 0755); err != nil {
+		log.Warn("Cannot create heartbeats dir %s: %v", cfg.Paths.HeartbeatsDir, err)
+	}
+
+	// Acquire lock — prevent multiple instances
+	if !AcquireLock(cfg.Paths.WatcherLock, log) {
+		os.Exit(1)
+	}
+	defer ReleaseLock(cfg.Paths.WatcherLock)
+
+	if cfg.MCPHttpPort <= 0 {
+		log.Error("MCP_HTTP_PORT must be set (got %d)", cfg.MCPHttpPort)
+		os.Exit(1)
+	}
+
+	mcp := NewMCPClient(cfg.MCPHttpPort, cfg.MCPHttpSecret)
+	mcp.Log = log
+
+	// Clean stale PID files from previous runs
+	CleanStalePIDs(cfg.Paths.PIDsDir, log)
+
+	// Kill orphan process on our port
+	KillByPort(cfg.MCPHttpPort, log)
+
+	// Spawn MCP server
+	_, err := SpawnMCPServer(cfg, log)
+	if err != nil {
+		log.Error("Failed to start MCP server: %v", err)
+		os.Exit(1)
+	}
+
+	// Wait for server to be ready
+	ctx, rootCancel := context.WithCancel(context.Background())
+	defer rootCancel()
+
+	if mcp.WaitForReady(ctx, 3*time.Second, cfg.KeeperReadyTimeout) {
+		log.Info("MCP server is ready")
+	} else {
+		log.Warn("MCP server did not become ready in %v — proceeding anyway", cfg.KeeperReadyTimeout)
+	}
+
+	// Start keeper management
+	var mu sync.Mutex
+	keepers := make(map[int]*KeeperEntry)
+
+	onDeath := func(threadID int, sessionName string) {
+		log.Warn("Thread %d ('%s') died", threadID, sessionName)
+		NotifyOperator(cfg, log, fmt.Sprintf("💀 <b>%s</b> session died — restarting…", sessionName), threadID)
+	}
+
+	syncKeepers := func() {
+		if cfg.MCPHttpPort <= 0 {
+			log.Debug("syncKeepers: skipped (no port configured)")
+			return
+		}
+
+		log.Debug("syncKeepers: fetching keeper settings...")
+		settings, err := fetchKeeperSettings(ctx, mcp, log)
+		if err != nil {
+			log.Warn("Failed to fetch keeper settings: %v", err)
+			return
+		}
+		log.Debug("syncKeepers: got %d keeper configs", len(settings))
+
+		mu.Lock()
+		defer mu.Unlock()
+
+		// Find keepers to remove (no longer in settings)
+		wanted := make(map[int]bool)
+		for _, s := range settings {
+			wanted[s.ThreadID] = true
+		}
+		for tid, entry := range keepers {
+			if !wanted[tid] {
+				log.Info("Stopping keeper for removed thread %d", tid)
+				entry.keeper.Stop()
+				delete(keepers, tid)
+			}
+		}
+
+		// Start or update keepers
+		for _, s := range settings {
+			existing, exists := keepers[s.ThreadID]
+			if exists && settingsChanged(existing.settings, s) {
+				log.Info("Settings changed for thread %d — restarting keeper", s.ThreadID)
+				existing.keeper.Stop()
+				delete(keepers, s.ThreadID)
+				exists = false
+			}
+			if !exists {
+				k := NewKeeper(s, cfg, mcp, log, onDeath)
+				k.Start()
+				keepers[s.ThreadID] = &KeeperEntry{keeper: k, settings: s}
+				log.Info("Started keeper for thread %d ('%s')", s.ThreadID, s.SessionName)
+			}
+		}
+	}
+
+	// Initial sync
+	log.Info("Running initial keeper sync")
+	syncKeepers()
+
+	// Keeper settings poller (every 2 min)
+	keeperPollerDone := make(chan struct{})
+	go func() {
+		defer close(keeperPollerDone)
+		ticker := time.NewTicker(2 * time.Minute)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				log.Debug("Keeper settings poll triggered")
+				syncKeepers()
+			}
+		}
+	}()
+
+	// Start updater
+	log.Info("Starting auto-updater")
+	updater := NewUpdater(cfg, mcp, log)
+	updater.Start()
+
+	// Health check loop for the server process itself
+	healthDone := make(chan struct{})
+	go func() {
+		defer close(healthDone)
+		consecutiveFails := 0
+		ticker := time.NewTicker(60 * time.Second)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				if mcp.IsServerReady(ctx) {
+					if consecutiveFails > 0 {
+						log.Info("Server health check recovered (was at %d fails)", consecutiveFails)
+					}
+					consecutiveFails = 0
+				} else {
+					consecutiveFails++
+					log.Warn("Server health check failed (%d/%d)", consecutiveFails, cfg.HealthFailThresh)
+					if consecutiveFails >= cfg.HealthFailThresh {
+						log.Error("Server unresponsive after %d consecutive failures — restarting", consecutiveFails)
+						NotifyOperator(cfg, log, "⚠️ Supervisor: server process not running — restarting...", 0)
+
+						// Kill and respawn
+						pid, pidErr := ReadPIDFile(cfg.Paths.ServerPID)
+						if pidErr != nil {
+							log.Warn("Could not read server PID file: %v", pidErr)
+						}
+						if pid > 0 {
+							_ = KillProcess(pid, log)
+						}
+						KillByPort(cfg.MCPHttpPort, log)
+
+						if _, err := SpawnMCPServer(cfg, log); err != nil {
+							log.Error("Failed to respawn server: %v", err)
+						}
+						consecutiveFails = 0
+					}
+				}
+			}
+		}
+	}()
+
+	// Wait for shutdown signal
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
+
+	log.Info("All subsystems started — supervisor is running (PID %d)", os.Getpid())
+
+	sig := <-sigCh
+	log.Info("Received %s — shutting down", sig)
+	rootCancel()
+
+	// Stop keepers (with 10s timeout)
+	shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer shutdownCancel()
+
+	mu.Lock()
+	var wg sync.WaitGroup
+	for _, entry := range keepers {
+		wg.Add(1)
+		go func(k *Keeper) {
+			defer wg.Done()
+			k.Stop()
+		}(entry.keeper)
+	}
+	mu.Unlock()
+
+	doneCh := make(chan struct{})
+	go func() { wg.Wait(); close(doneCh) }()
+	select {
+	case <-doneCh:
+		log.Info("All keepers stopped")
+	case <-shutdownCtx.Done():
+		log.Warn("Keeper shutdown timed out after 10s")
+	}
+
+	// Stop updater
+	updater.Stop()
+
+	// Wait for background goroutines
+	<-keeperPollerDone
+	<-healthDone
+
+	// Kill server process
+	pid, err := ReadPIDFile(cfg.Paths.ServerPID)
+	if err == nil && pid > 0 {
+		log.Info("Stopping MCP server (PID %d)", pid)
+		_ = KillProcess(pid, log)
+	}
+
+	log.Info("Supervisor stopped cleanly")
+}
+
+// fetchKeeperSettings reads the root threads from the MCP server,
+// filtering for those with keepAlive=true.
+func fetchKeeperSettings(ctx context.Context, mcp *MCPClient, log *Logger) ([]KeeperConfig, error) {
+	roots, err := mcp.GetRootThreads(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	var result []KeeperConfig
+	for _, r := range roots {
+		keepAlive, _ := r["keepAlive"].(bool)
+		if !keepAlive {
+			continue
+		}
+
+		// Skip non-active roots (archived, expired, exited)
+		if status, _ := r["status"].(string); status != "" && status != "active" {
+			continue
+		}
+
+		tidFloat, _ := r["threadId"].(float64) // JSON numbers decode as float64
+		tid := int(tidFloat)
+		if tid <= 0 {
+			continue
+		}
+
+		client := "claude"
+		if c, ok := r["client"].(string); ok && c != "" {
+			client = c
+		}
+
+		sessionName := ""
+		if n, ok := r["name"].(string); ok {
+			sessionName = n
+		}
+
+		maxRetries := 5
+		if mr, ok := r["maxRetries"].(float64); ok {
+			maxRetries = int(mr)
+		}
+
+		cooldownMs := 300_000
+		if cd, ok := r["cooldownMs"].(float64); ok {
+			cooldownMs = int(cd)
+		}
+
+		workDir := ""
+		if wd, ok := r["workingDirectory"].(string); ok {
+			workDir = wd
+		}
+
+		result = append(result, KeeperConfig{
+			ThreadID:         tid,
+			SessionName:      sessionName,
+			Client:           client,
+			WorkingDirectory: workDir,
+			MaxRetries:       maxRetries,
+			CooldownMs:       cooldownMs,
+		})
+	}
+	return result, nil
+}
+
+func settingsChanged(a, b KeeperConfig) bool {
+	return a.MaxRetries != b.MaxRetries ||
+		a.CooldownMs != b.CooldownMs ||
+		a.Client != b.Client ||
+		a.SessionName != b.SessionName ||
+		a.WorkingDirectory != b.WorkingDirectory
+}
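
The struct literal at the end of fetchKeeperSettings is the only view this excerpt gives of KeeperConfig; its definition lives in package/supervisor/config.go (+140 lines, not reproduced here). A minimal sketch of the type, inferred solely from the fields populated above and the defaults applied to them, might look like this:

// Sketch only: inferred from fetchKeeperSettings above. The authoritative
// definition is in supervisor/config.go, which this excerpt does not include.
type KeeperConfig struct {
	ThreadID         int    // root thread identifier ("threadId" in the JSON)
	SessionName      string // human-readable session name ("name")
	Client           string // CLI client to keep alive; defaults to "claude"
	WorkingDirectory string // working directory for the kept-alive session
	MaxRetries       int    // restart attempts before giving up; defaults to 5
	CooldownMs       int    // pause between restarts; defaults to 300_000 ms
}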

package/supervisor/notify.go
@@ -0,0 +1,53 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+)
+
+// NotifyOperator sends a message via Telegram to the operator.
+// Silently fails if credentials are not configured.
+func NotifyOperator(cfg Config, log *Logger, text string, threadID int) {
+	if cfg.TelegramToken == "" || cfg.TelegramChatID == "" {
+		log.Debug("NotifyOperator: skipped (no Telegram credentials)")
+		return
+	}
+
+	log.Debug("NotifyOperator: sending to chat %s (threadID=%d)", cfg.TelegramChatID, threadID)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	apiURL := fmt.Sprintf("https://api.telegram.org/bot%s/sendMessage", cfg.TelegramToken)
+
+	form := url.Values{}
+	form.Set("chat_id", cfg.TelegramChatID)
+	form.Set("text", text)
+	form.Set("parse_mode", "HTML")
+	if threadID > 0 {
+		form.Set("message_thread_id", fmt.Sprintf("%d", threadID))
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, apiURL, strings.NewReader(form.Encode()))
+	if err != nil {
+		log.Warn("Telegram notify: %v", err)
+		return
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		log.Warn("Telegram notify: %v", err)
+		return
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode >= 400 {
+		log.Warn("Telegram notify: HTTP %d", resp.StatusCode)
+	} else {
+		log.Debug("Telegram notify: sent OK (HTTP %d)", resp.StatusCode)
+	}
+}
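
The helper above is a plain Telegram Bot API sendMessage call over a form-encoded POST. Assuming a configured token and chat ID, the body it sends is equivalent to the following standalone sketch (the chat ID and message text are placeholders, not values from the package):

package main

import (
	"fmt"
	"net/url"
)

// Illustrative only: reproduces the form body that NotifyOperator builds.
func main() {
	form := url.Values{}
	form.Set("chat_id", "<chat id>")
	form.Set("text", "<message text>")
	form.Set("parse_mode", "HTML")
	form.Set("message_thread_id", "42") // NotifyOperator sets this only when threadID > 0
	fmt.Println(form.Encode())
	// chat_id=%3Cchat+id%3E&message_thread_id=42&parse_mode=HTML&text=%3Cmessage+text%3E
}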

package/supervisor/process.go
@@ -0,0 +1,222 @@
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// SpawnMCPServer starts the MCP server as a detached child process.
+// Returns the PID of the spawned process.
+func SpawnMCPServer(cfg Config, log *Logger) (int, error) {
+	parts := strings.Fields(cfg.MCPStartCommand)
+	if len(parts) == 0 {
+		return 0, errors.New("empty MCP_START_COMMAND")
+	}
+
+	cmd := exec.Command(parts[0], parts[1:]...)
+	cmd.Env = os.Environ()
+	cmd.Stdin = nil
+	cmd.Stdout = nil
+	cmd.Stderr = nil
+	setSysProcAttr(cmd)
+
+	log.Info("Starting MCP server: %s", cfg.MCPStartCommand)
+
+	if err := cmd.Start(); err != nil {
+		return 0, fmt.Errorf("spawn MCP server: %w", err)
+	}
+
+	pid := cmd.Process.Pid
+	log.Info("MCP server started with PID %d", pid)
+
+	// Don't wait — detached process
+	go func() { _ = cmd.Wait() }()
+
+	if err := writePIDFile(cfg.Paths.ServerPID, pid); err != nil {
+		log.Warn("Failed to write server PID file: %v", err)
+	}
+
+	return pid, nil
+}
+
+// KillProcess kills a process by PID. On Windows, uses taskkill /F /T for tree kill.
+func KillProcess(pid int, log *Logger) error {
+	if !IsProcessAlive(pid) {
+		log.Debug("KillProcess: PID %d already dead", pid)
+		return nil
+	}
+
+	log.Debug("KillProcess: killing PID %d", pid)
+
+	if runtime.GOOS == "windows" {
+		// taskkill /F /T kills the tree
+		out, err := exec.Command("taskkill", "/F", "/T", "/PID", strconv.Itoa(pid)).CombinedOutput()
+		if err != nil {
+			return fmt.Errorf("taskkill PID %d: %w (%s)", pid, err, strings.TrimSpace(string(out)))
+		}
+		log.Info("Killed process tree PID %d", pid)
+		return nil
+	}
+
+	// Unix: SIGTERM, wait 2s, then SIGKILL
+	proc, err := os.FindProcess(pid)
+	if err != nil {
+		log.Debug("KillProcess: FindProcess(%d) failed: %v", pid, err)
+		return err
+	}
+	if err := proc.Signal(os.Interrupt); err != nil {
+		// Already dead
+		return nil
+	}
+	time.Sleep(2 * time.Second)
+	if IsProcessAlive(pid) {
+		_ = proc.Kill()
+		log.Info("Force-killed PID %d", pid)
+	} else {
+		log.Info("Process PID %d terminated gracefully", pid)
+	}
+	return nil
+}
+
+// KillByPort finds a process listening on the given port and kills it (Windows-only orphan cleanup).
+func KillByPort(port int, log *Logger) {
+	if runtime.GOOS != "windows" || port <= 0 || port > 65535 {
+		return
+	}
+	log.Debug("KillByPort: checking for processes on port %d", port)
+	out, err := exec.Command("cmd", "/c", fmt.Sprintf("netstat -aon | findstr \":%d.*LISTENING\"", port)).CombinedOutput()
+	if err != nil {
+		log.Debug("KillByPort: no listeners on port %d", port)
+		return
+	}
+	for _, line := range strings.Split(string(out), "\n") {
+		fields := strings.Fields(strings.TrimSpace(line))
+		if len(fields) >= 5 {
+			pid, err := strconv.Atoi(fields[len(fields)-1])
+			if err == nil && pid > 0 {
+				log.Info("Found orphan PID %d on port %d — killing", pid, port)
+				_ = KillProcess(pid, log)
+			}
+		}
+	}
+}
+
+// IsProcessAlive checks whether a process with the given PID exists.
+func IsProcessAlive(pid int) bool {
+	if pid <= 0 {
+		return false
+	}
+	if runtime.GOOS == "windows" {
+		out, err := exec.Command("tasklist", "/FI", fmt.Sprintf("PID eq %d", pid), "/NH").CombinedOutput()
+		if err != nil {
+			return false
+		}
+		return strings.Contains(string(out), strconv.Itoa(pid))
+	}
+	proc, err := os.FindProcess(pid)
+	if err != nil {
+		return false
+	}
+	return proc.Signal(nil) == nil // signal 0 = existence check on Unix
+}
+
+// --- PID File Helpers ---
+
+type pidJSON struct {
+	PID int `json:"pid"`
+}
+
+// ReadPIDFile reads a PID from a file. Supports both JSON {"pid":123} and raw integer formats.
+func ReadPIDFile(path string) (int, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return 0, err
+	}
+	raw := strings.TrimSpace(string(data))
+
+	// Try JSON first
+	var pj pidJSON
+	if json.Unmarshal([]byte(raw), &pj) == nil && pj.PID > 0 {
+		return pj.PID, nil
+	}
+
+	// Fallback: raw integer
+	pid, err := strconv.Atoi(raw)
+	if err != nil {
+		return 0, fmt.Errorf("invalid PID file content: %q", raw)
+	}
+	if pid <= 0 {
+		return 0, fmt.Errorf("invalid PID: %d", pid)
+	}
+	return pid, nil
+}
+
+func writePIDFile(path string, pid int) error {
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return err
+	}
+	data, _ := json.Marshal(pidJSON{PID: pid})
+	return atomicWrite(path, data)
+}
+
+// atomicWrite writes data to a temp file then renames — prevents partial reads.
+func atomicWrite(path string, data []byte) error {
+	tmp := fmt.Sprintf("%s.tmp.%d", path, os.Getpid())
+	if err := os.WriteFile(tmp, data, 0644); err != nil {
+		return err
+	}
+	return os.Rename(tmp, path)
+}
+
+// ListThreadPIDs returns a map of threadId → PID from the pids directory.
+func ListThreadPIDs(pidsDir string) map[string]int {
+	result := make(map[string]int)
+	entries, err := os.ReadDir(pidsDir)
+	if err != nil {
+		// directory may not exist yet
+		return result
+	}
+	for _, e := range entries {
+		if e.IsDir() || !strings.HasSuffix(e.Name(), ".pid") {
+			continue
+		}
+		threadID := strings.TrimSuffix(e.Name(), ".pid")
+		pid, err := ReadPIDFile(filepath.Join(pidsDir, e.Name()))
+		if err != nil {
+			continue
+		}
+		result[threadID] = pid
+	}
+	return result
+}
+
+// CleanStalePIDs removes PID files for processes that are no longer running.
+func CleanStalePIDs(pidsDir string, log *Logger) {
+	pids := ListThreadPIDs(pidsDir)
+	if len(pids) == 0 {
+		log.Debug("CleanStalePIDs: no PID files found in %s", pidsDir)
+		return
+	}
+	log.Debug("CleanStalePIDs: checking %d PID files", len(pids))
+	cleaned := 0
+	for threadID, pid := range pids {
+		if !IsProcessAlive(pid) {
+			path := filepath.Join(pidsDir, threadID+".pid")
+			log.Info("Removing stale PID file for thread %s (PID %d)", threadID, pid)
+			_ = os.Remove(path)
+			cleaned++
+		}
+	}
+	if cleaned > 0 {
+		log.Info("CleanStalePIDs: removed %d stale PID files", cleaned)
+	}
+}
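
SpawnMCPServer detaches the child through setSysProcAttr, which is defined in the per-platform files process_unix.go and process_windows.go (+14 and +15 lines in this release, not shown in this excerpt). A plausible sketch of the Unix variant, assuming it only places the spawned server in its own session, might be:

//go:build !windows

package main

import (
	"os/exec"
	"syscall"
)

// Hypothetical reconstruction: start the MCP server in its own session so it
// does not share the supervisor's terminal or signal group. The shipped
// process_unix.go may differ.
func setSysProcAttr(cmd *exec.Cmd) {
	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
}

The Windows counterpart presumably sets cmd.SysProcAttr.CreationFlags instead (for example CREATE_NEW_PROCESS_GROUP), but its exact contents are not part of this excerpt.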

package/supervisor/process_test.go
@@ -0,0 +1,94 @@
+package main
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestReadPIDFile_JSON(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "test.pid")
+	os.WriteFile(path, []byte(`{"pid":12345}`), 0644)
+
+	pid, err := ReadPIDFile(path)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if pid != 12345 {
+		t.Errorf("got %d, want 12345", pid)
+	}
+}
+
+func TestReadPIDFile_RawInt(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "test.pid")
+	os.WriteFile(path, []byte("54321\n"), 0644)
+
+	pid, err := ReadPIDFile(path)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if pid != 54321 {
+		t.Errorf("got %d, want 54321", pid)
+	}
+}
+
+func TestReadPIDFile_Invalid(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "test.pid")
+	os.WriteFile(path, []byte("not-a-pid"), 0644)
+
+	_, err := ReadPIDFile(path)
+	if err == nil {
+		t.Fatal("expected error for invalid PID content")
+	}
+}
+
+func TestReadPIDFile_Missing(t *testing.T) {
+	_, err := ReadPIDFile(filepath.Join(t.TempDir(), "missing.pid"))
+	if err == nil {
+		t.Fatal("expected error for missing file")
+	}
+}
+
+func TestAtomicWrite(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "data.txt")
+
+	if err := atomicWrite(path, []byte("hello")); err != nil {
+		t.Fatalf("atomicWrite failed: %v", err)
+	}
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("ReadFile failed: %v", err)
+	}
+	if string(data) != "hello" {
+		t.Errorf("got %q, want %q", string(data), "hello")
+	}
+}
+
+func TestListThreadPIDs(t *testing.T) {
+	dir := t.TempDir()
+	os.WriteFile(filepath.Join(dir, "1234.pid"), []byte(`{"pid":100}`), 0644)
+	os.WriteFile(filepath.Join(dir, "5678.pid"), []byte("200"), 0644)
+	os.WriteFile(filepath.Join(dir, "not-a-pid.txt"), []byte("300"), 0644)
+
+	result := ListThreadPIDs(dir)
+	if len(result) != 2 {
+		t.Fatalf("got %d entries, want 2", len(result))
+	}
+	if result["1234"] != 100 {
+		t.Errorf("result[1234] = %d, want 100", result["1234"])
+	}
+	if result["5678"] != 200 {
+		t.Errorf("result[5678] = %d, want 200", result["5678"])
+	}
+}
+
+func TestListThreadPIDs_MissingDir(t *testing.T) {
+	result := ListThreadPIDs(filepath.Join(t.TempDir(), "no-such-dir"))
+	if len(result) != 0 {
+		t.Errorf("expected empty map for missing directory, got %d entries", len(result))
+	}
+}