sensorium-mcp 3.0.4 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Install-Sensorium.ps1 +102 -209
- package/dist/dashboard/routes/data.d.ts.map +1 -1
- package/dist/dashboard/routes/data.js +2 -1
- package/dist/dashboard/routes/data.js.map +1 -1
- package/dist/dashboard/routes/threads.js +1 -1
- package/dist/dashboard/routes/threads.js.map +1 -1
- package/dist/dashboard/routes.d.ts.map +1 -1
- package/dist/dashboard/routes.js +1 -3
- package/dist/dashboard/routes.js.map +1 -1
- package/dist/data/memory/migration-runner.d.ts +1 -1
- package/dist/data/memory/migration-runner.d.ts.map +1 -1
- package/dist/data/memory/migration-runner.js +59 -3
- package/dist/data/memory/migration-runner.js.map +1 -1
- package/dist/data/memory/narrative.d.ts.map +1 -1
- package/dist/data/memory/narrative.js +43 -6
- package/dist/data/memory/narrative.js.map +1 -1
- package/dist/data/memory/reflection.d.ts +24 -0
- package/dist/data/memory/reflection.d.ts.map +1 -1
- package/dist/data/memory/reflection.js +65 -1
- package/dist/data/memory/reflection.js.map +1 -1
- package/dist/data/memory/schema-ddl.d.ts +1 -1
- package/dist/data/memory/schema-ddl.d.ts.map +1 -1
- package/dist/data/memory/schema-ddl.js +2 -1
- package/dist/data/memory/schema-ddl.js.map +1 -1
- package/dist/data/memory/thread-registry.js +1 -1
- package/dist/data/memory/thread-registry.js.map +1 -1
- package/dist/http-server.d.ts.map +1 -1
- package/dist/http-server.js +1 -9
- package/dist/http-server.js.map +1 -1
- package/dist/index.js +3 -6
- package/dist/index.js.map +1 -1
- package/dist/server/factory.js +1 -1
- package/dist/server/factory.js.map +1 -1
- package/dist/services/agent-spawn.service.d.ts +7 -1
- package/dist/services/agent-spawn.service.d.ts.map +1 -1
- package/dist/services/agent-spawn.service.js +69 -45
- package/dist/services/agent-spawn.service.js.map +1 -1
- package/dist/services/consolidation.service.d.ts.map +1 -1
- package/dist/services/consolidation.service.js +88 -35
- package/dist/services/consolidation.service.js.map +1 -1
- package/dist/services/keeper.service.d.ts +21 -0
- package/dist/services/keeper.service.d.ts.map +1 -0
- package/dist/services/keeper.service.js +195 -0
- package/dist/services/keeper.service.js.map +1 -0
- package/dist/services/maintenance-signal.d.ts +2 -0
- package/dist/services/maintenance-signal.d.ts.map +1 -1
- package/dist/services/maintenance-signal.js +7 -1
- package/dist/services/maintenance-signal.js.map +1 -1
- package/dist/services/memory-briefing.service.d.ts.map +1 -1
- package/dist/services/memory-briefing.service.js +17 -1
- package/dist/services/memory-briefing.service.js.map +1 -1
- package/dist/services/process.service.d.ts +19 -2
- package/dist/services/process.service.d.ts.map +1 -1
- package/dist/services/process.service.js +104 -10
- package/dist/services/process.service.js.map +1 -1
- package/dist/services/thread-lifecycle.service.d.ts +5 -0
- package/dist/services/thread-lifecycle.service.d.ts.map +1 -1
- package/dist/services/thread-lifecycle.service.js +33 -8
- package/dist/services/thread-lifecycle.service.js.map +1 -1
- package/dist/services/worker-cleanup.service.d.ts +14 -1
- package/dist/services/worker-cleanup.service.d.ts.map +1 -1
- package/dist/services/worker-cleanup.service.js +36 -38
- package/dist/services/worker-cleanup.service.js.map +1 -1
- package/dist/sessions.d.ts +0 -5
- package/dist/sessions.d.ts.map +1 -1
- package/dist/sessions.js +0 -7
- package/dist/sessions.js.map +1 -1
- package/dist/stdio-server.d.ts.map +1 -1
- package/dist/stdio-server.js +1 -7
- package/dist/stdio-server.js.map +1 -1
- package/dist/tools/delegate-tool.d.ts.map +1 -1
- package/dist/tools/delegate-tool.js +2 -2
- package/dist/tools/delegate-tool.js.map +1 -1
- package/dist/tools/session-tools.js +1 -1
- package/dist/tools/session-tools.js.map +1 -1
- package/dist/tools/start-session-tool.d.ts.map +1 -1
- package/dist/tools/start-session-tool.js +8 -9
- package/dist/tools/start-session-tool.js.map +1 -1
- package/dist/tools/wait/message-processing.d.ts.map +1 -1
- package/dist/tools/wait/message-processing.js +28 -0
- package/dist/tools/wait/message-processing.js.map +1 -1
- package/dist/tools/wait/poll-loop.js +1 -1
- package/dist/tools/wait/poll-loop.js.map +1 -1
- package/package.json +1 -1
- package/dist/tools/thread-lifecycle.d.ts +0 -6
- package/dist/tools/thread-lifecycle.d.ts.map +0 -1
- package/dist/tools/thread-lifecycle.js +0 -6
- package/dist/tools/thread-lifecycle.js.map +0 -1
- package/supervisor/config.go +0 -253
- package/supervisor/config_test.go +0 -78
- package/supervisor/go.mod +0 -15
- package/supervisor/go.sum +0 -20
- package/supervisor/health.go +0 -433
- package/supervisor/health_test.go +0 -93
- package/supervisor/keeper.go +0 -309
- package/supervisor/keeper_test.go +0 -27
- package/supervisor/lock.go +0 -57
- package/supervisor/lock_test.go +0 -54
- package/supervisor/log.go +0 -195
- package/supervisor/log_test.go +0 -125
- package/supervisor/main.go +0 -475
- package/supervisor/main_test.go +0 -130
- package/supervisor/notify.go +0 -53
- package/supervisor/process.go +0 -294
- package/supervisor/process_test.go +0 -108
- package/supervisor/process_unix.go +0 -14
- package/supervisor/process_windows.go +0 -15
- package/supervisor/secrets.go +0 -95
- package/supervisor/secrets_securevault_test.go +0 -98
- package/supervisor/secrets_test.go +0 -119
- package/supervisor/self_update.go +0 -282
- package/supervisor/self_update_test.go +0 -177
- package/supervisor/service_restart_stub.go +0 -9
- package/supervisor/service_restart_windows.go +0 -63
- package/supervisor/service_stub.go +0 -15
- package/supervisor/service_windows.go +0 -194
- package/supervisor/update_state.go +0 -264
- package/supervisor/update_state_test.go +0 -306
- package/supervisor/updater.go +0 -613
- package/supervisor/updater_test.go +0 -64
package/supervisor/keeper.go
DELETED
|
@@ -1,309 +0,0 @@
|
|
|
1
|
-
package main
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"context"
|
|
5
|
-
"encoding/json"
|
|
6
|
-
"fmt"
|
|
7
|
-
"math"
|
|
8
|
-
"path/filepath"
|
|
9
|
-
"sync"
|
|
10
|
-
"time"
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
// KeeperConfig describes a thread that must be kept alive.
//
// MaxRetries and CooldownMs are per-thread overrides: NewKeeper falls back to
// the global retry limit when MaxRetries is not positive, and the keeper loop
// falls back to the global cooldown when CooldownMs is not positive.
type KeeperConfig struct {
	// ThreadID identifies the thread handed to start_thread.
	ThreadID int

	// SessionName, Client (e.g. "claude-code", "codex") and WorkingDirectory
	// are forwarded to the start_thread call.
	SessionName, Client, WorkingDirectory string

	// MaxRetries bounds consecutive failed start attempts before a cooldown;
	// CooldownMs is the length of that cooldown in milliseconds.
	MaxRetries, CooldownMs int
}
|
|
22
|
-
|
|
23
|
-
// Keeper supervises a single thread, restarting it via the MCP server's
|
|
24
|
-
// start_thread tool when it stops running. One goroutine per keeper.
|
|
25
|
-
type Keeper struct {
|
|
26
|
-
cfg KeeperConfig
|
|
27
|
-
global Config
|
|
28
|
-
mcp *MCPClient
|
|
29
|
-
log *Logger
|
|
30
|
-
onDeath func(threadID int, sessionName string)
|
|
31
|
-
|
|
32
|
-
mu sync.Mutex
|
|
33
|
-
stopped bool
|
|
34
|
-
cancel context.CancelFunc
|
|
35
|
-
done chan struct{}
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
func NewKeeper(cfg KeeperConfig, global Config, mcp *MCPClient, log *Logger, onDeath func(int, string)) *Keeper {
|
|
39
|
-
maxRetries := cfg.MaxRetries
|
|
40
|
-
if maxRetries <= 0 {
|
|
41
|
-
maxRetries = global.KeeperMaxRetries
|
|
42
|
-
}
|
|
43
|
-
cfg.MaxRetries = maxRetries
|
|
44
|
-
return &Keeper{
|
|
45
|
-
cfg: cfg,
|
|
46
|
-
global: global,
|
|
47
|
-
mcp: mcp,
|
|
48
|
-
log: log,
|
|
49
|
-
onDeath: onDeath,
|
|
50
|
-
done: make(chan struct{}),
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
// Start begins the keeper loop in a separate goroutine.
|
|
55
|
-
func (k *Keeper) Start() {
|
|
56
|
-
ctx, cancel := context.WithCancel(context.Background())
|
|
57
|
-
k.mu.Lock()
|
|
58
|
-
k.cancel = cancel
|
|
59
|
-
k.mu.Unlock()
|
|
60
|
-
|
|
61
|
-
go k.run(ctx)
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
// Stop signals the keeper to shut down and waits for it to finish.
|
|
65
|
-
func (k *Keeper) Stop() {
|
|
66
|
-
k.mu.Lock()
|
|
67
|
-
k.stopped = true
|
|
68
|
-
if k.cancel != nil {
|
|
69
|
-
k.cancel()
|
|
70
|
-
}
|
|
71
|
-
k.mu.Unlock()
|
|
72
|
-
<-k.done
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
func (k *Keeper) isStopped() bool {
|
|
76
|
-
k.mu.Lock()
|
|
77
|
-
defer k.mu.Unlock()
|
|
78
|
-
return k.stopped
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
func (k *Keeper) run(ctx context.Context) {
|
|
82
|
-
defer close(k.done)
|
|
83
|
-
defer func() {
|
|
84
|
-
if r := recover(); r != nil {
|
|
85
|
-
k.log.Error("Keeper panicked for thread %d: %v", k.cfg.ThreadID, r)
|
|
86
|
-
}
|
|
87
|
-
}()
|
|
88
|
-
defer k.log.Info("Keeper stopped for thread %d", k.cfg.ThreadID)
|
|
89
|
-
|
|
90
|
-
k.log.Info("Keeper started for thread %d ('%s') [client=%s]", k.cfg.ThreadID, k.cfg.SessionName, k.cfg.Client)
|
|
91
|
-
|
|
92
|
-
// Wait for MCP server to be ready
|
|
93
|
-
ready := k.mcp.WaitForReady(ctx, k.global.KeeperReadyPollInterval, k.global.KeeperReadyTimeout)
|
|
94
|
-
if !ready && !k.isStopped() {
|
|
95
|
-
k.log.Warn("MCP server not ready after %v — attempting start_thread anyway", k.global.KeeperReadyTimeout)
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
retryCount := 0
|
|
99
|
-
fastExitCount := 0
|
|
100
|
-
fastExitEscalation := 0
|
|
101
|
-
var lastStartTime time.Time
|
|
102
|
-
activeThreadID := k.cfg.ThreadID // may differ from root after start_thread
|
|
103
|
-
|
|
104
|
-
checkAndStart := func() {
|
|
105
|
-
if k.isStopped() {
|
|
106
|
-
return
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
// Check if thread is running — use activeThreadID (worker) not root
|
|
110
|
-
running := k.mcp.IsThreadRunning(ctx, activeThreadID)
|
|
111
|
-
if running {
|
|
112
|
-
// Check if stuck
|
|
113
|
-
if k.mcp.IsThreadStuck(ctx, activeThreadID, k.global.StuckThreshold) {
|
|
114
|
-
k.log.Warn("Thread %d (worker %d) is stuck (no heartbeat for %v) — restarting", k.cfg.ThreadID, activeThreadID, k.global.StuckThreshold)
|
|
115
|
-
// Kill via MCP API, then fall through to restart
|
|
116
|
-
k.killThread(ctx, activeThreadID)
|
|
117
|
-
retryCount = 0
|
|
118
|
-
activeThreadID = k.cfg.ThreadID // reset — will get new worker ID on restart
|
|
119
|
-
} else {
|
|
120
|
-
// Healthy — reset counters
|
|
121
|
-
if retryCount > 0 {
|
|
122
|
-
k.log.Info("Thread %d is healthy again (was at retry %d)", k.cfg.ThreadID, retryCount)
|
|
123
|
-
} else {
|
|
124
|
-
k.log.Info("Thread %d is healthy", k.cfg.ThreadID)
|
|
125
|
-
}
|
|
126
|
-
retryCount = 0
|
|
127
|
-
fastExitCount = 0
|
|
128
|
-
fastExitEscalation = 0
|
|
129
|
-
return
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
// Thread is not running (or was stuck and killed).
|
|
134
|
-
// Detect fast-exit: if the previous successful start was recent, the thread exited too quickly.
|
|
135
|
-
if !lastStartTime.IsZero() && time.Since(lastStartTime) < k.global.FastExitThreshold {
|
|
136
|
-
fastExitCount++
|
|
137
|
-
if fastExitCount >= k.global.FastExitMaxCount {
|
|
138
|
-
cooldown := time.Duration(float64(k.global.FastExitBaseCooldown) * math.Pow(2, float64(fastExitEscalation)))
|
|
139
|
-
if cooldown > k.global.FastExitMaxCooldown {
|
|
140
|
-
cooldown = k.global.FastExitMaxCooldown
|
|
141
|
-
}
|
|
142
|
-
k.log.Warn("Thread %d: %d consecutive fast exits — backing off %v", k.cfg.ThreadID, fastExitCount, cooldown)
|
|
143
|
-
if k.onDeath != nil {
|
|
144
|
-
k.onDeath(k.cfg.ThreadID, k.cfg.SessionName+" (repeated fast exits — check credits/API key)")
|
|
145
|
-
}
|
|
146
|
-
fastExitEscalation++
|
|
147
|
-
k.sleep(ctx, cooldown)
|
|
148
|
-
fastExitCount = 0
|
|
149
|
-
retryCount = 0
|
|
150
|
-
return
|
|
151
|
-
}
|
|
152
|
-
} else if !lastStartTime.IsZero() {
|
|
153
|
-
// Previous start was long ago — not a fast exit pattern, reset counters
|
|
154
|
-
fastExitCount = 0
|
|
155
|
-
fastExitEscalation = 0
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
if retryCount >= k.cfg.MaxRetries {
|
|
159
|
-
cooldown := k.global.KeeperCooldown
|
|
160
|
-
if k.cfg.CooldownMs > 0 {
|
|
161
|
-
cooldown = time.Duration(k.cfg.CooldownMs) * time.Millisecond
|
|
162
|
-
}
|
|
163
|
-
k.log.Warn("Max retries (%d) exceeded — cooling down for %v", k.cfg.MaxRetries, cooldown)
|
|
164
|
-
if k.onDeath != nil {
|
|
165
|
-
k.onDeath(k.cfg.ThreadID, k.cfg.SessionName)
|
|
166
|
-
}
|
|
167
|
-
k.sleep(ctx, cooldown)
|
|
168
|
-
retryCount = 0
|
|
169
|
-
fastExitCount = 0
|
|
170
|
-
return
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
k.log.Info("Thread %d not running — calling start_thread (attempt %d/%d)", k.cfg.ThreadID, retryCount+1, k.cfg.MaxRetries)
|
|
174
|
-
|
|
175
|
-
// Re-verify root still has keepAlive before restarting
|
|
176
|
-
// (user may have archived the worker, which disables keepAlive on the root)
|
|
177
|
-
if !k.isRootKeepAlive(ctx) {
|
|
178
|
-
k.log.Info("Thread %d root has keepAlive=false — stopping keeper", k.cfg.ThreadID)
|
|
179
|
-
k.mu.Lock()
|
|
180
|
-
k.stopped = true
|
|
181
|
-
k.mu.Unlock()
|
|
182
|
-
return
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
ok, workerID := k.callStartThread(ctx)
|
|
186
|
-
|
|
187
|
-
if ok {
|
|
188
|
-
lastStartTime = time.Now()
|
|
189
|
-
if workerID > 0 {
|
|
190
|
-
activeThreadID = workerID
|
|
191
|
-
k.log.Info("Thread %d start_thread succeeded (worker %d)", k.cfg.ThreadID, workerID)
|
|
192
|
-
} else {
|
|
193
|
-
k.log.Info("Thread %d start_thread succeeded", k.cfg.ThreadID)
|
|
194
|
-
}
|
|
195
|
-
retryCount = 0
|
|
196
|
-
} else {
|
|
197
|
-
retryCount++
|
|
198
|
-
delay := k.backoff(retryCount)
|
|
199
|
-
k.log.Info("Backing off %v before next attempt", delay)
|
|
200
|
-
k.sleep(ctx, delay)
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
// Initial check
|
|
205
|
-
checkAndStart()
|
|
206
|
-
|
|
207
|
-
// Health check loop
|
|
208
|
-
for {
|
|
209
|
-
if k.isStopped() {
|
|
210
|
-
return
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
select {
|
|
214
|
-
case <-ctx.Done():
|
|
215
|
-
return
|
|
216
|
-
case <-time.After(k.global.KeeperHealthCheckInterval):
|
|
217
|
-
checkAndStart()
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
// callStartThread starts the thread via MCP. Returns (success, workerThreadID).
|
|
223
|
-
// workerThreadID is extracted from the response JSON when available.
|
|
224
|
-
func (k *Keeper) callStartThread(ctx context.Context) (bool, int) {
|
|
225
|
-
sessionID, err := k.mcp.OpenMCPSession(ctx)
|
|
226
|
-
if err != nil {
|
|
227
|
-
k.log.Error("Failed to open MCP session: %v", err)
|
|
228
|
-
return false, 0
|
|
229
|
-
}
|
|
230
|
-
defer k.mcp.CloseMCPSession(ctx, sessionID)
|
|
231
|
-
|
|
232
|
-
text, err := k.mcp.CallStartThread(ctx, sessionID, k.cfg.ThreadID, k.cfg.SessionName, k.cfg.Client, k.cfg.WorkingDirectory)
|
|
233
|
-
if err != nil {
|
|
234
|
-
k.log.Error("start_thread failed: %v", err)
|
|
235
|
-
return false, 0
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
if text != "" {
|
|
239
|
-
k.log.Info("start_thread response: %.200s", text)
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
// Parse the worker threadId from the response JSON
|
|
243
|
-
workerID := parseWorkerThreadID(text)
|
|
244
|
-
return true, workerID
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
func (k *Keeper) killThread(ctx context.Context, threadID int) {
|
|
248
|
-
k.log.Info("Killing stuck thread %d", threadID)
|
|
249
|
-
// Read PID from thread PID file
|
|
250
|
-
pidFile := filepath.Join(k.global.Paths.PIDsDir, fmt.Sprintf("%d.pid", threadID))
|
|
251
|
-
pid, err := ReadPIDFile(pidFile)
|
|
252
|
-
if err != nil {
|
|
253
|
-
k.log.Warn("Cannot read PID for thread %d: %v", threadID, err)
|
|
254
|
-
return
|
|
255
|
-
}
|
|
256
|
-
if err := KillProcess(pid, k.log); err != nil {
|
|
257
|
-
k.log.Error("Failed to kill thread %d (PID %d): %v", threadID, pid, err)
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
func (k *Keeper) backoff(retry int) time.Duration {
|
|
262
|
-
delay := time.Duration(float64(k.global.KeeperBaseBackoff) * math.Pow(2, float64(retry)))
|
|
263
|
-
if delay > k.global.KeeperMaxBackoff {
|
|
264
|
-
delay = k.global.KeeperMaxBackoff
|
|
265
|
-
}
|
|
266
|
-
return delay
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
func (k *Keeper) sleep(ctx context.Context, d time.Duration) {
|
|
270
|
-
select {
|
|
271
|
-
case <-ctx.Done():
|
|
272
|
-
case <-time.After(d):
|
|
273
|
-
}
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
// isRootKeepAlive checks whether the thread still has keepAlive=true.
|
|
277
|
-
// Uses the /api/threads/keepalive endpoint (root, branch, daily — excludes workers)
|
|
278
|
-
// so that branch and daily threads are correctly included in the check.
|
|
279
|
-
func (k *Keeper) isRootKeepAlive(ctx context.Context) bool {
|
|
280
|
-
threads, err := k.mcp.GetKeepAliveThreads(ctx)
|
|
281
|
-
if err != nil {
|
|
282
|
-
k.log.Debug("isRootKeepAlive(%d): failed to fetch keepalive threads: %v — assuming still alive", k.cfg.ThreadID, err)
|
|
283
|
-
return true // fail-open: don't stop keeper if we can't check
|
|
284
|
-
}
|
|
285
|
-
for _, r := range threads {
|
|
286
|
-
tidFloat, _ := r["threadId"].(float64)
|
|
287
|
-
if int(tidFloat) == k.cfg.ThreadID {
|
|
288
|
-
keepAlive, _ := r["keepAlive"].(bool)
|
|
289
|
-
status, _ := r["status"].(string)
|
|
290
|
-
return keepAlive && (status == "" || status == "active")
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
k.log.Debug("isRootKeepAlive(%d): thread not found in keepalive list", k.cfg.ThreadID)
|
|
294
|
-
return false // thread gone or keepAlive removed → stop keeper
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
// parseWorkerThreadID extracts the "threadId" field from a start_thread JSON
// response. It returns 0 when text is empty, is not valid JSON, lacks the
// field, or carries a non-positive value.
func parseWorkerThreadID(text string) int {
	var payload struct {
		ThreadID int `json:"threadId"`
	}
	// An empty string is not valid JSON, so it is rejected here as well.
	if err := json.Unmarshal([]byte(text), &payload); err != nil {
		return 0
	}
	if payload.ThreadID <= 0 {
		return 0
	}
	return payload.ThreadID
}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
package main
|
|
2
|
-
|
|
3
|
-
import "testing"
|
|
4
|
-
|
|
5
|
-
func TestParseWorkerThreadID(t *testing.T) {
|
|
6
|
-
tests := []struct {
|
|
7
|
-
name string
|
|
8
|
-
text string
|
|
9
|
-
want int
|
|
10
|
-
}{
|
|
11
|
-
{"normal response", `{"threadId":11226,"status":"restarted","name":"Sensorium 2","pid":87108}`, 11226},
|
|
12
|
-
{"already_running", `{"threadId":11226,"status":"already_running","name":"Sensorium 2","pid":40568}`, 11226},
|
|
13
|
-
{"empty string", "", 0},
|
|
14
|
-
{"no threadId", `{"status":"error"}`, 0},
|
|
15
|
-
{"threadId zero", `{"threadId":0}`, 0},
|
|
16
|
-
{"invalid JSON", `not json`, 0},
|
|
17
|
-
{"negative threadId", `{"threadId":-5}`, 0},
|
|
18
|
-
}
|
|
19
|
-
for _, tt := range tests {
|
|
20
|
-
t.Run(tt.name, func(t *testing.T) {
|
|
21
|
-
got := parseWorkerThreadID(tt.text)
|
|
22
|
-
if got != tt.want {
|
|
23
|
-
t.Errorf("parseWorkerThreadID(%q) = %d, want %d", tt.text, got, tt.want)
|
|
24
|
-
}
|
|
25
|
-
})
|
|
26
|
-
}
|
|
27
|
-
}
|
package/supervisor/lock.go
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
package main
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"fmt"
|
|
5
|
-
"os"
|
|
6
|
-
"strconv"
|
|
7
|
-
"strings"
|
|
8
|
-
)
|
|
9
|
-
|
|
10
|
-
// AcquireLock creates a lock file to prevent multiple supervisor instances.
|
|
11
|
-
// Uses O_CREATE|O_EXCL for atomic creation. If a stale lock exists (PID not
|
|
12
|
-
// running), it reclaims the lock.
|
|
13
|
-
func AcquireLock(lockPath string, log *Logger) bool {
|
|
14
|
-
// Try atomic create first
|
|
15
|
-
f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
|
|
16
|
-
if err == nil {
|
|
17
|
-
// Lock acquired — write our PID
|
|
18
|
-
fmt.Fprintf(f, "%d", os.Getpid())
|
|
19
|
-
f.Close()
|
|
20
|
-
log.Info("Lock acquired: %s (PID %d)", lockPath, os.Getpid())
|
|
21
|
-
return true
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// Lock file exists — check if the PID is still alive
|
|
25
|
-
data, err := os.ReadFile(lockPath)
|
|
26
|
-
if err != nil {
|
|
27
|
-
log.Error("Failed to read lockfile %s: %v", lockPath, err)
|
|
28
|
-
return false
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
pidStr := strings.TrimSpace(string(data))
|
|
32
|
-
pid, err := strconv.Atoi(pidStr)
|
|
33
|
-
if err == nil && pid > 0 && IsProcessAlive(pid) {
|
|
34
|
-
log.Error("Another supervisor is running (PID %d). Lockfile: %s", pid, lockPath)
|
|
35
|
-
return false
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
// Stale lock — reclaim
|
|
39
|
-
log.Warn("Reclaimed stale supervisor lockfile (old PID %s)", pidStr)
|
|
40
|
-
_ = os.Remove(lockPath)
|
|
41
|
-
|
|
42
|
-
// Re-acquire atomically
|
|
43
|
-
f, err = os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
|
|
44
|
-
if err != nil {
|
|
45
|
-
log.Error("Failed to acquire lockfile after reclaim: %v", err)
|
|
46
|
-
return false
|
|
47
|
-
}
|
|
48
|
-
fmt.Fprintf(f, "%d", os.Getpid())
|
|
49
|
-
f.Close()
|
|
50
|
-
log.Info("Lock acquired: %s (PID %d)", lockPath, os.Getpid())
|
|
51
|
-
return true
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
// ReleaseLock deletes the lock file at lockPath. It is best effort: a missing
// or undeletable file is deliberately not reported.
func ReleaseLock(lockPath string) {
	err := os.Remove(lockPath)
	_ = err
}
|
package/supervisor/lock_test.go
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
package main
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"os"
|
|
5
|
-
"path/filepath"
|
|
6
|
-
"testing"
|
|
7
|
-
)
|
|
8
|
-
|
|
9
|
-
func TestAcquireLock_Fresh(t *testing.T) {
|
|
10
|
-
dir := t.TempDir()
|
|
11
|
-
lockPath := filepath.Join(dir, "test.lock")
|
|
12
|
-
log := NewLogger("")
|
|
13
|
-
|
|
14
|
-
if !AcquireLock(lockPath, log) {
|
|
15
|
-
t.Fatal("expected lock acquisition to succeed")
|
|
16
|
-
}
|
|
17
|
-
defer ReleaseLock(lockPath)
|
|
18
|
-
|
|
19
|
-
// Verify lock file was created with our PID
|
|
20
|
-
data, err := os.ReadFile(lockPath)
|
|
21
|
-
if err != nil {
|
|
22
|
-
t.Fatalf("lock file not created: %v", err)
|
|
23
|
-
}
|
|
24
|
-
if len(data) == 0 {
|
|
25
|
-
t.Fatal("lock file is empty")
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
func TestAcquireLock_StalePID(t *testing.T) {
|
|
30
|
-
dir := t.TempDir()
|
|
31
|
-
lockPath := filepath.Join(dir, "test.lock")
|
|
32
|
-
log := NewLogger("")
|
|
33
|
-
|
|
34
|
-
// Write a stale lock with PID 1 (guaranteed to not be a supervisor)
|
|
35
|
-
// Use PID 99999999 which is almost certainly not running
|
|
36
|
-
os.WriteFile(lockPath, []byte("99999999"), 0644)
|
|
37
|
-
|
|
38
|
-
if !AcquireLock(lockPath, log) {
|
|
39
|
-
t.Fatal("expected stale lock to be reclaimed")
|
|
40
|
-
}
|
|
41
|
-
defer ReleaseLock(lockPath)
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
func TestReleaseLock(t *testing.T) {
|
|
45
|
-
dir := t.TempDir()
|
|
46
|
-
lockPath := filepath.Join(dir, "test.lock")
|
|
47
|
-
os.WriteFile(lockPath, []byte("12345"), 0644)
|
|
48
|
-
|
|
49
|
-
ReleaseLock(lockPath)
|
|
50
|
-
|
|
51
|
-
if _, err := os.Stat(lockPath); !os.IsNotExist(err) {
|
|
52
|
-
t.Error("expected lock file to be removed")
|
|
53
|
-
}
|
|
54
|
-
}
|
package/supervisor/log.go
DELETED
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
package main
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"fmt"
|
|
5
|
-
"os"
|
|
6
|
-
"path/filepath"
|
|
7
|
-
"sort"
|
|
8
|
-
"strings"
|
|
9
|
-
"sync"
|
|
10
|
-
"time"
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
// Logger writes every message to a log file and mirrors it to stderr (DEBUG
// lines reach stderr only when debug is set). The file is rotated once a day
// by a background timer and whenever it grows to maxSize bytes.
type Logger struct {
	mu sync.Mutex // serialises writes and rotation

	logPath string   // path of the live log file; empty disables file output
	file    *os.File // open handle for logPath, nil when file output is unavailable
	debug   bool     // emit DEBUG lines to stderr as well

	size    int64 // bytes in the live file, as tracked by this logger
	maxSize int64 // size threshold for rotation; NewLogger sets 5 MB
	maxKeep int   // how many rotated files to keep; NewLogger sets 7

	today     string        // current day as YYYY-MM-DD, used to detect day changes
	stopTimer chan struct{} // closed by Close to stop the midnight goroutine
}
|
|
26
|
-
|
|
27
|
-
func NewLogger(logPath string) *Logger {
|
|
28
|
-
l := &Logger{
|
|
29
|
-
logPath: logPath,
|
|
30
|
-
debug: os.Getenv("SUPERVISOR_DEBUG") == "1" || os.Getenv("SUPERVISOR_DEBUG") == "true",
|
|
31
|
-
maxSize: 5 * 1024 * 1024, // 5 MB
|
|
32
|
-
maxKeep: 7, // keep 7 daily files
|
|
33
|
-
stopTimer: make(chan struct{}),
|
|
34
|
-
}
|
|
35
|
-
// Rotate previous day's log on startup if needed
|
|
36
|
-
l.today = time.Now().Format("2006-01-02")
|
|
37
|
-
l.rotateDailyIfNeeded()
|
|
38
|
-
l.openFile()
|
|
39
|
-
l.startMidnightTimer()
|
|
40
|
-
return l
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
func (l *Logger) openFile() {
|
|
44
|
-
if l.logPath == "" {
|
|
45
|
-
return
|
|
46
|
-
}
|
|
47
|
-
f, err := os.OpenFile(l.logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
|
48
|
-
if err != nil {
|
|
49
|
-
fmt.Fprintf(os.Stderr, "[WARN] cannot open log file %s: %v\n", l.logPath, err)
|
|
50
|
-
return
|
|
51
|
-
}
|
|
52
|
-
l.file = f
|
|
53
|
-
// Seed current size for rotation checks
|
|
54
|
-
if info, err := f.Stat(); err == nil {
|
|
55
|
-
l.size = info.Size()
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
func (l *Logger) log(level, format string, args ...any) {
|
|
60
|
-
ts := time.Now().Format("2006-01-02 15:04:05")
|
|
61
|
-
msg := fmt.Sprintf(format, args...)
|
|
62
|
-
line := fmt.Sprintf("[%s] [%s] %s\n", ts, level, msg)
|
|
63
|
-
|
|
64
|
-
l.mu.Lock()
|
|
65
|
-
defer l.mu.Unlock()
|
|
66
|
-
// Always write to file for post-mortem debugging; only emit DEBUG to stderr
|
|
67
|
-
// when SUPERVISOR_DEBUG is set.
|
|
68
|
-
if level != "DEBUG" || l.debug {
|
|
69
|
-
fmt.Fprint(os.Stderr, line)
|
|
70
|
-
}
|
|
71
|
-
if l.file != nil {
|
|
72
|
-
n, err := l.file.WriteString(line)
|
|
73
|
-
if err != nil {
|
|
74
|
-
fmt.Fprintf(os.Stderr, "[ERR] log write failed: %v\n", err)
|
|
75
|
-
}
|
|
76
|
-
l.size += int64(n)
|
|
77
|
-
if l.maxSize > 0 && l.size >= l.maxSize {
|
|
78
|
-
l.rotate()
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
func (l *Logger) Info(format string, args ...any) { l.log("INFO", format, args...) }
|
|
84
|
-
func (l *Logger) Warn(format string, args ...any) { l.log("WARN", format, args...) }
|
|
85
|
-
func (l *Logger) Error(format string, args ...any) { l.log("ERROR", format, args...) }
|
|
86
|
-
func (l *Logger) Debug(format string, args ...any) { l.log("DEBUG", format, args...) }
|
|
87
|
-
|
|
88
|
-
// rotateDailyIfNeeded renames the current log to a dated archive if it was
|
|
89
|
-
// written on a previous day. Called without mu held (used at startup and from
|
|
90
|
-
// the midnight timer before the lock is acquired).
|
|
91
|
-
func (l *Logger) rotateDailyIfNeeded() {
|
|
92
|
-
info, err := os.Stat(l.logPath)
|
|
93
|
-
if err != nil {
|
|
94
|
-
return // file doesn't exist yet — nothing to rotate
|
|
95
|
-
}
|
|
96
|
-
modDay := info.ModTime().Format("2006-01-02")
|
|
97
|
-
if modDay == l.today {
|
|
98
|
-
return // same day — no rotation needed
|
|
99
|
-
}
|
|
100
|
-
// Rename to dated file
|
|
101
|
-
ext := filepath.Ext(l.logPath)
|
|
102
|
-
base := strings.TrimSuffix(l.logPath, ext)
|
|
103
|
-
dated := fmt.Sprintf("%s.%s%s", base, modDay, ext)
|
|
104
|
-
if err := os.Rename(l.logPath, dated); err != nil {
|
|
105
|
-
fmt.Fprintf(os.Stderr, "[WARN] daily log rotate: %v\n", err)
|
|
106
|
-
}
|
|
107
|
-
l.pruneOldLogs()
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
// pruneOldLogs deletes daily log files beyond maxKeep. Called without mu held.
|
|
111
|
-
func (l *Logger) pruneOldLogs() {
|
|
112
|
-
dir := filepath.Dir(l.logPath)
|
|
113
|
-
base := strings.TrimSuffix(filepath.Base(l.logPath), filepath.Ext(l.logPath))
|
|
114
|
-
entries, err := os.ReadDir(dir)
|
|
115
|
-
if err != nil {
|
|
116
|
-
return
|
|
117
|
-
}
|
|
118
|
-
var dated []string
|
|
119
|
-
for _, e := range entries {
|
|
120
|
-
name := e.Name()
|
|
121
|
-
if strings.HasPrefix(name, base+".") && name != filepath.Base(l.logPath) {
|
|
122
|
-
dated = append(dated, filepath.Join(dir, name))
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
sort.Strings(dated) // ascending — oldest first
|
|
126
|
-
for len(dated) > l.maxKeep {
|
|
127
|
-
os.Remove(dated[0])
|
|
128
|
-
dated = dated[1:]
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
// startMidnightTimer fires a daily rotation at local midnight.
|
|
133
|
-
func (l *Logger) startMidnightTimer() {
|
|
134
|
-
stopCh := l.stopTimer // capture channel value; Close() may nil the field concurrently
|
|
135
|
-
go func() {
|
|
136
|
-
for {
|
|
137
|
-
now := time.Now()
|
|
138
|
-
next := time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, now.Location())
|
|
139
|
-
select {
|
|
140
|
-
case <-time.After(time.Until(next)):
|
|
141
|
-
case <-stopCh:
|
|
142
|
-
return
|
|
143
|
-
}
|
|
144
|
-
l.mu.Lock()
|
|
145
|
-
l.today = time.Now().Format("2006-01-02")
|
|
146
|
-
if l.file != nil {
|
|
147
|
-
l.file.Close()
|
|
148
|
-
l.file = nil
|
|
149
|
-
}
|
|
150
|
-
l.rotateDailyIfNeeded()
|
|
151
|
-
l.size = 0
|
|
152
|
-
l.openFile()
|
|
153
|
-
l.mu.Unlock()
|
|
154
|
-
}
|
|
155
|
-
}()
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
// rotate closes the current log file, renames it with a dated suffix for
|
|
159
|
-
// size-based rotation mid-day, and opens a fresh log. Called with mu held.
|
|
160
|
-
func (l *Logger) rotate() {
|
|
161
|
-
if l.file != nil {
|
|
162
|
-
l.file.Close()
|
|
163
|
-
l.file = nil
|
|
164
|
-
}
|
|
165
|
-
// Use a timestamp suffix to avoid colliding with the daily dated file
|
|
166
|
-
ts := time.Now().Format("2006-01-02T150405")
|
|
167
|
-
ext := filepath.Ext(l.logPath)
|
|
168
|
-
base := strings.TrimSuffix(l.logPath, ext)
|
|
169
|
-
if err := os.Rename(l.logPath, fmt.Sprintf("%s.%s%s", base, ts, ext)); err != nil {
|
|
170
|
-
fmt.Fprintf(os.Stderr, "[WARN] log size-rotate rename: %v\n", err)
|
|
171
|
-
}
|
|
172
|
-
l.pruneOldLogs()
|
|
173
|
-
|
|
174
|
-
// Open a fresh file
|
|
175
|
-
l.size = 0
|
|
176
|
-
f, err := os.OpenFile(l.logPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
|
|
177
|
-
if err != nil {
|
|
178
|
-
fmt.Fprintf(os.Stderr, "[WARN] log rotate: cannot create fresh log: %v\n", err)
|
|
179
|
-
return
|
|
180
|
-
}
|
|
181
|
-
l.file = f
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
func (l *Logger) Close() {
|
|
185
|
-
if l.stopTimer != nil {
|
|
186
|
-
close(l.stopTimer)
|
|
187
|
-
l.stopTimer = nil
|
|
188
|
-
}
|
|
189
|
-
l.mu.Lock()
|
|
190
|
-
defer l.mu.Unlock()
|
|
191
|
-
if l.file != nil {
|
|
192
|
-
l.file.Close()
|
|
193
|
-
l.file = nil
|
|
194
|
-
}
|
|
195
|
-
}
|