sensorium-mcp 3.0.4 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120):
  1. package/Install-Sensorium.ps1 +102 -209
  2. package/dist/dashboard/routes/data.d.ts.map +1 -1
  3. package/dist/dashboard/routes/data.js +2 -1
  4. package/dist/dashboard/routes/data.js.map +1 -1
  5. package/dist/dashboard/routes/threads.js +1 -1
  6. package/dist/dashboard/routes/threads.js.map +1 -1
  7. package/dist/dashboard/routes.d.ts.map +1 -1
  8. package/dist/dashboard/routes.js +1 -3
  9. package/dist/dashboard/routes.js.map +1 -1
  10. package/dist/data/memory/migration-runner.d.ts +1 -1
  11. package/dist/data/memory/migration-runner.d.ts.map +1 -1
  12. package/dist/data/memory/migration-runner.js +59 -3
  13. package/dist/data/memory/migration-runner.js.map +1 -1
  14. package/dist/data/memory/narrative.d.ts.map +1 -1
  15. package/dist/data/memory/narrative.js +43 -6
  16. package/dist/data/memory/narrative.js.map +1 -1
  17. package/dist/data/memory/reflection.d.ts +24 -0
  18. package/dist/data/memory/reflection.d.ts.map +1 -1
  19. package/dist/data/memory/reflection.js +65 -1
  20. package/dist/data/memory/reflection.js.map +1 -1
  21. package/dist/data/memory/schema-ddl.d.ts +1 -1
  22. package/dist/data/memory/schema-ddl.d.ts.map +1 -1
  23. package/dist/data/memory/schema-ddl.js +2 -1
  24. package/dist/data/memory/schema-ddl.js.map +1 -1
  25. package/dist/data/memory/thread-registry.js +1 -1
  26. package/dist/data/memory/thread-registry.js.map +1 -1
  27. package/dist/http-server.d.ts.map +1 -1
  28. package/dist/http-server.js +1 -9
  29. package/dist/http-server.js.map +1 -1
  30. package/dist/index.js +3 -6
  31. package/dist/index.js.map +1 -1
  32. package/dist/server/factory.js +1 -1
  33. package/dist/server/factory.js.map +1 -1
  34. package/dist/services/agent-spawn.service.d.ts +7 -1
  35. package/dist/services/agent-spawn.service.d.ts.map +1 -1
  36. package/dist/services/agent-spawn.service.js +69 -45
  37. package/dist/services/agent-spawn.service.js.map +1 -1
  38. package/dist/services/consolidation.service.d.ts.map +1 -1
  39. package/dist/services/consolidation.service.js +88 -35
  40. package/dist/services/consolidation.service.js.map +1 -1
  41. package/dist/services/keeper.service.d.ts +21 -0
  42. package/dist/services/keeper.service.d.ts.map +1 -0
  43. package/dist/services/keeper.service.js +195 -0
  44. package/dist/services/keeper.service.js.map +1 -0
  45. package/dist/services/maintenance-signal.d.ts +2 -0
  46. package/dist/services/maintenance-signal.d.ts.map +1 -1
  47. package/dist/services/maintenance-signal.js +7 -1
  48. package/dist/services/maintenance-signal.js.map +1 -1
  49. package/dist/services/memory-briefing.service.d.ts.map +1 -1
  50. package/dist/services/memory-briefing.service.js +17 -1
  51. package/dist/services/memory-briefing.service.js.map +1 -1
  52. package/dist/services/process.service.d.ts +19 -2
  53. package/dist/services/process.service.d.ts.map +1 -1
  54. package/dist/services/process.service.js +104 -10
  55. package/dist/services/process.service.js.map +1 -1
  56. package/dist/services/thread-lifecycle.service.d.ts +5 -0
  57. package/dist/services/thread-lifecycle.service.d.ts.map +1 -1
  58. package/dist/services/thread-lifecycle.service.js +33 -8
  59. package/dist/services/thread-lifecycle.service.js.map +1 -1
  60. package/dist/services/worker-cleanup.service.d.ts +14 -1
  61. package/dist/services/worker-cleanup.service.d.ts.map +1 -1
  62. package/dist/services/worker-cleanup.service.js +36 -38
  63. package/dist/services/worker-cleanup.service.js.map +1 -1
  64. package/dist/sessions.d.ts +0 -5
  65. package/dist/sessions.d.ts.map +1 -1
  66. package/dist/sessions.js +0 -7
  67. package/dist/sessions.js.map +1 -1
  68. package/dist/stdio-server.d.ts.map +1 -1
  69. package/dist/stdio-server.js +1 -7
  70. package/dist/stdio-server.js.map +1 -1
  71. package/dist/tools/delegate-tool.d.ts.map +1 -1
  72. package/dist/tools/delegate-tool.js +2 -2
  73. package/dist/tools/delegate-tool.js.map +1 -1
  74. package/dist/tools/session-tools.js +1 -1
  75. package/dist/tools/session-tools.js.map +1 -1
  76. package/dist/tools/start-session-tool.d.ts.map +1 -1
  77. package/dist/tools/start-session-tool.js +8 -9
  78. package/dist/tools/start-session-tool.js.map +1 -1
  79. package/dist/tools/wait/message-processing.d.ts.map +1 -1
  80. package/dist/tools/wait/message-processing.js +28 -0
  81. package/dist/tools/wait/message-processing.js.map +1 -1
  82. package/dist/tools/wait/poll-loop.js +1 -1
  83. package/dist/tools/wait/poll-loop.js.map +1 -1
  84. package/package.json +1 -1
  85. package/dist/tools/thread-lifecycle.d.ts +0 -6
  86. package/dist/tools/thread-lifecycle.d.ts.map +0 -1
  87. package/dist/tools/thread-lifecycle.js +0 -6
  88. package/dist/tools/thread-lifecycle.js.map +0 -1
  89. package/supervisor/config.go +0 -253
  90. package/supervisor/config_test.go +0 -78
  91. package/supervisor/go.mod +0 -15
  92. package/supervisor/go.sum +0 -20
  93. package/supervisor/health.go +0 -433
  94. package/supervisor/health_test.go +0 -93
  95. package/supervisor/keeper.go +0 -309
  96. package/supervisor/keeper_test.go +0 -27
  97. package/supervisor/lock.go +0 -57
  98. package/supervisor/lock_test.go +0 -54
  99. package/supervisor/log.go +0 -195
  100. package/supervisor/log_test.go +0 -125
  101. package/supervisor/main.go +0 -475
  102. package/supervisor/main_test.go +0 -130
  103. package/supervisor/notify.go +0 -53
  104. package/supervisor/process.go +0 -294
  105. package/supervisor/process_test.go +0 -108
  106. package/supervisor/process_unix.go +0 -14
  107. package/supervisor/process_windows.go +0 -15
  108. package/supervisor/secrets.go +0 -95
  109. package/supervisor/secrets_securevault_test.go +0 -98
  110. package/supervisor/secrets_test.go +0 -119
  111. package/supervisor/self_update.go +0 -282
  112. package/supervisor/self_update_test.go +0 -177
  113. package/supervisor/service_restart_stub.go +0 -9
  114. package/supervisor/service_restart_windows.go +0 -63
  115. package/supervisor/service_stub.go +0 -15
  116. package/supervisor/service_windows.go +0 -194
  117. package/supervisor/update_state.go +0 -264
  118. package/supervisor/update_state_test.go +0 -306
  119. package/supervisor/updater.go +0 -613
  120. package/supervisor/updater_test.go +0 -64
@@ -1,309 +0,0 @@
1
- package main
2
-
3
- import (
4
- "context"
5
- "encoding/json"
6
- "fmt"
7
- "math"
8
- "path/filepath"
9
- "sync"
10
- "time"
11
- )
12
-
13
- // KeeperConfig describes a thread that must be kept alive.
14
- type KeeperConfig struct {
15
- ThreadID int
16
- SessionName string
17
- Client string // e.g. "claude-code", "codex"
18
- WorkingDirectory string
19
- MaxRetries int
20
- CooldownMs int
21
- }
22
-
23
- // Keeper supervises a single thread, restarting it via the MCP server's
24
- // start_thread tool when it stops running. One goroutine per keeper.
25
- type Keeper struct {
26
- cfg KeeperConfig
27
- global Config
28
- mcp *MCPClient
29
- log *Logger
30
- onDeath func(threadID int, sessionName string)
31
-
32
- mu sync.Mutex
33
- stopped bool
34
- cancel context.CancelFunc
35
- done chan struct{}
36
- }
37
-
38
- func NewKeeper(cfg KeeperConfig, global Config, mcp *MCPClient, log *Logger, onDeath func(int, string)) *Keeper {
39
- maxRetries := cfg.MaxRetries
40
- if maxRetries <= 0 {
41
- maxRetries = global.KeeperMaxRetries
42
- }
43
- cfg.MaxRetries = maxRetries
44
- return &Keeper{
45
- cfg: cfg,
46
- global: global,
47
- mcp: mcp,
48
- log: log,
49
- onDeath: onDeath,
50
- done: make(chan struct{}),
51
- }
52
- }
53
-
54
- // Start begins the keeper loop in a separate goroutine.
55
- func (k *Keeper) Start() {
56
- ctx, cancel := context.WithCancel(context.Background())
57
- k.mu.Lock()
58
- k.cancel = cancel
59
- k.mu.Unlock()
60
-
61
- go k.run(ctx)
62
- }
63
-
64
- // Stop signals the keeper to shut down and waits for it to finish.
65
- func (k *Keeper) Stop() {
66
- k.mu.Lock()
67
- k.stopped = true
68
- if k.cancel != nil {
69
- k.cancel()
70
- }
71
- k.mu.Unlock()
72
- <-k.done
73
- }
74
-
75
- func (k *Keeper) isStopped() bool {
76
- k.mu.Lock()
77
- defer k.mu.Unlock()
78
- return k.stopped
79
- }
80
-
81
// run is the keeper's main loop. It waits for the MCP server to come up,
// then periodically (every KeeperHealthCheckInterval) checks the supervised
// thread and restarts it via start_thread when it is down or stuck.
//
// Retry policy, tracked in local state inside checkAndStart:
//   - retryCount: consecutive failed start_thread calls; exponential backoff
//     per failure, long cooldown (plus onDeath notification) once MaxRetries
//     is exceeded.
//   - fastExitCount/fastExitEscalation: detects threads that start OK but
//     exit within FastExitThreshold (e.g. exhausted credits); escalating
//     cooldowns capped at FastExitMaxCooldown.
//   - activeThreadID: start_thread may hand work to a worker thread whose ID
//     differs from the configured root; health checks follow the worker.
//
// Exits when ctx is cancelled, Stop is called, or the root thread loses its
// keepAlive flag.
func (k *Keeper) run(ctx context.Context) {
	defer close(k.done)
	defer func() {
		// Never let a keeper panic take down the supervisor.
		if r := recover(); r != nil {
			k.log.Error("Keeper panicked for thread %d: %v", k.cfg.ThreadID, r)
		}
	}()
	defer k.log.Info("Keeper stopped for thread %d", k.cfg.ThreadID)

	k.log.Info("Keeper started for thread %d ('%s') [client=%s]", k.cfg.ThreadID, k.cfg.SessionName, k.cfg.Client)

	// Wait for MCP server to be ready
	ready := k.mcp.WaitForReady(ctx, k.global.KeeperReadyPollInterval, k.global.KeeperReadyTimeout)
	if !ready && !k.isStopped() {
		k.log.Warn("MCP server not ready after %v — attempting start_thread anyway", k.global.KeeperReadyTimeout)
	}

	retryCount := 0
	fastExitCount := 0
	fastExitEscalation := 0
	var lastStartTime time.Time
	activeThreadID := k.cfg.ThreadID // may differ from root after start_thread

	checkAndStart := func() {
		if k.isStopped() {
			return
		}

		// Check if thread is running — use activeThreadID (worker) not root
		running := k.mcp.IsThreadRunning(ctx, activeThreadID)
		if running {
			// Check if stuck
			if k.mcp.IsThreadStuck(ctx, activeThreadID, k.global.StuckThreshold) {
				k.log.Warn("Thread %d (worker %d) is stuck (no heartbeat for %v) — restarting", k.cfg.ThreadID, activeThreadID, k.global.StuckThreshold)
				// Kill via MCP API, then fall through to restart
				k.killThread(ctx, activeThreadID)
				retryCount = 0
				activeThreadID = k.cfg.ThreadID // reset — will get new worker ID on restart
			} else {
				// Healthy — reset counters
				if retryCount > 0 {
					k.log.Info("Thread %d is healthy again (was at retry %d)", k.cfg.ThreadID, retryCount)
				} else {
					k.log.Info("Thread %d is healthy", k.cfg.ThreadID)
				}
				retryCount = 0
				fastExitCount = 0
				fastExitEscalation = 0
				return
			}
		}

		// Thread is not running (or was stuck and killed).
		// Detect fast-exit: if the previous successful start was recent, the thread exited too quickly.
		if !lastStartTime.IsZero() && time.Since(lastStartTime) < k.global.FastExitThreshold {
			fastExitCount++
			if fastExitCount >= k.global.FastExitMaxCount {
				// Exponential escalation: base * 2^escalation, capped.
				cooldown := time.Duration(float64(k.global.FastExitBaseCooldown) * math.Pow(2, float64(fastExitEscalation)))
				if cooldown > k.global.FastExitMaxCooldown {
					cooldown = k.global.FastExitMaxCooldown
				}
				k.log.Warn("Thread %d: %d consecutive fast exits — backing off %v", k.cfg.ThreadID, fastExitCount, cooldown)
				if k.onDeath != nil {
					k.onDeath(k.cfg.ThreadID, k.cfg.SessionName+" (repeated fast exits — check credits/API key)")
				}
				fastExitEscalation++
				k.sleep(ctx, cooldown)
				fastExitCount = 0
				retryCount = 0
				return
			}
		} else if !lastStartTime.IsZero() {
			// Previous start was long ago — not a fast exit pattern, reset counters
			fastExitCount = 0
			fastExitEscalation = 0
		}

		if retryCount >= k.cfg.MaxRetries {
			// Per-thread CooldownMs overrides the global cooldown when set.
			cooldown := k.global.KeeperCooldown
			if k.cfg.CooldownMs > 0 {
				cooldown = time.Duration(k.cfg.CooldownMs) * time.Millisecond
			}
			k.log.Warn("Max retries (%d) exceeded — cooling down for %v", k.cfg.MaxRetries, cooldown)
			if k.onDeath != nil {
				k.onDeath(k.cfg.ThreadID, k.cfg.SessionName)
			}
			k.sleep(ctx, cooldown)
			retryCount = 0
			fastExitCount = 0
			return
		}

		k.log.Info("Thread %d not running — calling start_thread (attempt %d/%d)", k.cfg.ThreadID, retryCount+1, k.cfg.MaxRetries)

		// Re-verify root still has keepAlive before restarting
		// (user may have archived the worker, which disables keepAlive on the root)
		if !k.isRootKeepAlive(ctx) {
			k.log.Info("Thread %d root has keepAlive=false — stopping keeper", k.cfg.ThreadID)
			k.mu.Lock()
			k.stopped = true
			k.mu.Unlock()
			return
		}

		ok, workerID := k.callStartThread(ctx)

		if ok {
			lastStartTime = time.Now()
			if workerID > 0 {
				activeThreadID = workerID
				k.log.Info("Thread %d start_thread succeeded (worker %d)", k.cfg.ThreadID, workerID)
			} else {
				k.log.Info("Thread %d start_thread succeeded", k.cfg.ThreadID)
			}
			retryCount = 0
		} else {
			retryCount++
			delay := k.backoff(retryCount)
			k.log.Info("Backing off %v before next attempt", delay)
			k.sleep(ctx, delay)
		}
	}

	// Initial check
	checkAndStart()

	// Health check loop
	for {
		if k.isStopped() {
			return
		}

		select {
		case <-ctx.Done():
			return
		case <-time.After(k.global.KeeperHealthCheckInterval):
			checkAndStart()
		}
	}
}
221
-
222
- // callStartThread starts the thread via MCP. Returns (success, workerThreadID).
223
- // workerThreadID is extracted from the response JSON when available.
224
- func (k *Keeper) callStartThread(ctx context.Context) (bool, int) {
225
- sessionID, err := k.mcp.OpenMCPSession(ctx)
226
- if err != nil {
227
- k.log.Error("Failed to open MCP session: %v", err)
228
- return false, 0
229
- }
230
- defer k.mcp.CloseMCPSession(ctx, sessionID)
231
-
232
- text, err := k.mcp.CallStartThread(ctx, sessionID, k.cfg.ThreadID, k.cfg.SessionName, k.cfg.Client, k.cfg.WorkingDirectory)
233
- if err != nil {
234
- k.log.Error("start_thread failed: %v", err)
235
- return false, 0
236
- }
237
-
238
- if text != "" {
239
- k.log.Info("start_thread response: %.200s", text)
240
- }
241
-
242
- // Parse the worker threadId from the response JSON
243
- workerID := parseWorkerThreadID(text)
244
- return true, workerID
245
- }
246
-
247
- func (k *Keeper) killThread(ctx context.Context, threadID int) {
248
- k.log.Info("Killing stuck thread %d", threadID)
249
- // Read PID from thread PID file
250
- pidFile := filepath.Join(k.global.Paths.PIDsDir, fmt.Sprintf("%d.pid", threadID))
251
- pid, err := ReadPIDFile(pidFile)
252
- if err != nil {
253
- k.log.Warn("Cannot read PID for thread %d: %v", threadID, err)
254
- return
255
- }
256
- if err := KillProcess(pid, k.log); err != nil {
257
- k.log.Error("Failed to kill thread %d (PID %d): %v", threadID, pid, err)
258
- }
259
- }
260
-
261
- func (k *Keeper) backoff(retry int) time.Duration {
262
- delay := time.Duration(float64(k.global.KeeperBaseBackoff) * math.Pow(2, float64(retry)))
263
- if delay > k.global.KeeperMaxBackoff {
264
- delay = k.global.KeeperMaxBackoff
265
- }
266
- return delay
267
- }
268
-
269
- func (k *Keeper) sleep(ctx context.Context, d time.Duration) {
270
- select {
271
- case <-ctx.Done():
272
- case <-time.After(d):
273
- }
274
- }
275
-
276
- // isRootKeepAlive checks whether the thread still has keepAlive=true.
277
- // Uses the /api/threads/keepalive endpoint (root, branch, daily — excludes workers)
278
- // so that branch and daily threads are correctly included in the check.
279
- func (k *Keeper) isRootKeepAlive(ctx context.Context) bool {
280
- threads, err := k.mcp.GetKeepAliveThreads(ctx)
281
- if err != nil {
282
- k.log.Debug("isRootKeepAlive(%d): failed to fetch keepalive threads: %v — assuming still alive", k.cfg.ThreadID, err)
283
- return true // fail-open: don't stop keeper if we can't check
284
- }
285
- for _, r := range threads {
286
- tidFloat, _ := r["threadId"].(float64)
287
- if int(tidFloat) == k.cfg.ThreadID {
288
- keepAlive, _ := r["keepAlive"].(bool)
289
- status, _ := r["status"].(string)
290
- return keepAlive && (status == "" || status == "active")
291
- }
292
- }
293
- k.log.Debug("isRootKeepAlive(%d): thread not found in keepalive list", k.cfg.ThreadID)
294
- return false // thread gone or keepAlive removed → stop keeper
295
- }
296
-
297
// parseWorkerThreadID extracts the "threadId" field from a start_thread JSON
// response. Returns 0 for empty input, invalid JSON, or a missing /
// non-positive threadId.
func parseWorkerThreadID(text string) int {
	if text == "" {
		return 0
	}
	var parsed struct {
		ThreadID int `json:"threadId"`
	}
	if err := json.Unmarshal([]byte(text), &parsed); err != nil {
		return 0
	}
	if parsed.ThreadID <= 0 {
		return 0
	}
	return parsed.ThreadID
}
@@ -1,27 +0,0 @@
1
- package main
2
-
3
- import "testing"
4
-
5
- func TestParseWorkerThreadID(t *testing.T) {
6
- tests := []struct {
7
- name string
8
- text string
9
- want int
10
- }{
11
- {"normal response", `{"threadId":11226,"status":"restarted","name":"Sensorium 2","pid":87108}`, 11226},
12
- {"already_running", `{"threadId":11226,"status":"already_running","name":"Sensorium 2","pid":40568}`, 11226},
13
- {"empty string", "", 0},
14
- {"no threadId", `{"status":"error"}`, 0},
15
- {"threadId zero", `{"threadId":0}`, 0},
16
- {"invalid JSON", `not json`, 0},
17
- {"negative threadId", `{"threadId":-5}`, 0},
18
- }
19
- for _, tt := range tests {
20
- t.Run(tt.name, func(t *testing.T) {
21
- got := parseWorkerThreadID(tt.text)
22
- if got != tt.want {
23
- t.Errorf("parseWorkerThreadID(%q) = %d, want %d", tt.text, got, tt.want)
24
- }
25
- })
26
- }
27
- }
@@ -1,57 +0,0 @@
1
- package main
2
-
3
- import (
4
- "fmt"
5
- "os"
6
- "strconv"
7
- "strings"
8
- )
9
-
10
- // AcquireLock creates a lock file to prevent multiple supervisor instances.
11
- // Uses O_CREATE|O_EXCL for atomic creation. If a stale lock exists (PID not
12
- // running), it reclaims the lock.
13
- func AcquireLock(lockPath string, log *Logger) bool {
14
- // Try atomic create first
15
- f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
16
- if err == nil {
17
- // Lock acquired — write our PID
18
- fmt.Fprintf(f, "%d", os.Getpid())
19
- f.Close()
20
- log.Info("Lock acquired: %s (PID %d)", lockPath, os.Getpid())
21
- return true
22
- }
23
-
24
- // Lock file exists — check if the PID is still alive
25
- data, err := os.ReadFile(lockPath)
26
- if err != nil {
27
- log.Error("Failed to read lockfile %s: %v", lockPath, err)
28
- return false
29
- }
30
-
31
- pidStr := strings.TrimSpace(string(data))
32
- pid, err := strconv.Atoi(pidStr)
33
- if err == nil && pid > 0 && IsProcessAlive(pid) {
34
- log.Error("Another supervisor is running (PID %d). Lockfile: %s", pid, lockPath)
35
- return false
36
- }
37
-
38
- // Stale lock — reclaim
39
- log.Warn("Reclaimed stale supervisor lockfile (old PID %s)", pidStr)
40
- _ = os.Remove(lockPath)
41
-
42
- // Re-acquire atomically
43
- f, err = os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
44
- if err != nil {
45
- log.Error("Failed to acquire lockfile after reclaim: %v", err)
46
- return false
47
- }
48
- fmt.Fprintf(f, "%d", os.Getpid())
49
- f.Close()
50
- log.Info("Lock acquired: %s (PID %d)", lockPath, os.Getpid())
51
- return true
52
- }
53
-
54
- // ReleaseLock removes the lock file.
55
- func ReleaseLock(lockPath string) {
56
- _ = os.Remove(lockPath)
57
- }
@@ -1,54 +0,0 @@
1
- package main
2
-
3
- import (
4
- "os"
5
- "path/filepath"
6
- "testing"
7
- )
8
-
9
- func TestAcquireLock_Fresh(t *testing.T) {
10
- dir := t.TempDir()
11
- lockPath := filepath.Join(dir, "test.lock")
12
- log := NewLogger("")
13
-
14
- if !AcquireLock(lockPath, log) {
15
- t.Fatal("expected lock acquisition to succeed")
16
- }
17
- defer ReleaseLock(lockPath)
18
-
19
- // Verify lock file was created with our PID
20
- data, err := os.ReadFile(lockPath)
21
- if err != nil {
22
- t.Fatalf("lock file not created: %v", err)
23
- }
24
- if len(data) == 0 {
25
- t.Fatal("lock file is empty")
26
- }
27
- }
28
-
29
- func TestAcquireLock_StalePID(t *testing.T) {
30
- dir := t.TempDir()
31
- lockPath := filepath.Join(dir, "test.lock")
32
- log := NewLogger("")
33
-
34
- // Write a stale lock with PID 1 (guaranteed to not be a supervisor)
35
- // Use PID 99999999 which is almost certainly not running
36
- os.WriteFile(lockPath, []byte("99999999"), 0644)
37
-
38
- if !AcquireLock(lockPath, log) {
39
- t.Fatal("expected stale lock to be reclaimed")
40
- }
41
- defer ReleaseLock(lockPath)
42
- }
43
-
44
- func TestReleaseLock(t *testing.T) {
45
- dir := t.TempDir()
46
- lockPath := filepath.Join(dir, "test.lock")
47
- os.WriteFile(lockPath, []byte("12345"), 0644)
48
-
49
- ReleaseLock(lockPath)
50
-
51
- if _, err := os.Stat(lockPath); !os.IsNotExist(err) {
52
- t.Error("expected lock file to be removed")
53
- }
54
- }
package/supervisor/log.go DELETED
@@ -1,195 +0,0 @@
1
- package main
2
-
3
- import (
4
- "fmt"
5
- "os"
6
- "path/filepath"
7
- "sort"
8
- "strings"
9
- "sync"
10
- "time"
11
- )
12
-
13
// Logger writes to both stderr and a rotating log file.
// Rotates daily (at midnight) and when the file exceeds maxSize bytes.
//
// mu guards all mutable fields; the midnight goroutine started by
// startMidnightTimer also takes mu before touching the file.
type Logger struct {
	mu        sync.Mutex
	logPath   string        // target file path; empty disables file output
	file      *os.File      // current open file, nil when unavailable
	debug     bool          // emit DEBUG lines to stderr (SUPERVISOR_DEBUG env)
	size      int64         // bytes written to the current file
	maxSize   int64         // default 5 MB
	maxKeep   int           // max daily rotated files to keep
	today     string        // current day (2006-01-02) for daily rotation
	stopTimer chan struct{} // closed by Close to stop the midnight goroutine
}
26
-
27
- func NewLogger(logPath string) *Logger {
28
- l := &Logger{
29
- logPath: logPath,
30
- debug: os.Getenv("SUPERVISOR_DEBUG") == "1" || os.Getenv("SUPERVISOR_DEBUG") == "true",
31
- maxSize: 5 * 1024 * 1024, // 5 MB
32
- maxKeep: 7, // keep 7 daily files
33
- stopTimer: make(chan struct{}),
34
- }
35
- // Rotate previous day's log on startup if needed
36
- l.today = time.Now().Format("2006-01-02")
37
- l.rotateDailyIfNeeded()
38
- l.openFile()
39
- l.startMidnightTimer()
40
- return l
41
- }
42
-
43
- func (l *Logger) openFile() {
44
- if l.logPath == "" {
45
- return
46
- }
47
- f, err := os.OpenFile(l.logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
48
- if err != nil {
49
- fmt.Fprintf(os.Stderr, "[WARN] cannot open log file %s: %v\n", l.logPath, err)
50
- return
51
- }
52
- l.file = f
53
- // Seed current size for rotation checks
54
- if info, err := f.Stat(); err == nil {
55
- l.size = info.Size()
56
- }
57
- }
58
-
59
// log formats a single "[timestamp] [LEVEL] message" line and writes it to
// stderr and (when open) the log file. All levels are written to the file;
// DEBUG reaches stderr only when SUPERVISOR_DEBUG is enabled. Triggers
// size-based rotation once the file reaches maxSize.
func (l *Logger) log(level, format string, args ...any) {
	ts := time.Now().Format("2006-01-02 15:04:05")
	msg := fmt.Sprintf(format, args...)
	line := fmt.Sprintf("[%s] [%s] %s\n", ts, level, msg)

	l.mu.Lock()
	defer l.mu.Unlock()
	// Always write to file for post-mortem debugging; only emit DEBUG to stderr
	// when SUPERVISOR_DEBUG is set.
	if level != "DEBUG" || l.debug {
		fmt.Fprint(os.Stderr, line)
	}
	if l.file != nil {
		n, err := l.file.WriteString(line)
		if err != nil {
			fmt.Fprintf(os.Stderr, "[ERR] log write failed: %v\n", err)
		}
		l.size += int64(n)
		// rotate() requires mu held — we hold it for the rest of this call.
		if l.maxSize > 0 && l.size >= l.maxSize {
			l.rotate()
		}
	}
}
82
-
83
- func (l *Logger) Info(format string, args ...any) { l.log("INFO", format, args...) }
84
- func (l *Logger) Warn(format string, args ...any) { l.log("WARN", format, args...) }
85
- func (l *Logger) Error(format string, args ...any) { l.log("ERROR", format, args...) }
86
- func (l *Logger) Debug(format string, args ...any) { l.log("DEBUG", format, args...) }
87
-
88
// rotateDailyIfNeeded renames the current log to a dated archive
// (name.YYYY-MM-DD.ext) if its last modification was on a previous day.
// Locking is the caller's responsibility: it runs at startup before any
// concurrent use, and from the midnight timer while l.mu is already held
// (with l.file closed). Rename failures are reported to stderr and ignored.
func (l *Logger) rotateDailyIfNeeded() {
	info, err := os.Stat(l.logPath)
	if err != nil {
		return // file doesn't exist yet — nothing to rotate
	}
	modDay := info.ModTime().Format("2006-01-02")
	if modDay == l.today {
		return // same day — no rotation needed
	}
	// Rename to dated file
	ext := filepath.Ext(l.logPath)
	base := strings.TrimSuffix(l.logPath, ext)
	dated := fmt.Sprintf("%s.%s%s", base, modDay, ext)
	if err := os.Rename(l.logPath, dated); err != nil {
		fmt.Fprintf(os.Stderr, "[WARN] daily log rotate: %v\n", err)
	}
	l.pruneOldLogs()
}
109
-
110
- // pruneOldLogs deletes daily log files beyond maxKeep. Called without mu held.
111
- func (l *Logger) pruneOldLogs() {
112
- dir := filepath.Dir(l.logPath)
113
- base := strings.TrimSuffix(filepath.Base(l.logPath), filepath.Ext(l.logPath))
114
- entries, err := os.ReadDir(dir)
115
- if err != nil {
116
- return
117
- }
118
- var dated []string
119
- for _, e := range entries {
120
- name := e.Name()
121
- if strings.HasPrefix(name, base+".") && name != filepath.Base(l.logPath) {
122
- dated = append(dated, filepath.Join(dir, name))
123
- }
124
- }
125
- sort.Strings(dated) // ascending — oldest first
126
- for len(dated) > l.maxKeep {
127
- os.Remove(dated[0])
128
- dated = dated[1:]
129
- }
130
- }
131
-
132
// startMidnightTimer launches a goroutine that performs a daily rotation at
// each local midnight. The goroutine exits when stopTimer is closed (Close).
func (l *Logger) startMidnightTimer() {
	stopCh := l.stopTimer // capture channel value; Close() may nil the field concurrently
	go func() {
		for {
			// Sleep until the next local midnight, or exit on shutdown.
			now := time.Now()
			next := time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, now.Location())
			select {
			case <-time.After(time.Until(next)):
			case <-stopCh:
				return
			}
			// Under the lock: advance the day, close the file so
			// rotateDailyIfNeeded can archive it, then reopen fresh.
			l.mu.Lock()
			l.today = time.Now().Format("2006-01-02")
			if l.file != nil {
				l.file.Close()
				l.file = nil
			}
			l.rotateDailyIfNeeded()
			l.size = 0
			l.openFile()
			l.mu.Unlock()
		}
	}()
}
157
-
158
- // rotate closes the current log file, renames it with a dated suffix for
159
- // size-based rotation mid-day, and opens a fresh log. Called with mu held.
160
- func (l *Logger) rotate() {
161
- if l.file != nil {
162
- l.file.Close()
163
- l.file = nil
164
- }
165
- // Use a timestamp suffix to avoid colliding with the daily dated file
166
- ts := time.Now().Format("2006-01-02T150405")
167
- ext := filepath.Ext(l.logPath)
168
- base := strings.TrimSuffix(l.logPath, ext)
169
- if err := os.Rename(l.logPath, fmt.Sprintf("%s.%s%s", base, ts, ext)); err != nil {
170
- fmt.Fprintf(os.Stderr, "[WARN] log size-rotate rename: %v\n", err)
171
- }
172
- l.pruneOldLogs()
173
-
174
- // Open a fresh file
175
- l.size = 0
176
- f, err := os.OpenFile(l.logPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
177
- if err != nil {
178
- fmt.Fprintf(os.Stderr, "[WARN] log rotate: cannot create fresh log: %v\n", err)
179
- return
180
- }
181
- l.file = f
182
- }
183
-
184
- func (l *Logger) Close() {
185
- if l.stopTimer != nil {
186
- close(l.stopTimer)
187
- l.stopTimer = nil
188
- }
189
- l.mu.Lock()
190
- defer l.mu.Unlock()
191
- if l.file != nil {
192
- l.file.Close()
193
- l.file = nil
194
- }
195
- }