sensorium-mcp 2.17.26 → 2.17.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/dashboard/routes/threads.d.ts.map +1 -1
  2. package/dist/dashboard/routes/threads.js +18 -5
  3. package/dist/dashboard/routes/threads.js.map +1 -1
  4. package/dist/data/memory/bootstrap.js +2 -2
  5. package/dist/data/memory/bootstrap.js.map +1 -1
  6. package/dist/data/memory/consolidation.d.ts.map +1 -1
  7. package/dist/data/memory/consolidation.js +75 -4
  8. package/dist/data/memory/consolidation.js.map +1 -1
  9. package/dist/data/memory/index.d.ts +1 -0
  10. package/dist/data/memory/index.d.ts.map +1 -1
  11. package/dist/data/memory/index.js +1 -0
  12. package/dist/data/memory/index.js.map +1 -1
  13. package/dist/data/memory/quality-scoring.d.ts +32 -0
  14. package/dist/data/memory/quality-scoring.d.ts.map +1 -0
  15. package/dist/data/memory/quality-scoring.js +182 -0
  16. package/dist/data/memory/quality-scoring.js.map +1 -0
  17. package/dist/data/memory/semantic.d.ts +12 -0
  18. package/dist/data/memory/semantic.d.ts.map +1 -1
  19. package/dist/data/memory/semantic.js +45 -2
  20. package/dist/data/memory/semantic.js.map +1 -1
  21. package/dist/data/memory/thread-registry.d.ts +7 -0
  22. package/dist/data/memory/thread-registry.d.ts.map +1 -1
  23. package/dist/data/memory/thread-registry.js +11 -1
  24. package/dist/data/memory/thread-registry.js.map +1 -1
  25. package/dist/index.js +17 -5
  26. package/dist/index.js.map +1 -1
  27. package/dist/tools/defs/memory-defs.d.ts.map +1 -1
  28. package/dist/tools/defs/memory-defs.js +19 -0
  29. package/dist/tools/defs/memory-defs.js.map +1 -1
  30. package/dist/tools/memory-tools.d.ts.map +1 -1
  31. package/dist/tools/memory-tools.js +15 -0
  32. package/dist/tools/memory-tools.js.map +1 -1
  33. package/dist/tools/thread-lifecycle.d.ts.map +1 -1
  34. package/dist/tools/thread-lifecycle.js +31 -17
  35. package/dist/tools/thread-lifecycle.js.map +1 -1
  36. package/package.json +10 -2
  37. package/scripts/install-supervisor.ps1 +67 -0
  38. package/scripts/install-supervisor.sh +43 -0
  39. package/scripts/start-supervisor.ps1 +46 -0
  40. package/scripts/start-supervisor.sh +20 -0
  41. package/supervisor/config.go +140 -0
  42. package/supervisor/go.mod +3 -0
  43. package/supervisor/health.go +390 -0
  44. package/supervisor/health_test.go +93 -0
  45. package/supervisor/keeper.go +303 -0
  46. package/supervisor/keeper_test.go +27 -0
  47. package/supervisor/lock.go +56 -0
  48. package/supervisor/lock_test.go +54 -0
  49. package/supervisor/log.go +114 -0
  50. package/supervisor/log_test.go +45 -0
  51. package/supervisor/main.go +325 -0
  52. package/supervisor/notify.go +53 -0
  53. package/supervisor/process.go +222 -0
  54. package/supervisor/process_test.go +94 -0
  55. package/supervisor/process_unix.go +14 -0
  56. package/supervisor/process_windows.go +15 -0
  57. package/supervisor/updater.go +281 -0
  58. package/templates/coding-task.default.md +12 -0
  59. package/dist/claude-keeper.d.ts +0 -24
  60. package/dist/claude-keeper.d.ts.map +0 -1
  61. package/dist/claude-keeper.js +0 -374
  62. package/dist/claude-keeper.js.map +0 -1
  63. package/dist/watcher-service.d.ts +0 -2
  64. package/dist/watcher-service.d.ts.map +0 -1
  65. package/dist/watcher-service.js +0 -997
  66. package/dist/watcher-service.js.map +0 -1
package/supervisor/keeper.go
@@ -0,0 +1,303 @@
+ package main
+
+ import (
+     "context"
+     "encoding/json"
+     "fmt"
+     "math"
+     "sync"
+     "time"
+ )
+
+ // KeeperConfig describes a thread that must be kept alive.
+ type KeeperConfig struct {
+     ThreadID         int
+     SessionName      string
+     Client           string // e.g. "claude-code", "codex"
+     WorkingDirectory string
+     MaxRetries       int
+     CooldownMs       int
+ }
+
+ // Keeper supervises a single thread, restarting it via the MCP server's
+ // start_thread tool when it stops running. One goroutine per keeper.
+ type Keeper struct {
+     cfg     KeeperConfig
+     global  Config
+     mcp     *MCPClient
+     log     *Logger
+     onDeath func(threadID int, sessionName string)
+
+     mu      sync.Mutex
+     stopped bool
+     cancel  context.CancelFunc
+     done    chan struct{}
+ }
+
+ func NewKeeper(cfg KeeperConfig, global Config, mcp *MCPClient, log *Logger, onDeath func(int, string)) *Keeper {
+     maxRetries := cfg.MaxRetries
+     if maxRetries <= 0 {
+         maxRetries = global.KeeperMaxRetries
+     }
+     cfg.MaxRetries = maxRetries
+     return &Keeper{
+         cfg:     cfg,
+         global:  global,
+         mcp:     mcp,
+         log:     log,
+         onDeath: onDeath,
+         done:    make(chan struct{}),
+     }
+ }
+
+ // Start begins the keeper loop in a separate goroutine.
+ func (k *Keeper) Start() {
+     ctx, cancel := context.WithCancel(context.Background())
+     k.mu.Lock()
+     k.cancel = cancel
+     k.mu.Unlock()
+
+     go k.run(ctx)
+ }
+
+ // Stop signals the keeper to shut down and waits for it to finish.
+ func (k *Keeper) Stop() {
+     k.mu.Lock()
+     k.stopped = true
+     if k.cancel != nil {
+         k.cancel()
+     }
+     k.mu.Unlock()
+     <-k.done
+ }
+
+ func (k *Keeper) isStopped() bool {
+     k.mu.Lock()
+     defer k.mu.Unlock()
+     return k.stopped
+ }
+
+ func (k *Keeper) run(ctx context.Context) {
+     defer close(k.done)
+     defer func() {
+         if r := recover(); r != nil {
+             k.log.Error("Keeper panicked for thread %d: %v", k.cfg.ThreadID, r)
+         }
+     }()
+     defer k.log.Info("Keeper stopped for thread %d", k.cfg.ThreadID)
+
+     k.log.Info("Keeper started for thread %d ('%s') [client=%s]", k.cfg.ThreadID, k.cfg.SessionName, k.cfg.Client)
+
+     // Wait for MCP server to be ready
+     ready := k.mcp.WaitForReady(ctx, k.global.KeeperReadyPollInterval, k.global.KeeperReadyTimeout)
+     if !ready && !k.isStopped() {
+         k.log.Warn("MCP server not ready after %v — attempting start_thread anyway", k.global.KeeperReadyTimeout)
+     }
+
+     retryCount := 0
+     fastExitCount := 0
+     fastExitEscalation := 0
+     var lastStartTime time.Time
+     activeThreadID := k.cfg.ThreadID // may differ from root after start_thread
+
+     checkAndStart := func() {
+         if k.isStopped() {
+             return
+         }
+
+         // Check if thread is running — use activeThreadID (worker) not root
+         running := k.mcp.IsThreadRunning(ctx, activeThreadID)
+         if running {
+             // Check if stuck
+             if k.mcp.IsThreadStuck(ctx, activeThreadID, k.global.StuckThreshold) {
+                 k.log.Warn("Thread %d (worker %d) is stuck (no heartbeat for %v) — restarting", k.cfg.ThreadID, activeThreadID, k.global.StuckThreshold)
+                 // Kill via MCP API, then fall through to restart
+                 k.killThread(ctx, activeThreadID)
+                 activeThreadID = k.cfg.ThreadID // reset — will get new worker ID on restart
+             } else {
+                 // Healthy — reset counters
+                 if retryCount > 0 {
+                     k.log.Info("Thread %d is healthy again (was at retry %d)", k.cfg.ThreadID, retryCount)
+                 } else {
+                     k.log.Debug("Thread %d is healthy", k.cfg.ThreadID)
+                 }
+                 retryCount = 0
+                 return
+             }
+         }
+
+         // Thread is not running (or was stuck and killed)
+         if retryCount >= k.cfg.MaxRetries {
+             cooldown := k.global.KeeperCooldown
+             if k.cfg.CooldownMs > 0 {
+                 cooldown = time.Duration(k.cfg.CooldownMs) * time.Millisecond
+             }
+             k.log.Warn("Max retries (%d) exceeded — cooling down for %v", k.cfg.MaxRetries, cooldown)
+             if k.onDeath != nil {
+                 k.onDeath(k.cfg.ThreadID, k.cfg.SessionName)
+             }
+             k.sleep(ctx, cooldown)
+             retryCount = 0
+             fastExitCount = 0
+             return
+         }
+
+         k.log.Info("Thread %d not running — calling start_thread (attempt %d/%d)", k.cfg.ThreadID, retryCount+1, k.cfg.MaxRetries)
+
+         // Re-verify root still has keepAlive before restarting
+         // (user may have archived the worker, which disables keepAlive on the root)
+         if !k.isRootKeepAlive(ctx) {
+             k.log.Info("Thread %d root has keepAlive=false — stopping keeper", k.cfg.ThreadID)
+             k.mu.Lock()
+             k.stopped = true
+             k.mu.Unlock()
+             return
+         }
+
+         lastStartTime = time.Now()
+         ok, workerID := k.callStartThread(ctx)
+
+         if ok {
+             if workerID > 0 {
+                 activeThreadID = workerID
+                 k.log.Info("Thread %d start_thread succeeded (worker %d)", k.cfg.ThreadID, workerID)
+             } else {
+                 k.log.Info("Thread %d start_thread succeeded", k.cfg.ThreadID)
+             }
+             retryCount = 0
+             // Check for fast exit on next check
+         } else {
+             // Check for fast exit pattern
+             if !lastStartTime.IsZero() && time.Since(lastStartTime) < k.global.FastExitThreshold {
+                 fastExitCount++
+                 if fastExitCount >= k.global.FastExitMaxCount {
+                     cooldown := time.Duration(float64(k.global.FastExitBaseCooldown) * math.Pow(2, float64(fastExitEscalation)))
+                     if cooldown > k.global.FastExitMaxCooldown {
+                         cooldown = k.global.FastExitMaxCooldown
+                     }
+                     k.log.Warn("Thread %d: %d consecutive fast exits — backing off %v", k.cfg.ThreadID, fastExitCount, cooldown)
+                     if k.onDeath != nil {
+                         k.onDeath(k.cfg.ThreadID, k.cfg.SessionName+" (repeated fast exits — check credits/API key)")
+                     }
+                     fastExitEscalation++
+                     k.sleep(ctx, cooldown)
+                     fastExitCount = 0
+                     retryCount = 0
+                     return
+                 }
+             } else {
+                 fastExitCount = 0
+                 fastExitEscalation = 0
+             }
+
+             retryCount++
+             delay := k.backoff(retryCount)
+             k.log.Info("Backing off %v before next attempt", delay)
+             k.sleep(ctx, delay)
+         }
+     }
+
+     // Initial check
+     checkAndStart()
+
+     // Health check loop
+     for {
+         if k.isStopped() {
+             return
+         }
+
+         select {
+         case <-ctx.Done():
+             return
+         case <-time.After(k.global.KeeperHealthCheckInterval):
+             checkAndStart()
+         }
+     }
+ }
+
+ // callStartThread starts the thread via MCP. Returns (success, workerThreadID).
+ // workerThreadID is extracted from the response JSON when available.
+ func (k *Keeper) callStartThread(ctx context.Context) (bool, int) {
+     sessionID, err := k.mcp.OpenMCPSession(ctx)
+     if err != nil {
+         k.log.Error("Failed to open MCP session: %v", err)
+         return false, 0
+     }
+     defer k.mcp.CloseMCPSession(ctx, sessionID)
+
+     text, err := k.mcp.CallStartThread(ctx, sessionID, k.cfg.ThreadID, k.cfg.SessionName, k.cfg.Client, k.cfg.WorkingDirectory)
+     if err != nil {
+         k.log.Error("start_thread failed: %v", err)
+         return false, 0
+     }
+
+     if text != "" {
+         k.log.Info("start_thread response: %.200s", text)
+     }
+
+     // Parse the worker threadId from the response JSON
+     workerID := parseWorkerThreadID(text)
+     return true, workerID
+ }
+
+ func (k *Keeper) killThread(ctx context.Context, threadID int) {
+     k.log.Info("Killing stuck thread %d", threadID)
+     // Read PID from thread PID file
+     pidFile := k.global.Paths.PIDsDir + "/" + fmt.Sprintf("%d.pid", threadID)
+     pid, err := ReadPIDFile(pidFile)
+     if err != nil {
+         k.log.Warn("Cannot read PID for thread %d: %v", threadID, err)
+         return
+     }
+     if err := KillProcess(pid, k.log); err != nil {
+         k.log.Error("Failed to kill thread %d (PID %d): %v", threadID, pid, err)
+     }
+ }
+
+ func (k *Keeper) backoff(retry int) time.Duration {
+     delay := time.Duration(float64(k.global.KeeperBaseBackoff) * math.Pow(2, float64(retry)))
+     if delay > k.global.KeeperMaxBackoff {
+         delay = k.global.KeeperMaxBackoff
+     }
+     return delay
+ }
+
+ func (k *Keeper) sleep(ctx context.Context, d time.Duration) {
+     select {
+     case <-ctx.Done():
+     case <-time.After(d):
+     }
+ }
+
+ // isRootKeepAlive checks whether the root thread still has keepAlive=true.
+ func (k *Keeper) isRootKeepAlive(ctx context.Context) bool {
+     roots, err := k.mcp.GetRootThreads(ctx)
+     if err != nil {
+         k.log.Debug("isRootKeepAlive(%d): failed to fetch roots: %v — assuming still alive", k.cfg.ThreadID, err)
+         return true // fail-open: don't stop keeper if we can't check
+     }
+     for _, r := range roots {
+         tidFloat, _ := r["threadId"].(float64)
+         if int(tidFloat) == k.cfg.ThreadID {
+             keepAlive, _ := r["keepAlive"].(bool)
+             status, _ := r["status"].(string)
+             return keepAlive && (status == "" || status == "active")
+         }
+     }
+     k.log.Debug("isRootKeepAlive(%d): root thread not found in response", k.cfg.ThreadID)
+     return false // root thread gone → stop keeper
+ }
+
+ // parseWorkerThreadID extracts the "threadId" field from a start_thread JSON response.
+ func parseWorkerThreadID(text string) int {
+     if text == "" {
+         return 0
+     }
+     var resp struct {
+         ThreadID int `json:"threadId"`
+     }
+     if json.Unmarshal([]byte(text), &resp) == nil && resp.ThreadID > 0 {
+         return resp.ThreadID
+     }
+     return 0
+ }
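
The restart schedule in keeper.go is plain exponential backoff: each failed start_thread attempt doubles the wait via Keeper.backoff, clamped at KeeperMaxBackoff, and repeated fast exits escalate a separate cooldown with the same doubling shape. A minimal standalone sketch of that schedule; the 5s base and 5m cap here are illustrative stand-ins, since the real KeeperBaseBackoff/KeeperMaxBackoff defaults live in config.go and are not shown in this hunk:

package main

import (
    "fmt"
    "math"
    "time"
)

// Same shape as Keeper.backoff: delay = base * 2^retry, clamped to max.
func backoff(retry int, base, max time.Duration) time.Duration {
    d := time.Duration(float64(base) * math.Pow(2, float64(retry)))
    if d > max {
        d = max
    }
    return d
}

func main() {
    // With base=5s, max=5m this prints: 10s, 20s, 40s, 1m20s, 2m40s, 5m, 5m.
    for retry := 1; retry <= 7; retry++ {
        fmt.Printf("attempt %d: wait %v\n", retry, backoff(retry, 5*time.Second, 5*time.Minute))
    }
}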
package/supervisor/keeper_test.go
@@ -0,0 +1,27 @@
+ package main
+
+ import "testing"
+
+ func TestParseWorkerThreadID(t *testing.T) {
+     tests := []struct {
+         name string
+         text string
+         want int
+     }{
+         {"normal response", `{"threadId":11226,"status":"restarted","name":"Sensorium 2","pid":87108}`, 11226},
+         {"already_running", `{"threadId":11226,"status":"already_running","name":"Sensorium 2","pid":40568}`, 11226},
+         {"empty string", "", 0},
+         {"no threadId", `{"status":"error"}`, 0},
+         {"threadId zero", `{"threadId":0}`, 0},
+         {"invalid JSON", `not json`, 0},
+         {"negative threadId", `{"threadId":-5}`, 0},
+     }
+     for _, tt := range tests {
+         t.Run(tt.name, func(t *testing.T) {
+             got := parseWorkerThreadID(tt.text)
+             if got != tt.want {
+                 t.Errorf("parseWorkerThreadID(%q) = %d, want %d", tt.text, got, tt.want)
+             }
+         })
+     }
+ }
package/supervisor/lock.go
@@ -0,0 +1,56 @@
+ package main
+
+ import (
+     "fmt"
+     "os"
+     "strconv"
+     "strings"
+ )
+
+ // AcquireLock creates a lock file to prevent multiple supervisor instances.
+ // Uses O_CREATE|O_EXCL for atomic creation. If a stale lock exists (PID not
+ // running), it reclaims the lock.
+ func AcquireLock(lockPath string, log *Logger) bool {
+     // Try atomic create first
+     f, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
+     if err == nil {
+         // Lock acquired — write our PID
+         fmt.Fprintf(f, "%d", os.Getpid())
+         f.Close()
+         log.Info("Lock acquired: %s (PID %d)", lockPath, os.Getpid())
+         return true
+     }
+
+     // Lock file exists — check if the PID is still alive
+     data, err := os.ReadFile(lockPath)
+     if err != nil {
+         log.Error("Failed to read lockfile %s: %v", lockPath, err)
+         return false
+     }
+
+     pidStr := strings.TrimSpace(string(data))
+     pid, err := strconv.Atoi(pidStr)
+     if err == nil && pid > 0 && IsProcessAlive(pid) {
+         log.Error("Another supervisor is running (PID %d). Lockfile: %s", pid, lockPath)
+         return false
+     }
+
+     // Stale lock — reclaim
+     log.Warn("Reclaimed stale supervisor lockfile (old PID %s)", pidStr)
+     _ = os.Remove(lockPath)
+
+     // Re-acquire atomically
+     f, err = os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
+     if err != nil {
+         log.Error("Failed to acquire lockfile after reclaim: %v", err)
+         return false
+     }
+     fmt.Fprintf(f, "%d", os.Getpid())
+     f.Close()
+     return true
+ }
+
+ // ReleaseLock removes the lock file.
+ func ReleaseLock(lockPath string) {
+     _ = os.Remove(lockPath)
+ }
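
Callers are expected to pair AcquireLock with a deferred ReleaseLock for the lifetime of the process. A sketch of the intended call pattern; the lock path and exit handling here are assumptions, since the real wiring happens in main.go, which is not part of this hunk:

log := NewLogger("") // empty path: log to stderr only
lockPath := "/tmp/sensorium-supervisor.lock" // hypothetical location
if !AcquireLock(lockPath, log) {
    os.Exit(1) // another live supervisor already holds the lock
}
defer ReleaseLock(lockPath)

Note the tradeoff in the stale-lock path: O_CREATE|O_EXCL makes the first acquisition atomic, but remove-then-recreate after a stale lock leaves a small window where two restarting supervisors can race; because the re-acquire also uses O_EXCL, at most one of them wins.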
package/supervisor/lock_test.go
@@ -0,0 +1,54 @@
+ package main
+
+ import (
+     "os"
+     "path/filepath"
+     "testing"
+ )
+
+ func TestAcquireLock_Fresh(t *testing.T) {
+     dir := t.TempDir()
+     lockPath := filepath.Join(dir, "test.lock")
+     log := NewLogger("")
+
+     if !AcquireLock(lockPath, log) {
+         t.Fatal("expected lock acquisition to succeed")
+     }
+     defer ReleaseLock(lockPath)
+
+     // Verify lock file was created with our PID
+     data, err := os.ReadFile(lockPath)
+     if err != nil {
+         t.Fatalf("lock file not created: %v", err)
+     }
+     if len(data) == 0 {
+         t.Fatal("lock file is empty")
+     }
+ }
+
+ func TestAcquireLock_StalePID(t *testing.T) {
+     dir := t.TempDir()
+     lockPath := filepath.Join(dir, "test.lock")
+     log := NewLogger("")
+
+     // Write a stale lock whose PID is guaranteed not to be a live supervisor:
+     // PID 99999999 is almost certainly not running on any test machine.
+     os.WriteFile(lockPath, []byte("99999999"), 0644)
+
+     if !AcquireLock(lockPath, log) {
+         t.Fatal("expected stale lock to be reclaimed")
+     }
+     defer ReleaseLock(lockPath)
+ }
+
+ func TestReleaseLock(t *testing.T) {
+     dir := t.TempDir()
+     lockPath := filepath.Join(dir, "test.lock")
+     os.WriteFile(lockPath, []byte("12345"), 0644)
+
+     ReleaseLock(lockPath)
+
+     if _, err := os.Stat(lockPath); !os.IsNotExist(err) {
+         t.Error("expected lock file to be removed")
+     }
+ }
package/supervisor/log.go
@@ -0,0 +1,114 @@
+ package main
+
+ import (
+     "fmt"
+     "os"
+     "sync"
+     "time"
+ )
+
+ // Logger writes to both stderr and a rotating log file.
+ // Rotates when the file exceeds maxSize bytes.
+ type Logger struct {
+     mu      sync.Mutex
+     logPath string
+     file    *os.File
+     debug   bool
+     size    int64
+     maxSize int64 // default 5 MB
+     maxKeep int   // max rotated files to keep
+ }
+
+ func NewLogger(logPath string) *Logger {
+     l := &Logger{
+         logPath: logPath,
+         debug:   os.Getenv("SUPERVISOR_DEBUG") == "1" || os.Getenv("SUPERVISOR_DEBUG") == "true",
+         maxSize: 5 * 1024 * 1024, // 5 MB
+         maxKeep: 3,
+     }
+     l.openFile()
+     return l
+ }
+
+ func (l *Logger) openFile() {
+     if l.logPath == "" {
+         return
+     }
+     f, err := os.OpenFile(l.logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+     if err != nil {
+         fmt.Fprintf(os.Stderr, "[WARN] cannot open log file %s: %v\n", l.logPath, err)
+         return
+     }
+     l.file = f
+     // Seed current size for rotation checks
+     if info, err := f.Stat(); err == nil {
+         l.size = info.Size()
+     }
+ }
+
+ func (l *Logger) log(level, format string, args ...any) {
+     ts := time.Now().Format("2006-01-02 15:04:05")
+     msg := fmt.Sprintf(format, args...)
+     line := fmt.Sprintf("[%s] [%s] %s\n", ts, level, msg)
+
+     l.mu.Lock()
+     defer l.mu.Unlock()
+     fmt.Fprint(os.Stderr, line)
+     if l.file != nil {
+         n, err := l.file.WriteString(line)
+         if err != nil {
+             fmt.Fprintf(os.Stderr, "[ERR] log write failed: %v\n", err)
+         }
+         l.size += int64(n)
+         if l.maxSize > 0 && l.size >= l.maxSize {
+             l.rotate()
+         }
+     }
+ }
+
+ func (l *Logger) Info(format string, args ...any)  { l.log("INFO", format, args...) }
+ func (l *Logger) Warn(format string, args ...any)  { l.log("WARN", format, args...) }
+ func (l *Logger) Error(format string, args ...any) { l.log("ERROR", format, args...) }
+ func (l *Logger) Debug(format string, args ...any) {
+     if l.debug {
+         l.log("DEBUG", format, args...)
+     }
+ }
+
+ // rotate closes the current log file, renames it with a .1 suffix (shifting
+ // older rotated files), and opens a fresh log. Called with mu held.
+ func (l *Logger) rotate() {
+     if l.file != nil {
+         l.file.Close()
+         l.file = nil
+     }
+
+     // Shift existing rotated logs: .3 → delete, .2 → .3, .1 → .2, current → .1
+     for i := l.maxKeep; i >= 1; i-- {
+         old := fmt.Sprintf("%s.%d", l.logPath, i)
+         if i == l.maxKeep {
+             os.Remove(old)
+         } else {
+             os.Rename(old, fmt.Sprintf("%s.%d", l.logPath, i+1))
+         }
+     }
+     os.Rename(l.logPath, l.logPath+".1")
+
+     // Open a fresh file
+     l.size = 0
+     f, err := os.OpenFile(l.logPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
+     if err != nil {
+         fmt.Fprintf(os.Stderr, "[WARN] log rotate: cannot create fresh log: %v\n", err)
+         return
+     }
+     l.file = f
+ }
+
+ func (l *Logger) Close() {
+     l.mu.Lock()
+     defer l.mu.Unlock()
+     if l.file != nil {
+         l.file.Close()
+         l.file = nil
+     }
+ }
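
Rotation shifts the numbered files upward and drops the oldest, so with the default maxKeep of 3 the directory holds at most the live log plus three predecessors. A short usage sketch; the file name is illustrative, not taken from the package:

l := NewLogger("supervisor.log") // set SUPERVISOR_DEBUG=1 to enable Debug lines
defer l.Close()
l.Info("thread %d restarted", 42)
// Once supervisor.log crosses 5 MB: .3 is deleted, .2 becomes .3,
// .1 becomes .2, and the current file becomes supervisor.log.1.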
package/supervisor/log_test.go
@@ -0,0 +1,45 @@
+ package main
+
+ import (
+     "os"
+     "path/filepath"
+     "strings"
+     "testing"
+ )
+
+ func TestLogRotation(t *testing.T) {
+     dir := t.TempDir()
+     logPath := filepath.Join(dir, "test.log")
+
+     l := &Logger{
+         logPath: logPath,
+         maxSize: 100, // tiny threshold for test
+         maxKeep: 2,
+     }
+     l.openFile()
+     defer l.Close()
+
+     // Write enough lines to trigger multiple rotations
+     for i := 0; i < 50; i++ {
+         l.Info("line %d: %s", i, strings.Repeat("x", 20))
+     }
+
+     // Current log should exist and be under maxSize
+     info, err := os.Stat(logPath)
+     if err != nil {
+         t.Fatalf("log file missing: %v", err)
+     }
+     if info.Size() >= 100 {
+         t.Errorf("log file should have been rotated, size=%d", info.Size())
+     }
+
+     // At least .1 should exist
+     if _, err := os.Stat(logPath + ".1"); err != nil {
+         t.Error("expected .1 rotated file to exist")
+     }
+
+     // .3 should NOT exist (maxKeep=2)
+     if _, err := os.Stat(logPath + ".3"); err == nil {
+         t.Error("expected .3 file to not exist (maxKeep=2)")
+     }
+ }