@dmsdc-ai/aigentry-telepty 0.1.95 → 0.1.97
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +16 -8
- package/daemon.js +43 -20
- package/package.json +4 -4
- package/session-state.js +154 -51
- package/skills/telepty/SKILL.md +39 -118
- package/skills/telepty-allow/SKILL.md +78 -0
- package/skills/telepty-attach/SKILL.md +52 -0
- package/skills/telepty-broadcast/SKILL.md +63 -0
- package/skills/telepty-daemon/SKILL.md +94 -0
- package/skills/telepty-inject/SKILL.md +93 -0
- package/skills/telepty-list/SKILL.md +81 -0
- package/skills/telepty-listen/SKILL.md +86 -0
- package/skills/telepty-rename/SKILL.md +63 -0
- package/skills/telepty-session/SKILL.md +83 -0
- package/src/mailbox/config.js +4 -0
- package/src/mailbox/delivery.js +93 -32
- package/src/mailbox/index.js +11 -0
- package/src/mailbox/storage.js +84 -5
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: telepty-listen
|
|
3
|
+
description: Monitor telepty events and read session screen output. Covers listen (event bus) and read-screen commands.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# telepty-listen — Event Monitoring and Screen Reading
|
|
7
|
+
|
|
8
|
+
## listen — Subscribe to event bus
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
telepty listen
|
|
12
|
+
telepty listen --json
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Natural-language: "이벤트 보여줘", "listen to the bus", "monitor session events"
|
|
16
|
+
|
|
17
|
+
Connects to the daemon's WebSocket event bus and streams all events in real-time.
|
|
18
|
+
|
|
19
|
+
### Event types
|
|
20
|
+
|
|
21
|
+
| Event | Description |
|
|
22
|
+
|-------|-------------|
|
|
23
|
+
| `session_health` | Periodic health status for all sessions |
|
|
24
|
+
| `inject_written` | Message delivered to a session |
|
|
25
|
+
| `inject_failed` | Delivery failure with error code |
|
|
26
|
+
| `session_register` | New session registered |
|
|
27
|
+
| `session_rename` | Session ID changed |
|
|
28
|
+
| `session_stale` | Session has been disconnected for longer than the stale threshold |
|
|
29
|
+
| `session_cleanup` | Stale session auto-removed |
|
|
30
|
+
| `submit` | Enter keystroke sent |
|
|
31
|
+
| `mailbox_delivered` | Mailbox message successfully delivered |
|
|
32
|
+
| `mailbox_delivery_failed` | Mailbox delivery failed, will retry |
|
|
33
|
+
|
|
34
|
+
### Examples
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Human-readable event stream
|
|
38
|
+
telepty listen
|
|
39
|
+
|
|
40
|
+
# JSON format for scripting
|
|
41
|
+
telepty listen --json
|
|
42
|
+
|
|
43
|
+
# Filter specific events with jq
|
|
44
|
+
telepty listen --json | jq 'select(.type == "inject_written")'
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## read-screen — Read session screen buffer
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
telepty read-screen <session_id> [--lines N] [--raw]
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Natural-language: "세션 화면 읽어줘", "what's on the analyst's screen", "read screen output"
|
|
54
|
+
|
|
55
|
+
Reads the last N lines of a session's PTY output buffer (default: 50 lines).
|
|
56
|
+
|
|
57
|
+
### Options
|
|
58
|
+
|
|
59
|
+
| Flag | Description |
|
|
60
|
+
|------|-------------|
|
|
61
|
+
| `--lines N` | Number of lines to read (default: 50) |
|
|
62
|
+
| `--raw` | Return raw output with ANSI escape sequences |
|
|
63
|
+
|
|
64
|
+
### Examples
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Read last 50 lines (cleaned)
|
|
68
|
+
telepty read-screen my-claude
|
|
69
|
+
|
|
70
|
+
# Read last 100 lines
|
|
71
|
+
telepty read-screen my-claude --lines 100
|
|
72
|
+
|
|
73
|
+
# Raw output with escape sequences
|
|
74
|
+
telepty read-screen my-claude --raw
|
|
75
|
+
|
|
76
|
+
# Use in scripts
|
|
77
|
+
SCREEN=$(telepty read-screen my-claude --lines 10)
|
|
78
|
+
echo "$SCREEN" | grep "error"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Common Errors
|
|
82
|
+
|
|
83
|
+
| Error | Cause | Fix |
|
|
84
|
+
|-------|-------|-----|
|
|
85
|
+
| `Session not found` | Session doesn't exist | Check `telepty list` |
|
|
86
|
+
| `(empty screen)` | No output captured yet | Wait for session to produce output |
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: telepty-rename
|
|
3
|
+
description: Rename, delete, and clean up telepty sessions. Session lifecycle management.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# telepty-rename — Session Lifecycle Management
|
|
7
|
+
|
|
8
|
+
## rename — Change a session's ID
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
telepty rename <old_id> <new_id>
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Natural-language: "세션 이름 바꿔줘", "rename the session"
|
|
15
|
+
|
|
16
|
+
Renames a session while preserving all state, connections, and attached clients. Publishes a `session_rename` event on the bus.
|
|
17
|
+
|
|
18
|
+
### Examples
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
telepty rename temp-session analyst-claude
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## delete — Remove a session
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
telepty delete <session_id>
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Natural-language: "세션 삭제해줘", "kill that session", "remove the dead session"
|
|
31
|
+
|
|
32
|
+
Forcefully closes the session's PTY process, disconnects all clients, and removes it from the daemon registry.
|
|
33
|
+
|
|
34
|
+
### Examples
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
telepty delete stale-session
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## clean — Remove ghost sessions
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
telepty clean
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Natural-language: "고스트 세션 정리해줘", "clean up stale sessions"
|
|
47
|
+
|
|
48
|
+
Scans all sessions and removes those with `STALE` or `DISCONNECTED` health status. Safe to run periodically.
|
|
49
|
+
|
|
50
|
+
### Example output
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
🗑 Removed ghost: old-brain-claude (STALE)
|
|
54
|
+
🗑 Removed ghost: temp-test (DISCONNECTED)
|
|
55
|
+
✅ Cleaned 2 ghost session(s).
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Common Errors
|
|
59
|
+
|
|
60
|
+
| Error | Cause | Fix |
|
|
61
|
+
|-------|-------|-----|
|
|
62
|
+
| `Session not found` | Session doesn't exist or already removed | Check `telepty list` |
|
|
63
|
+
| `Session ID already active` | New name conflicts with existing session | Choose a different name |
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: telepty-session
|
|
3
|
+
description: Multi-session orchestration — start multiple sessions at once, arrange terminal layouts, inspect session metadata, and start deliberations. Covers session start, layout, session info, and deliberate commands.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# telepty-session — Multi-Session Orchestration
|
|
7
|
+
|
|
8
|
+
## session start — Launch multiple sessions
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
telepty session start [--launch]
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Natural-language: "세션 여러 개 시작해줘", "start all sessions", "launch the ecosystem"
|
|
15
|
+
|
|
16
|
+
Starts pre-configured sessions (from aigentry ecosystem or custom config). With `--launch`, opens each session in a new terminal tab/window.
|
|
17
|
+
|
|
18
|
+
### Options
|
|
19
|
+
|
|
20
|
+
| Flag | Description |
|
|
21
|
+
|------|-------------|
|
|
22
|
+
| `--launch` | Open each session in a new kitty/ghostty tab |
|
|
23
|
+
|
|
24
|
+
### Examples
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
# Start sessions interactively
|
|
28
|
+
telepty session start
|
|
29
|
+
|
|
30
|
+
# Start and launch in terminal tabs
|
|
31
|
+
telepty session start --launch
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## layout — Arrange terminal windows in a grid
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
telepty layout [columns]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Natural-language: "터미널 배치해줘", "arrange the windows", "layout the sessions"
|
|
41
|
+
|
|
42
|
+
Arranges all terminal windows in a grid layout on the screen. Defaults to auto-calculated columns based on session count.
|
|
43
|
+
|
|
44
|
+
### Examples
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# Auto-layout
|
|
48
|
+
telepty layout
|
|
49
|
+
|
|
50
|
+
# Force 3-column grid
|
|
51
|
+
telepty layout 3
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## session info — Detailed session metadata
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
telepty session info <session_id>
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Shows comprehensive session details including:
|
|
61
|
+
- Session type, command, CWD
|
|
62
|
+
- Terminal detection (ghostty, kitty, aterm)
|
|
63
|
+
- Health status and reason
|
|
64
|
+
- Transport block (delivery endpoint, backend)
|
|
65
|
+
- Semantic state (phase, current task, blocker)
|
|
66
|
+
- Mailbox stats (pending, dead-letter count)
|
|
67
|
+
|
|
68
|
+
## deliberate — Start multi-session deliberation
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
telepty deliberate "<topic>"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Natural-language: "토론 시작해줘", "start a deliberation"
|
|
75
|
+
|
|
76
|
+
Initiates a structured multi-session deliberation thread on the given topic.
|
|
77
|
+
|
|
78
|
+
## Common Errors
|
|
79
|
+
|
|
80
|
+
| Error | Cause | Fix |
|
|
81
|
+
|-------|-------|-----|
|
|
82
|
+
| `No sessions configured` | No aigentry session config found | Configure sessions first |
|
|
83
|
+
| `Terminal not supported` | Layout requires kitty or ghostty | Use a supported terminal |
|
package/src/mailbox/config.js
CHANGED
|
@@ -27,6 +27,10 @@ const DEFAULTS = {
|
|
|
27
27
|
deliveryPollMs: 200,
|
|
28
28
|
/** Notification coalesce window in ms. */
|
|
29
29
|
notifyCoalesceMs: 25,
|
|
30
|
+
/** Lock age threshold in seconds — break locks older than this (handles PID recycling). */
|
|
31
|
+
staleLockAgeSecs: 60,
|
|
32
|
+
/** Force-break lock after this many consecutive lock timeout failures per session. */
|
|
33
|
+
lockBreakAfterFailures: 3,
|
|
30
34
|
};
|
|
31
35
|
|
|
32
36
|
function createConfig(overrides = {}) {
|
package/src/mailbox/delivery.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
3
6
|
/**
|
|
4
7
|
* DeliveryEngine — polls mailbox for pending messages and delivers them.
|
|
5
8
|
*
|
|
@@ -9,6 +12,7 @@
|
|
|
9
12
|
* Also handles:
|
|
10
13
|
* - In-flight timeout recovery (auto-nack stuck messages)
|
|
11
14
|
* - TTL expiry (expire stale pending messages)
|
|
15
|
+
* - Stale lock handling: consecutive lock-timeout tracking, force-break after repeated failures, and exponential backoff
|
|
12
16
|
*/
|
|
13
17
|
class DeliveryEngine {
|
|
14
18
|
/**
|
|
@@ -28,6 +32,10 @@ class DeliveryEngine {
|
|
|
28
32
|
this._timer = null;
|
|
29
33
|
this._running = false;
|
|
30
34
|
this._tickInProgress = false;
|
|
35
|
+
// Fix 4: Per-session consecutive lock failure count
|
|
36
|
+
this._lockFailures = new Map(); // sessionId → count
|
|
37
|
+
// Fix 5: Per-session skip-until timestamp for backoff
|
|
38
|
+
this._skipUntil = new Map(); // sessionId → timestamp
|
|
31
39
|
}
|
|
32
40
|
|
|
33
41
|
/**
|
|
@@ -66,8 +74,15 @@ class DeliveryEngine {
|
|
|
66
74
|
|
|
67
75
|
try {
|
|
68
76
|
const sessionIds = this.sessionResolver();
|
|
77
|
+
const lockBreakThreshold = this.mailbox.config.lockBreakAfterFailures || 3;
|
|
69
78
|
|
|
70
79
|
for (const sessionId of sessionIds) {
|
|
80
|
+
// Fix 5: Skip sessions in backoff
|
|
81
|
+
const skipUntil = this._skipUntil.get(sessionId) || 0;
|
|
82
|
+
if (Date.now() < skipUntil) continue;
|
|
83
|
+
|
|
84
|
+
let lockFailed = false;
|
|
85
|
+
|
|
71
86
|
// 1. Recover in-flight timeouts
|
|
72
87
|
try {
|
|
73
88
|
const recovered = this.mailbox.recoverInflight(sessionId);
|
|
@@ -75,52 +90,98 @@ class DeliveryEngine {
|
|
|
75
90
|
console.log(`[MAILBOX] Recovered ${recovered} in-flight message(s) for ${sessionId}`);
|
|
76
91
|
}
|
|
77
92
|
} catch (err) {
|
|
78
|
-
|
|
93
|
+
if (err.message.includes('lock timeout')) {
|
|
94
|
+
lockFailed = true;
|
|
95
|
+
} else {
|
|
96
|
+
console.error(`[MAILBOX] recoverInflight error for ${sessionId}: ${err.message}`);
|
|
97
|
+
}
|
|
79
98
|
}
|
|
80
99
|
|
|
81
100
|
// 2. Expire stale messages
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
101
|
+
if (!lockFailed) {
|
|
102
|
+
try {
|
|
103
|
+
const expired = this.mailbox.expireStale(sessionId);
|
|
104
|
+
if (expired > 0) {
|
|
105
|
+
console.log(`[MAILBOX] Expired ${expired} stale message(s) for ${sessionId}`);
|
|
106
|
+
}
|
|
107
|
+
} catch (err) {
|
|
108
|
+
if (err.message.includes('lock timeout')) {
|
|
109
|
+
lockFailed = true;
|
|
110
|
+
} else {
|
|
111
|
+
console.error(`[MAILBOX] expireStale error for ${sessionId}: ${err.message}`);
|
|
112
|
+
}
|
|
86
113
|
}
|
|
87
|
-
} catch (err) {
|
|
88
|
-
console.error(`[MAILBOX] expireStale error for ${sessionId}: ${err.message}`);
|
|
89
114
|
}
|
|
90
115
|
|
|
91
116
|
// 3. Dequeue and deliver
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
117
|
+
if (!lockFailed) {
|
|
118
|
+
try {
|
|
119
|
+
const msg = this.mailbox.dequeue(sessionId);
|
|
120
|
+
if (!msg) {
|
|
121
|
+
// Success path (no message but lock acquired OK)
|
|
122
|
+
this._lockFailures.delete(sessionId);
|
|
123
|
+
this._skipUntil.delete(sessionId);
|
|
124
|
+
continue;
|
|
125
|
+
}
|
|
95
126
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
127
|
+
if (!this.deliverFn) {
|
|
128
|
+
// No delivery function — auto-ack (testing mode)
|
|
129
|
+
this.mailbox.ack(sessionId, msg.msg_id);
|
|
130
|
+
this._lockFailures.delete(sessionId);
|
|
131
|
+
this._skipUntil.delete(sessionId);
|
|
132
|
+
continue;
|
|
133
|
+
}
|
|
101
134
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
135
|
+
let result;
|
|
136
|
+
try {
|
|
137
|
+
result = await this.deliverFn(sessionId, msg);
|
|
138
|
+
} catch (err) {
|
|
139
|
+
result = { success: false, error: err.message };
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
if (result && result.success) {
|
|
143
|
+
this.mailbox.ack(sessionId, msg.msg_id);
|
|
144
|
+
if (this.onDelivery) {
|
|
145
|
+
this.onDelivery(sessionId, msg.msg_id, { success: true });
|
|
146
|
+
}
|
|
147
|
+
} else {
|
|
148
|
+
const reason = (result && result.error) || 'delivery failed';
|
|
149
|
+
this.mailbox.nack(sessionId, msg.msg_id, reason);
|
|
150
|
+
console.log(`[MAILBOX] Delivery failed for ${sessionId}/${msg.msg_id}: ${reason} (attempt ${msg.attempt})`);
|
|
151
|
+
if (this.onDelivery) {
|
|
152
|
+
this.onDelivery(sessionId, msg.msg_id, { success: false, error: reason });
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Success path (lock was acquired)
|
|
157
|
+
this._lockFailures.delete(sessionId);
|
|
158
|
+
this._skipUntil.delete(sessionId);
|
|
105
159
|
} catch (err) {
|
|
106
|
-
|
|
160
|
+
if (err.message.includes('lock timeout')) {
|
|
161
|
+
lockFailed = true;
|
|
162
|
+
} else {
|
|
163
|
+
console.error(`[MAILBOX] Delivery loop error for ${sessionId}: ${err.message}`);
|
|
164
|
+
}
|
|
107
165
|
}
|
|
166
|
+
}
|
|
108
167
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
168
|
+
// Fix 4 & 5: Handle lock failure — track, force-break, backoff
|
|
169
|
+
if (lockFailed) {
|
|
170
|
+
const failCount = (this._lockFailures.get(sessionId) || 0) + 1;
|
|
171
|
+
this._lockFailures.set(sessionId, failCount);
|
|
172
|
+
|
|
173
|
+
// Fix 4: Force-break after N consecutive failures
|
|
174
|
+
if (failCount >= lockBreakThreshold) {
|
|
175
|
+
const lockPath = path.join(this.mailbox._sessionDir(sessionId), '.lock');
|
|
176
|
+
try { fs.unlinkSync(lockPath); } catch {}
|
|
177
|
+
console.warn(`[MAILBOX] Force-broke stale lock for ${sessionId} after ${failCount} consecutive failures`);
|
|
178
|
+
this._lockFailures.delete(sessionId);
|
|
179
|
+
this._skipUntil.delete(sessionId);
|
|
114
180
|
} else {
|
|
115
|
-
|
|
116
|
-
this.
|
|
117
|
-
|
|
118
|
-
if (this.onDelivery) {
|
|
119
|
-
this.onDelivery(sessionId, msg.msg_id, { success: false, error: reason });
|
|
120
|
-
}
|
|
181
|
+
// Fix 5: Exponential backoff — skip this session for increasing intervals
|
|
182
|
+
const backoffMs = Math.min(this.pollMs * (1 << failCount), 30000);
|
|
183
|
+
this._skipUntil.set(sessionId, Date.now() + backoffMs);
|
|
121
184
|
}
|
|
122
|
-
} catch (err) {
|
|
123
|
-
console.error(`[MAILBOX] Delivery loop error for ${sessionId}: ${err.message}`);
|
|
124
185
|
}
|
|
125
186
|
}
|
|
126
187
|
} finally {
|
package/src/mailbox/index.js
CHANGED
|
@@ -4,6 +4,7 @@ const path = require('path');
|
|
|
4
4
|
const fs = require('fs');
|
|
5
5
|
const {
|
|
6
6
|
acquireLock,
|
|
7
|
+
breakStaleLocks,
|
|
7
8
|
ensureSessionDir,
|
|
8
9
|
loadStates,
|
|
9
10
|
appendState,
|
|
@@ -278,6 +279,16 @@ class FileMailbox {
|
|
|
278
279
|
try { fs.writeFileSync(p, ''); } catch {}
|
|
279
280
|
}
|
|
280
281
|
|
|
282
|
+
/**
|
|
283
|
+
* Break stale lock files across all session directories.
|
|
284
|
+
* Call at daemon startup before DeliveryEngine.start().
|
|
285
|
+
* Returns count of broken locks.
|
|
286
|
+
*/
|
|
287
|
+
breakStaleLocks() {
|
|
288
|
+
const staleLockAgeMs = (this.config.staleLockAgeSecs || 60) * 1000;
|
|
289
|
+
return breakStaleLocks(this.config.root, { staleLockAgeMs });
|
|
290
|
+
}
|
|
291
|
+
|
|
281
292
|
/**
|
|
282
293
|
* List all session IDs that have a mailbox directory.
|
|
283
294
|
*/
|
package/src/mailbox/storage.js
CHANGED
|
@@ -7,6 +7,7 @@ const path = require('path');
|
|
|
7
7
|
|
|
8
8
|
const LOCK_POLL_MS = 10;
|
|
9
9
|
const LOCK_TIMEOUT_MS = 500;
|
|
10
|
+
const DEFAULT_STALE_LOCK_AGE_MS = 60000; // 60s — lock hold time is ~5ms, so 60s is definitionally stale
|
|
10
11
|
|
|
11
12
|
function isProcessAlive(pid) {
|
|
12
13
|
try {
|
|
@@ -20,10 +21,15 @@ function isProcessAlive(pid) {
|
|
|
20
21
|
/**
|
|
21
22
|
* Acquire an advisory lock for a session directory.
|
|
22
23
|
* Returns a release function. Throws on timeout.
|
|
24
|
+
*
|
|
25
|
+
* @param {string} sessionDir
|
|
26
|
+
* @param {Object} [options]
|
|
27
|
+
* @param {number} [options.staleLockAgeMs] — break locks older than this (default 60s)
|
|
23
28
|
*/
|
|
24
|
-
function acquireLock(sessionDir) {
|
|
29
|
+
function acquireLock(sessionDir, options = {}) {
|
|
25
30
|
const lockPath = path.join(sessionDir, '.lock');
|
|
26
31
|
const deadline = Date.now() + LOCK_TIMEOUT_MS;
|
|
32
|
+
const staleLockAgeMs = options.staleLockAgeMs || DEFAULT_STALE_LOCK_AGE_MS;
|
|
27
33
|
|
|
28
34
|
while (Date.now() < deadline) {
|
|
29
35
|
try {
|
|
@@ -36,12 +42,28 @@ function acquireLock(sessionDir) {
|
|
|
36
42
|
} catch (err) {
|
|
37
43
|
if (err.code !== 'EEXIST') throw err;
|
|
38
44
|
|
|
39
|
-
// Lock file exists — check
|
|
45
|
+
// Lock file exists — check age first, then PID
|
|
46
|
+
|
|
47
|
+
// Fix 2: Lock age threshold — if lock is older than staleLockAgeMs,
|
|
48
|
+
// break regardless of PID (handles PID recycling)
|
|
49
|
+
try {
|
|
50
|
+
const stat = fs.statSync(lockPath);
|
|
51
|
+
const ageMs = Date.now() - stat.mtimeMs;
|
|
52
|
+
if (ageMs > staleLockAgeMs) {
|
|
53
|
+
try { fs.unlinkSync(lockPath); } catch {}
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
} catch {
|
|
57
|
+
// stat failed — file may have been removed between EEXIST and stat
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Fix 1: Invalid PID handling — treat NaN, 0, negative, empty as stale
|
|
40
62
|
try {
|
|
41
63
|
const content = fs.readFileSync(lockPath, 'utf8').trim();
|
|
42
64
|
const pid = Number(content);
|
|
43
|
-
if (pid
|
|
44
|
-
//
|
|
65
|
+
if (!Number.isFinite(pid) || pid <= 0 || !isProcessAlive(pid)) {
|
|
66
|
+
// Invalid PID (empty, NaN, 0, negative) OR dead PID → stale lock
|
|
45
67
|
try { fs.unlinkSync(lockPath); } catch {}
|
|
46
68
|
continue;
|
|
47
69
|
}
|
|
@@ -51,7 +73,7 @@ function acquireLock(sessionDir) {
|
|
|
51
73
|
continue;
|
|
52
74
|
}
|
|
53
75
|
|
|
54
|
-
// Lock is held by a live process — wait
|
|
76
|
+
// Lock is recent and held by a live process — wait
|
|
55
77
|
const buffer = new SharedArrayBuffer(4);
|
|
56
78
|
const view = new Int32Array(buffer);
|
|
57
79
|
Atomics.wait(view, 0, 0, LOCK_POLL_MS);
|
|
@@ -61,6 +83,61 @@ function acquireLock(sessionDir) {
|
|
|
61
83
|
throw new Error(`Mailbox lock timeout for ${sessionDir}`);
|
|
62
84
|
}
|
|
63
85
|
|
|
86
|
+
/**
|
|
87
|
+
* Break stale lock files across all session directories (startup sweep).
|
|
88
|
+
* Returns count of broken locks.
|
|
89
|
+
*
|
|
90
|
+
* @param {string} root — mailbox root directory
|
|
91
|
+
* @param {Object} [options]
|
|
92
|
+
* @param {number} [options.staleLockAgeMs] — age threshold (default 60s)
|
|
93
|
+
*/
|
|
94
|
+
function breakStaleLocks(root, options = {}) {
|
|
95
|
+
const staleLockAgeMs = options.staleLockAgeMs || DEFAULT_STALE_LOCK_AGE_MS;
|
|
96
|
+
const dirs = listSessionDirs(root);
|
|
97
|
+
let broken = 0;
|
|
98
|
+
|
|
99
|
+
for (const { sessionId, dir } of dirs) {
|
|
100
|
+
const lockPath = path.join(dir, '.lock');
|
|
101
|
+
if (!fs.existsSync(lockPath)) continue;
|
|
102
|
+
|
|
103
|
+
let shouldBreak = false;
|
|
104
|
+
let reason = '';
|
|
105
|
+
|
|
106
|
+
try {
|
|
107
|
+
const stat = fs.statSync(lockPath);
|
|
108
|
+
const ageMs = Date.now() - stat.mtimeMs;
|
|
109
|
+
|
|
110
|
+
if (ageMs > staleLockAgeMs) {
|
|
111
|
+
shouldBreak = true;
|
|
112
|
+
reason = `age ${Math.round(ageMs / 1000)}s > ${Math.round(staleLockAgeMs / 1000)}s threshold`;
|
|
113
|
+
} else {
|
|
114
|
+
// Check PID validity
|
|
115
|
+
const content = fs.readFileSync(lockPath, 'utf8').trim();
|
|
116
|
+
const pid = Number(content);
|
|
117
|
+
if (!Number.isFinite(pid) || pid <= 0) {
|
|
118
|
+
shouldBreak = true;
|
|
119
|
+
reason = `invalid PID: ${JSON.stringify(content)}`;
|
|
120
|
+
} else if (!isProcessAlive(pid)) {
|
|
121
|
+
shouldBreak = true;
|
|
122
|
+
reason = `dead PID ${pid}`;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
} catch {
|
|
126
|
+
// Can't read/stat lock — treat as stale
|
|
127
|
+
shouldBreak = true;
|
|
128
|
+
reason = 'unreadable lock file';
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
if (shouldBreak) {
|
|
132
|
+
try { fs.unlinkSync(lockPath); } catch {}
|
|
133
|
+
console.log(`[MAILBOX] Broke stale lock for ${sessionId}: ${reason}`);
|
|
134
|
+
broken++;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
return broken;
|
|
139
|
+
}
|
|
140
|
+
|
|
64
141
|
// --- JSONL read/write ---
|
|
65
142
|
|
|
66
143
|
function readJsonl(filePath) {
|
|
@@ -172,6 +249,7 @@ function compact(sessionDir, threshold) {
|
|
|
172
249
|
|
|
173
250
|
module.exports = {
|
|
174
251
|
acquireLock,
|
|
252
|
+
breakStaleLocks,
|
|
175
253
|
readJsonl,
|
|
176
254
|
appendJsonl,
|
|
177
255
|
writeJsonl,
|
|
@@ -182,4 +260,5 @@ module.exports = {
|
|
|
182
260
|
loadMessages,
|
|
183
261
|
countPending,
|
|
184
262
|
compact,
|
|
263
|
+
isProcessAlive,
|
|
185
264
|
};
|