teleportation-cli 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/hooks/permission_request.mjs +11 -4
- package/.claude/hooks/post_tool_use.mjs +1 -3
- package/.claude/hooks/pre_tool_use.mjs +255 -289
- package/.claude/hooks/session-register.mjs +44 -29
- package/.claude/hooks/session_end.mjs +29 -3
- package/.claude/hooks/session_start.mjs +57 -1
- package/.claude/hooks/stop.mjs +245 -242
- package/.claude/hooks/user_prompt_submit.mjs +1 -3
- package/lib/config/manager.js +45 -1
- package/lib/daemon/session-file-registry.js +207 -0
- package/lib/daemon/task-executor-v2.js +239 -29
- package/lib/daemon/teleportation-daemon.js +469 -29
- package/lib/daemon/timeline-analyzer.js +19 -13
- package/lib/daemon/transcript-ingestion.js +310 -51
- package/lib/daemon/utils.js +0 -9
- package/lib/install/installer.js +126 -3
- package/lib/install/uhr-installer.js +32 -18
- package/lib/intelligence/benchmark.js +240 -0
- package/lib/intelligence/index.js +29 -0
- package/lib/intelligence/rebuild-policies.js +169 -0
- package/lib/intelligence/schema.js +259 -0
- package/lib/intelligence/transcript-mine.js +339 -0
- package/lib/session/metadata.js +23 -5
- package/lib/transcript-sync/lifecycle.js +88 -0
- package/lib/transcript-sync/repo-context.js +45 -0
- package/lib/transcript-sync/worker.js +233 -0
- package/lib/utils/log-sanitizer.js +65 -0
- package/package.json +2 -1
- package/scripts/sync-transcripts.sh +272 -0
- package/teleportation-cli.cjs +295 -4
|
@@ -39,10 +39,29 @@ import { fileURLToPath } from 'url';
|
|
|
39
39
|
import { spawn, exec } from 'child_process';
|
|
40
40
|
import { promisify } from 'util';
|
|
41
41
|
import { homedir, tmpdir } from 'os';
|
|
42
|
-
import { existsSync, appendFileSync } from 'fs';
|
|
43
|
-
import { join } from 'path';
|
|
42
|
+
import { existsSync, appendFileSync, readFileSync, unlinkSync } from 'fs';
|
|
43
|
+
import { join, dirname } from 'path';
|
|
44
44
|
// NOTE: PID locking is handled by agent-process at the platform level (launchd/systemd/pm2).
|
|
45
45
|
// Signal handling and heartbeat management are handled inline below.
|
|
46
|
+
|
|
47
|
+
// File-based session registry — loaded lazily so the daemon can still start when the
// module is missing or fails to load.
// REGISTRY_UNAVAILABLE is a frozen no-op sentinel handed back when the import fails.
// Callers guard with `if (!registry.readAllSessionFiles) return`, so its null members
// are never invoked.
const REGISTRY_UNAVAILABLE = Object.freeze({ readAllSessionFiles: null, isClaudePidAlive: null });

// Cached module namespace. Stays null after a failed import so the next call retries.
let _registry = null;

/**
 * Lazily load the sibling `session-file-registry.js` module and cache it.
 *
 * Never rejects: on import failure a warning is logged and REGISTRY_UNAVAILABLE is
 * returned instead, leaving the cache empty so a later call tries the import again.
 *
 * @returns {Promise<object>} The imported module namespace, or REGISTRY_UNAVAILABLE.
 */
async function getRegistry() {
  if (_registry) return _registry;
  try {
    // Resolve the sibling module as a file:// URL relative to this module. Passing a
    // bare absolute filesystem path to import() is rejected by Node's ESM loader on
    // Windows (absolute paths there must be file:// URLs), which would make the
    // registry permanently unavailable on that platform.
    const registryUrl = new URL('./session-file-registry.js', import.meta.url);
    _registry = await import(registryUrl.href);
  } catch (e) {
    console.warn('[daemon] session-file-registry not available (will retry next scan):', e.message);
    return REGISTRY_UNAVAILABLE;
  }
  return _registry;
}
|
|
46
65
|
// The following were removed in PRD-0025 migration:
|
|
47
66
|
// - pid-manager.js (replaced by agent-process platform locking)
|
|
48
67
|
// - lifecycle.js (replaced by inline signal handlers)
|
|
@@ -54,6 +73,7 @@ import {
|
|
|
54
73
|
executeTaskTurn,
|
|
55
74
|
stopTask,
|
|
56
75
|
stopAllTasks,
|
|
76
|
+
stopTasksForSession,
|
|
57
77
|
} from './task-executor-v2.js';
|
|
58
78
|
|
|
59
79
|
// Transcript ingestion for timeline completeness
|
|
@@ -86,6 +106,8 @@ const CLAUDE_CLI = process.env.CLAUDE_CLI_PATH || 'claude'; // Configurable Clau
|
|
|
86
106
|
const ALLOW_ALL_COMMANDS = process.env.TELEPORTATION_DAEMON_ALLOW_ALL_COMMANDS === 'true';
|
|
87
107
|
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.DAEMON_HEARTBEAT_INTERVAL_MS || '30000', 10); // 30 sec default
|
|
88
108
|
const HEARTBEAT_CHECK_INTERVAL_MS = parseInt(process.env.DAEMON_HEARTBEAT_CHECK_INTERVAL_MS || '60000', 10); // 1 min default
|
|
109
|
+
// How long a PID must be dead before the daemon marks the session stopped (ms)
|
|
110
|
+
const DEAD_PID_THRESHOLD_MS = parseInt(process.env.DAEMON_DEAD_PID_THRESHOLD_MS || '60000', 10); // 60s default
|
|
89
111
|
|
|
90
112
|
// Message routing configuration
|
|
91
113
|
// REQUIRE_COMMAND_WHITELIST: If true, use legacy shell execution with command whitelist
|
|
@@ -109,6 +131,29 @@ const ROUTER_MAX_ESCALATIONS = parseInt(process.env.TELEPORTATION_ROUTER_MAX_ESC
|
|
|
109
131
|
// Debug logging configuration
|
|
110
132
|
const DEBUG = process.env.TELEPORTATION_DEBUG === 'true';
|
|
111
133
|
const LOG_DIR = process.env.TELEPORTATION_LOG_DIR || tmpdir();
|
|
134
|
+
const SESSION_LOG_FILE = join(homedir(), '.teleportation', 'session-events.log');
|
|
135
|
+
|
|
136
|
+
/**
 * Record a `register` event for a session in the session event log.
 *
 * The entry carries the session's identifiers, cwd and meta so that a restarted
 * daemon can recover full session metadata (hostname, branch, etc.) and re-register
 * correctly after a Redis TTL expiry.
 *
 * Best-effort: any failure while building or writing the entry is swallowed, and is
 * only reported on stderr when DEBUG is enabled.
 *
 * @param {object} session - Session entry (session_id, claude_session_id, pid, cwd, meta, registered_at)
 */
function appendSessionRegisterLog(session) {
  try {
    const event = {
      type: 'register',
      session_id: session.session_id,
      claude_session_id: session.claude_session_id,
      pid: session.pid || null,
      cwd: session.cwd,
      meta: session.meta,
      // Prefer the original registration time; fall back to "now" when absent
      timestamp: session.registered_at || Date.now()
    };
    // One JSON document per line
    appendFileSync(SESSION_LOG_FILE, `${JSON.stringify(event)}\n`);
  } catch (err) {
    if (!DEBUG) return;
    console.error(`[daemon] Failed to append session log: ${err.message}`);
  }
}
|
|
112
157
|
|
|
113
158
|
/**
|
|
114
159
|
* Cross-platform debug logging utility
|
|
@@ -242,6 +287,13 @@ const stoppedSessions = new Set();
|
|
|
242
287
|
// Session activity tracking for cleanup
|
|
243
288
|
const sessionActivity = new Map(); // sessionId -> lastActivityTimestamp
|
|
244
289
|
|
|
290
|
+
// PID liveness tracking (Bug 2):
|
|
291
|
+
// Caches the Claude PID per session to avoid re-reading the session file every poll cycle.
|
|
292
|
+
// lastPidCheck throttles the PID check to at most once every PID_CHECK_INTERVAL_MS per session.
|
|
293
|
+
const sessionPidCache = new Map(); // sessionId -> number (claude_pid)
|
|
294
|
+
const lastPidCheck = new Map(); // sessionId -> number (timestamp of last check)
|
|
295
|
+
const PID_CHECK_INTERVAL_MS = parseInt(process.env.DAEMON_PID_CHECK_INTERVAL_MS || '30000', 10); // 30s default
|
|
296
|
+
|
|
245
297
|
// Transcript ingestion throttling: Prevents concurrent ingestion runs per session
|
|
246
298
|
// Map<session_id, Promise> tracks in-progress ingestion promises
|
|
247
299
|
// If ingestion takes >5 seconds, prevents stacking multiple concurrent calls
|
|
@@ -270,6 +322,8 @@ setInterval(() => {
|
|
|
270
322
|
sessionActivity.delete(sessionId);
|
|
271
323
|
heartbeatState.delete(sessionId);
|
|
272
324
|
heartbeatFailureLogged.delete(sessionId);
|
|
325
|
+
sessionPidCache.delete(sessionId);
|
|
326
|
+
lastPidCheck.delete(sessionId);
|
|
273
327
|
sessionCleanedCount++;
|
|
274
328
|
|
|
275
329
|
if (process.env.DEBUG) {
|
|
@@ -787,7 +841,11 @@ async function handleRequest(req, res) {
|
|
|
787
841
|
}
|
|
788
842
|
}
|
|
789
843
|
|
|
790
|
-
|
|
844
|
+
// Clear stale PID cache so the new session's PID is read fresh from marker file
|
|
845
|
+
sessionPidCache.delete(session_id);
|
|
846
|
+
lastPidCheck.delete(session_id);
|
|
847
|
+
|
|
848
|
+
const sessionEntry = {
|
|
791
849
|
session_id,
|
|
792
850
|
claude_session_id: claude_session_id || session_id, // Fallback to session_id if not provided
|
|
793
851
|
cwd: cwd || process.cwd(),
|
|
@@ -796,7 +854,10 @@ async function handleRequest(req, res) {
|
|
|
796
854
|
daemon_pid: process.pid // Add daemon PID to metadata
|
|
797
855
|
},
|
|
798
856
|
registered_at: Date.now()
|
|
799
|
-
}
|
|
857
|
+
};
|
|
858
|
+
sessions.set(session_id, sessionEntry);
|
|
859
|
+
// Persist to log so daemon restarts can recover full meta for re-registration
|
|
860
|
+
appendSessionRegisterLog(sessionEntry);
|
|
800
861
|
|
|
801
862
|
console.log(`[daemon] Session registered: ${session_id} (claude_id: ${claude_session_id || session_id}) (daemon_pid: ${process.pid}) (cwd: ${cwd || process.cwd()})`);
|
|
802
863
|
|
|
@@ -1144,6 +1205,67 @@ async function handleInboxMessage(session_id, message) {
|
|
|
1144
1205
|
return;
|
|
1145
1206
|
}
|
|
1146
1207
|
|
|
1208
|
+
// Check for paused tasks — route message to task instead of spawning new process.
|
|
1209
|
+
// This adds one relay round-trip per inbox message, but only fires for 'command' type
|
|
1210
|
+
// messages (user-initiated from mobile), not for auto-continue or approval messages.
|
|
1211
|
+
// Future optimization: include paused task info in the session polling response.
|
|
1212
|
+
try {
|
|
1213
|
+
const tasksResp = await fetch(
|
|
1214
|
+
`${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/tasks`,
|
|
1215
|
+
{ headers: { 'Authorization': `Bearer ${RELAY_API_KEY}` }, signal: AbortSignal.timeout(5000) }
|
|
1216
|
+
);
|
|
1217
|
+
if (tasksResp.ok) {
|
|
1218
|
+
const tasks = await tasksResp.json();
|
|
1219
|
+
const pausedTasks = tasks.filter(t => t.status === 'paused' || t.status === 'waiting_input');
|
|
1220
|
+
if (pausedTasks.length > 1) {
|
|
1221
|
+
logWarn(`[daemon] ⚠️ Multiple paused tasks (${pausedTasks.length}) for session ${session_id} — routing to first`);
|
|
1222
|
+
}
|
|
1223
|
+
const pausedTask = pausedTasks[0];
|
|
1224
|
+
if (pausedTask) {
|
|
1225
|
+
logInfo(`[daemon] 📨 Routing message to paused task ${pausedTask.id.slice(0, 20)}... (status: ${pausedTask.status})`);
|
|
1226
|
+
|
|
1227
|
+
let routeResp;
|
|
1228
|
+
if (pausedTask.status === 'waiting_input') {
|
|
1229
|
+
// Task is waiting for user input — use the answer endpoint
|
|
1230
|
+
routeResp = await fetch(
|
|
1231
|
+
`${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/tasks/${encodeURIComponent(pausedTask.id)}/answer`,
|
|
1232
|
+
{
|
|
1233
|
+
method: 'POST',
|
|
1234
|
+
headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${RELAY_API_KEY}` },
|
|
1235
|
+
body: JSON.stringify({ answer: commandText })
|
|
1236
|
+
}
|
|
1237
|
+
);
|
|
1238
|
+
} else {
|
|
1239
|
+
// Task is paused — use redirect to set new instructions and resume
|
|
1240
|
+
routeResp = await fetch(
|
|
1241
|
+
`${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/tasks/${encodeURIComponent(pausedTask.id)}/redirect`,
|
|
1242
|
+
{
|
|
1243
|
+
method: 'POST',
|
|
1244
|
+
headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${RELAY_API_KEY}` },
|
|
1245
|
+
body: JSON.stringify({ instruction: commandText })
|
|
1246
|
+
}
|
|
1247
|
+
);
|
|
1248
|
+
}
|
|
1249
|
+
|
|
1250
|
+
if (routeResp.ok) {
|
|
1251
|
+
logInfo(`[daemon] ✅ Message routed to task, will resume on next poll cycle`);
|
|
1252
|
+
// Acknowledge the inbox message
|
|
1253
|
+
await fetch(`${RELAY_API_URL}/api/messages/${encodeURIComponent(message.id)}/ack`, {
|
|
1254
|
+
method: 'POST',
|
|
1255
|
+
headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${RELAY_API_KEY}` },
|
|
1256
|
+
body: JSON.stringify({ session_id })
|
|
1257
|
+
}).catch(() => {});
|
|
1258
|
+
return; // Don't spawn a new process
|
|
1259
|
+
}
|
|
1260
|
+
// If route failed (e.g. wrong status), fall through to normal execution
|
|
1261
|
+
logWarn(`[daemon] Failed to route message to task (${routeResp.status}), falling back to normal execution`);
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
} catch (taskCheckError) {
|
|
1265
|
+
// Non-critical — fall through to normal execution
|
|
1266
|
+
logWarn(`[daemon] Task check failed: ${taskCheckError.message}`);
|
|
1267
|
+
}
|
|
1268
|
+
|
|
1147
1269
|
// Invalidate pending approvals BEFORE executing new command
|
|
1148
1270
|
// This prevents race conditions where stale approvals could be acted upon
|
|
1149
1271
|
try {
|
|
@@ -1466,6 +1588,100 @@ async function sendHeartbeat(session_id) {
|
|
|
1466
1588
|
}
|
|
1467
1589
|
}
|
|
1468
1590
|
|
|
1591
|
+
/**
 * Check if a Claude PID is still alive.
 * Uses signal 0 (no signal sent, just existence check).
 *
 * Only positive integers are treated as process IDs. Zero and negative values are
 * rejected up front because kill() interprets them as process-group targets
 * (0 = the caller's own group), which would make the probe succeed and report a
 * bogus "alive" result.
 *
 * CROSS-REFERENCE: pid-liveness.test.js has an equivalent local implementation.
 * If this logic changes, update the test copy to match.
 * @param {number} pid - Process ID to check
 * @returns {boolean} true if process exists, false otherwise (including invalid pids)
 */
function isPidAlive(pid) {
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }
  try {
    process.kill(pid, 0);
    return true;
  } catch (e) {
    // EPERM means process exists but we lack permission to signal it (different user/container)
    return e.code === 'EPERM';
  }
}
|
|
1608
|
+
|
|
1609
|
+
/**
 * Look up the Claude PID recorded for a session.
 *
 * A previously cached PID is returned as-is. Otherwise the session marker file
 * (`teleportation-session-<id>.json` in the OS temp directory) is read, and its
 * `claude_pid` is cached and returned when it is a positive number.
 *
 * CROSS-REFERENCE: pid-liveness.test.js has an equivalent local implementation
 * (without cache). If this file-reading logic changes, update the test copy to match.
 * @param {string} session_id - Session ID
 * @returns {number|null} Claude PID, or null when the marker file is missing,
 *   unreadable, not valid JSON, or holds no usable PID
 */
function getSessionPid(session_id) {
  // Serve from the cache when possible so the file is not re-read every poll cycle
  if (sessionPidCache.has(session_id)) {
    return sessionPidCache.get(session_id);
  }

  const markerPath = join(tmpdir(), `teleportation-session-${session_id}.json`);

  let claudePid;
  try {
    ({ claude_pid: claudePid } = JSON.parse(readFileSync(markerPath, 'utf8')));
  } catch {
    // Marker file is missing, unreadable or malformed - no PID available
    return null;
  }

  if (typeof claudePid !== 'number' || !(claudePid > 0)) {
    return null;
  }

  sessionPidCache.set(session_id, claudePid);
  return claudePid;
}
|
|
1638
|
+
|
|
1639
|
+
/**
 * Tear down daemon-side state for a session whose Claude process has died.
 *
 * Steps, in order: drop the session from every per-session tracking collection,
 * add it to stoppedSessions, delete its marker file from the OS temp directory, and
 * POST `/end` to the relay with reason `pid_dead`. The file deletion and the relay
 * call are both best-effort: their failures are ignored.
 *
 * @param {string} session_id - Session ID to clean up
 * @param {number|null} pid - The dead PID (for logging)
 * @returns {Promise<void>}
 */
async function cleanupDeadSession(session_id, pid) {
  logInfo(`[daemon] PID ${pid} for session ${session_id.slice(0, 8)}... is dead - cleaning up`);

  // Every collection keyed by session id that must forget this session
  const perSessionState = [
    sessions,
    sessionActivity,
    heartbeatState,
    heartbeatFailureLogged,
    sessionPidCache,
    lastPidCheck,
    ingestionInProgress
  ];
  for (const store of perSessionState) {
    store.delete(session_id);
  }
  // Prevent re-activation from stale registration attempts
  stoppedSessions.add(session_id);

  // Remove the marker file; it may already be gone
  const markerPath = join(tmpdir(), `teleportation-session-${session_id}.json`);
  try {
    unlinkSync(markerPath);
  } catch {
    // File may already be deleted
  }

  // Tell the relay the session ended (best-effort)
  try {
    const endUrl = `${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/end`;
    const requestInit = {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${RELAY_API_KEY}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ reason: 'pid_dead' }),
      signal: AbortSignal.timeout(5000)
    };
    await fetch(endUrl, requestInit);
  } catch {
    // Best-effort - relay will eventually expire the session via heartbeat timeout
  }
}
|
|
1684
|
+
|
|
1469
1685
|
/**
|
|
1470
1686
|
* Relay API Polling Loop
|
|
1471
1687
|
* Polls relay API every 5 seconds for approved requests
|
|
@@ -1491,8 +1707,96 @@ async function pollRelayAPI() {
|
|
|
1491
1707
|
// Debug: Log to file for visibility
|
|
1492
1708
|
debugLog('daemon-poll-debug.log', `Polling session ${session_id}`);
|
|
1493
1709
|
|
|
1494
|
-
//
|
|
1495
|
-
sessionActivity
|
|
1710
|
+
// PID liveness check (Bug 2):
|
|
1711
|
+
// Instead of unconditionally refreshing sessionActivity, check if the Claude
|
|
1712
|
+
// process is still alive. Only update activity if PID is confirmed alive.
|
|
1713
|
+
// Throttled to at most once per PID_CHECK_INTERVAL_MS per session.
|
|
1714
|
+
const pidCheckNow = Date.now();
|
|
1715
|
+
const lastCheck = lastPidCheck.get(session_id) || 0;
|
|
1716
|
+
if (pidCheckNow - lastCheck >= PID_CHECK_INTERVAL_MS) {
|
|
1717
|
+
lastPidCheck.set(session_id, pidCheckNow);
|
|
1718
|
+
const pid = getSessionPid(session_id);
|
|
1719
|
+
if (pid !== null && !isPidAlive(pid)) {
|
|
1720
|
+
// PID is dead - clean up and skip this session
|
|
1721
|
+
await cleanupDeadSession(session_id, pid);
|
|
1722
|
+
continue;
|
|
1723
|
+
}
|
|
1724
|
+
// PID is alive (or no PID file exists - backward compat: treat as alive)
|
|
1725
|
+
sessionActivity.set(session_id, pidCheckNow);
|
|
1726
|
+
}
|
|
1727
|
+
// Between PID checks, do NOT refresh sessionActivity - let the cleanup
|
|
1728
|
+
// sweep use the last confirmed-alive timestamp.
|
|
1729
|
+
|
|
1730
|
+
// 0) Check for stop_requested flag (mobile stop button)
|
|
1731
|
+
// Uses daemon-state endpoint (lightweight) instead of full session fetch
|
|
1732
|
+
try {
|
|
1733
|
+
const stopCheckResponse = await fetch(
|
|
1734
|
+
`${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/daemon-state`,
|
|
1735
|
+
{
|
|
1736
|
+
headers: { 'Authorization': `Bearer ${RELAY_API_KEY}` },
|
|
1737
|
+
signal: AbortSignal.timeout(5000)
|
|
1738
|
+
}
|
|
1739
|
+
);
|
|
1740
|
+
|
|
1741
|
+
if (stopCheckResponse.ok) {
|
|
1742
|
+
const daemonState = await stopCheckResponse.json();
|
|
1743
|
+
|
|
1744
|
+
if (daemonState.stop_requested) {
|
|
1745
|
+
logInfo(`[daemon] 🛑 Stop requested for session ${session_id} — killing running processes`);
|
|
1746
|
+
|
|
1747
|
+
// Kill any running approval execution processes for this session
|
|
1748
|
+
let killedExecution = false;
|
|
1749
|
+
for (const [approval_id, exec] of executions) {
|
|
1750
|
+
if (exec.session_id === session_id && exec.status === 'executing' && exec.child_process) {
|
|
1751
|
+
try {
|
|
1752
|
+
const child = exec.child_process;
|
|
1753
|
+
child.kill('SIGTERM');
|
|
1754
|
+
// Track SIGKILL timer so it can be cancelled if process exits cleanly
|
|
1755
|
+
const killTimer = setTimeout(() => {
|
|
1756
|
+
try { child.kill('SIGKILL'); } catch {}
|
|
1757
|
+
}, 2000);
|
|
1758
|
+
child.once('exit', () => clearTimeout(killTimer));
|
|
1759
|
+
killedExecution = true;
|
|
1760
|
+
logInfo(`[daemon] Killed execution process for approval ${approval_id}`);
|
|
1761
|
+
} catch (killErr) {
|
|
1762
|
+
logWarn(`[daemon] Failed to kill execution ${approval_id}: ${killErr.message}`);
|
|
1763
|
+
}
|
|
1764
|
+
}
|
|
1765
|
+
}
|
|
1766
|
+
|
|
1767
|
+
// Kill any running task processes for this session
|
|
1768
|
+
const killedTaskCount = stopTasksForSession(session_id);
|
|
1769
|
+
const killedTask = killedTaskCount > 0;
|
|
1770
|
+
if (killedTask) {
|
|
1771
|
+
logInfo(`[daemon] Killed ${killedTaskCount} task process(es) for session ${session_id}`);
|
|
1772
|
+
}
|
|
1773
|
+
|
|
1774
|
+
// Clear stop_requested flag
|
|
1775
|
+
try {
|
|
1776
|
+
await fetch(`${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/daemon-state`, {
|
|
1777
|
+
method: 'PATCH',
|
|
1778
|
+
headers: {
|
|
1779
|
+
'Content-Type': 'application/json',
|
|
1780
|
+
'Authorization': `Bearer ${RELAY_API_KEY}`
|
|
1781
|
+
},
|
|
1782
|
+
body: JSON.stringify({ stop_requested: false })
|
|
1783
|
+
});
|
|
1784
|
+
logInfo(`[daemon] Cleared stop_requested flag for session ${session_id}`);
|
|
1785
|
+
} catch (clearErr) {
|
|
1786
|
+
logWarn(`[daemon] Failed to clear stop_requested: ${clearErr.message}`);
|
|
1787
|
+
}
|
|
1788
|
+
|
|
1789
|
+
if (!killedExecution && !killedTask) {
|
|
1790
|
+
logInfo(`[daemon] No running processes found to stop for session ${session_id}`);
|
|
1791
|
+
}
|
|
1792
|
+
}
|
|
1793
|
+
}
|
|
1794
|
+
} catch (stopCheckError) {
|
|
1795
|
+
// Don't block polling if stop check fails
|
|
1796
|
+
if (stopCheckError.name !== 'AbortError') {
|
|
1797
|
+
logWarn(`[daemon] Stop check error for ${session_id}: ${stopCheckError.message}`);
|
|
1798
|
+
}
|
|
1799
|
+
}
|
|
1496
1800
|
|
|
1497
1801
|
// 1) Approvals polling (existing behavior)
|
|
1498
1802
|
try {
|
|
@@ -1603,8 +1907,8 @@ async function pollRelayAPI() {
|
|
|
1603
1907
|
|
|
1604
1908
|
// Process each task (stateless - queries timeline each time)
|
|
1605
1909
|
for (const task of tasks) {
|
|
1606
|
-
// Skip
|
|
1607
|
-
if (task.status === 'stopped' || task.status === 'completed') {
|
|
1910
|
+
// Skip non-runnable tasks (paused tasks wait for user message to resume)
|
|
1911
|
+
if (task.status === 'stopped' || task.status === 'completed' || task.status === 'paused') {
|
|
1608
1912
|
continue;
|
|
1609
1913
|
}
|
|
1610
1914
|
|
|
@@ -1643,11 +1947,21 @@ async function pollRelayAPI() {
|
|
|
1643
1947
|
const claude_session_id = sessionData.claude_session_id || session_id;
|
|
1644
1948
|
const cwd = sessionData.cwd || process.cwd();
|
|
1645
1949
|
|
|
1950
|
+
// 4) Heartbeat - send periodically to keep session alive
|
|
1951
|
+
// Must run before ingestion throttle check — ingestion `continue` must not skip heartbeats
|
|
1952
|
+
const now = Date.now();
|
|
1953
|
+
const sessionHeartbeat = heartbeatState.get(session_id);
|
|
1954
|
+
const lastSent = sessionHeartbeat?.lastSent || 0;
|
|
1955
|
+
if (now - lastSent >= SESSION_HEARTBEAT_INTERVAL_MS) {
|
|
1956
|
+
await sendHeartbeat(session_id);
|
|
1957
|
+
}
|
|
1958
|
+
|
|
1959
|
+
// 5) Transcript ingestion - backup to stop hook for timeline completeness
|
|
1646
1960
|
// Throttling: Check if ingestion is already in progress for this session
|
|
1647
1961
|
// Prevents concurrent ingestion runs that could cause race conditions
|
|
1648
1962
|
if (ingestionInProgress.has(session_id)) {
|
|
1649
1963
|
debugLog('daemon-transcript-debug.log', `Ingestion already in progress for ${session_id}, skipping`);
|
|
1650
|
-
// Skip this
|
|
1964
|
+
// Skip ingestion this cycle (heartbeat already sent above)
|
|
1651
1965
|
continue;
|
|
1652
1966
|
}
|
|
1653
1967
|
|
|
@@ -1680,15 +1994,6 @@ async function pollRelayAPI() {
|
|
|
1680
1994
|
|
|
1681
1995
|
// Track the promise (but don't await - fire-and-forget)
|
|
1682
1996
|
ingestionInProgress.set(session_id, ingestionPromise);
|
|
1683
|
-
|
|
1684
|
-
// 5) Heartbeat - send periodically to keep session alive
|
|
1685
|
-
// Only send heartbeat if enough time has passed since last one (throttled per session)
|
|
1686
|
-
const now = Date.now();
|
|
1687
|
-
const sessionHeartbeat = heartbeatState.get(session_id);
|
|
1688
|
-
const lastSent = sessionHeartbeat?.lastSent || 0;
|
|
1689
|
-
if (now - lastSent >= SESSION_HEARTBEAT_INTERVAL_MS) {
|
|
1690
|
-
await sendHeartbeat(session_id);
|
|
1691
|
-
}
|
|
1692
1997
|
}
|
|
1693
1998
|
|
|
1694
1999
|
// Process approval queue
|
|
@@ -1823,6 +2128,7 @@ async function processQueue() {
|
|
|
1823
2128
|
// Mark as executing (child_process will be set when spawnClaudeProcess is called)
|
|
1824
2129
|
executions.set(approval_id, {
|
|
1825
2130
|
approval_id,
|
|
2131
|
+
session_id,
|
|
1826
2132
|
status: 'executing',
|
|
1827
2133
|
started_at: Date.now(),
|
|
1828
2134
|
completed_at: null,
|
|
@@ -2685,8 +2991,6 @@ async function cleanup() {
|
|
|
2685
2991
|
* Enables daemon to recover sessions after restart
|
|
2686
2992
|
*/
|
|
2687
2993
|
async function discoverSessionsFromLog() {
|
|
2688
|
-
const SESSION_LOG_FILE = join(homedir(), '.teleportation', 'session-events.log');
|
|
2689
|
-
|
|
2690
2994
|
try {
|
|
2691
2995
|
const { readFile } = await import('fs/promises');
|
|
2692
2996
|
const content = await readFile(SESSION_LOG_FILE, 'utf8');
|
|
@@ -2771,8 +3075,6 @@ async function discoverSessionsFromLog() {
|
|
|
2771
3075
|
* Keeps only active sessions to prevent unbounded growth
|
|
2772
3076
|
*/
|
|
2773
3077
|
async function compactSessionLog(activeSessions) {
|
|
2774
|
-
const SESSION_LOG_FILE = join(homedir(), '.teleportation', 'session-events.log');
|
|
2775
|
-
|
|
2776
3078
|
try {
|
|
2777
3079
|
const { writeFile } = await import('fs/promises');
|
|
2778
3080
|
|
|
@@ -2813,22 +3115,39 @@ async function compactSessionLog(activeSessions) {
|
|
|
2813
3115
|
async function main() {
|
|
2814
3116
|
console.log('[daemon] Main function started.');
|
|
2815
3117
|
|
|
2816
|
-
// Load credentials from encrypted file if not in environment
|
|
2817
|
-
|
|
3118
|
+
// Load credentials from encrypted file if not in environment, OR if the env var
|
|
3119
|
+
// looks like a relay service key (raw hex, no 'tp_' prefix) rather than a user API key.
|
|
3120
|
+
// This handles the case where Bun auto-loads relay/.env when the daemon cwd is /relay,
|
|
3121
|
+
// injecting a hex service key that causes all heartbeats to fail with 404.
|
|
3122
|
+
const envKeyIsServiceKey = RELAY_API_KEY && !RELAY_API_KEY.startsWith('tp_');
|
|
3123
|
+
if (!RELAY_API_KEY || envKeyIsServiceKey) {
|
|
2818
3124
|
try {
|
|
2819
|
-
console.log(
|
|
3125
|
+
console.log(
|
|
3126
|
+
envKeyIsServiceKey
|
|
3127
|
+
? '[daemon] RELAY_API_KEY looks like a service key (no tp_ prefix), loading user credentials from encrypted file...'
|
|
3128
|
+
: '[daemon] RELAY_API_KEY not in environment, loading from credentials file...'
|
|
3129
|
+
);
|
|
2820
3130
|
const credManager = new CredentialManager();
|
|
2821
3131
|
const creds = await credManager.load();
|
|
2822
3132
|
if (creds && creds.apiKey) {
|
|
2823
3133
|
RELAY_API_KEY = creds.apiKey;
|
|
2824
|
-
RELAY_API_URL = creds.relayUrl || RELAY_API_URL;
|
|
3134
|
+
RELAY_API_URL = creds.relayApiUrl || creds.relayUrl || RELAY_API_URL;
|
|
2825
3135
|
console.log('[daemon] ✅ Loaded credentials from encrypted file');
|
|
3136
|
+
} else if (envKeyIsServiceKey) {
|
|
3137
|
+
console.warn('[daemon] ⚠️ No user credentials found — refusing to use service key for heartbeats');
|
|
3138
|
+
RELAY_API_KEY = '';
|
|
2826
3139
|
} else {
|
|
2827
3140
|
console.warn('[daemon] ⚠️ No API key found in credentials file');
|
|
2828
3141
|
}
|
|
2829
3142
|
} catch (e) {
|
|
2830
|
-
|
|
2831
|
-
|
|
3143
|
+
if (envKeyIsServiceKey) {
|
|
3144
|
+
console.warn('[daemon] ⚠️ Failed to load credentials and env key is a service key:', e.message);
|
|
3145
|
+
console.warn('[daemon] Clearing service key — daemon will run without relay auth');
|
|
3146
|
+
RELAY_API_KEY = '';
|
|
3147
|
+
} else {
|
|
3148
|
+
console.warn('[daemon] ⚠️ Failed to load credentials:', e.message);
|
|
3149
|
+
console.warn('[daemon] Daemon will run but cannot authenticate with relay API');
|
|
3150
|
+
}
|
|
2832
3151
|
}
|
|
2833
3152
|
} else {
|
|
2834
3153
|
console.log('[daemon] Using RELAY_API_KEY from environment');
|
|
@@ -2890,6 +3209,41 @@ async function main() {
|
|
|
2890
3209
|
});
|
|
2891
3210
|
if (!hbResponse.ok) {
|
|
2892
3211
|
const errMsg = `HTTP ${hbResponse.status}`;
|
|
3212
|
+
|
|
3213
|
+
// 404 means session expired from Redis — try to re-register it.
|
|
3214
|
+
// The relay heartbeat endpoint also attempts recovery from mech-storage,
|
|
3215
|
+
// but if that fails (e.g., session never persisted), daemon-side re-registration
|
|
3216
|
+
// ensures the session is recreated with correct metadata.
|
|
3217
|
+
if (hbResponse.status === 404) {
|
|
3218
|
+
const sessionData = sessions.get(sessionId);
|
|
3219
|
+
try {
|
|
3220
|
+
const regResponse = await fetch(`${RELAY_API_URL}/api/sessions/register`, {
|
|
3221
|
+
method: 'POST',
|
|
3222
|
+
headers: {
|
|
3223
|
+
'Content-Type': 'application/json',
|
|
3224
|
+
'Authorization': `Bearer ${RELAY_API_KEY}`
|
|
3225
|
+
},
|
|
3226
|
+
body: JSON.stringify({
|
|
3227
|
+
session_id: sessionId,
|
|
3228
|
+
claude_session_id: sessionData?.claude_session_id || undefined,
|
|
3229
|
+
cwd: sessionData?.cwd || process.cwd(),
|
|
3230
|
+
meta: sessionData?.meta || {}
|
|
3231
|
+
}),
|
|
3232
|
+
signal: AbortSignal.timeout(5000)
|
|
3233
|
+
});
|
|
3234
|
+
if (regResponse.ok) {
|
|
3235
|
+
console.log(`[daemon] Re-registered expired session ${sessionId} after heartbeat 404`);
|
|
3236
|
+
// Clear failure tracking so next heartbeat is treated fresh
|
|
3237
|
+
heartbeatFailureLogged.delete(sessionId);
|
|
3238
|
+
continue; // Skip failure logging — session recovered
|
|
3239
|
+
} else {
|
|
3240
|
+
console.warn(`[daemon] Failed to re-register session ${sessionId}: HTTP ${regResponse.status}`);
|
|
3241
|
+
}
|
|
3242
|
+
} catch (regErr) {
|
|
3243
|
+
console.warn(`[daemon] Re-registration attempt failed for ${sessionId}: ${regErr.message}`);
|
|
3244
|
+
}
|
|
3245
|
+
}
|
|
3246
|
+
|
|
2893
3247
|
if (!heartbeatFailureLogged.has(sessionId)) {
|
|
2894
3248
|
heartbeatFailureLogged.add(sessionId);
|
|
2895
3249
|
console.warn(`[daemon] Heartbeat rejected for ${sessionId}: ${errMsg} (further failures for this session suppressed unless DEBUG is set)`);
|
|
@@ -2912,6 +3266,85 @@ async function main() {
|
|
|
2912
3266
|
}, HEARTBEAT_INTERVAL_MS);
|
|
2913
3267
|
console.log(`[daemon] Session heartbeat interval started (${HEARTBEAT_INTERVAL_MS / 1000}s)`);
|
|
2914
3268
|
|
|
3269
|
+
// PID-based session file scan: discover sessions written by session_start.mjs hooks
// and check liveness via OS process table rather than waiting for hooks to re-register.
//
// This function is also scheduled from a timer, where nothing awaits or catches the
// returned promise. It therefore must never reject: a failure while handling one
// record is logged and the scan moves on to the next record, and any other failure
// is logged and ends the current scan only.
const scanSessionFiles = async () => {
  if (isShuttingDown) return;

  try {
    const registry = await getRegistry();
    if (!registry.readAllSessionFiles || !registry.isClaudePidAlive) return;

    let records;
    try { records = await registry.readAllSessionFiles(); } catch { return; }

    for (const record of records) {
      try {
        const { session_id, claude_pid, cwd, meta, ended } = record;
        if (!session_id || !claude_pid) continue;

        // Fast path: session already ended
        if (ended) {
          if (sessions.has(session_id)) {
            sessions.delete(session_id);
            console.log(`[daemon] Session ${session_id.slice(0,8)} ended (file flag)`);
          }
          try { await registry.deleteSessionFile(session_id); } catch {}
          continue;
        }

        const alive = await registry.isClaudePidAlive(claude_pid);
        // Read after the await so a registration that landed meanwhile is seen
        const tracked = sessions.has(session_id);

        if (!tracked && alive) {
          // New session discovered via file — add to Map and ack
          // claude_session_id is intentionally set to session_id here: session files
          // don't store the Anthropic-assigned claude session ID (not available at
          // hook time). The daemon only uses session_id for heartbeats/relay calls;
          // claude_session_id is only needed for --resume, which goes through the HTTP
          // registration path or agentic-executor, not file-based discovery.
          sessions.set(session_id, { session_id, claude_session_id: session_id, cwd, meta: meta || {}, claude_pid });
          try { await registry.ackSessionFile(session_id, process.pid); } catch {}
          console.log(`[daemon] Discovered session ${session_id.slice(0,8)} (PID ${claude_pid}, project: ${meta?.project_name || 'unknown'})`);
          // Send immediate heartbeat so relay shows it active right away
          try {
            await fetch(`${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/heartbeat`, {
              method: 'POST',
              headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${RELAY_API_KEY}` },
              body: JSON.stringify({ timestamp: Date.now() }),
              signal: AbortSignal.timeout(5000)
            });
          } catch {}
        } else if (tracked && !alive) {
          // Track when PID first went dead
          const sessionData = sessions.get(session_id);
          const now = Date.now();
          if (!sessionData._pid_dead_since) {
            sessionData._pid_dead_since = now;
            console.log(`[daemon] Session ${session_id.slice(0,8)} PID ${claude_pid} no longer alive — starting ${DEAD_PID_THRESHOLD_MS / 1000}s grace period`);
          } else if (now - sessionData._pid_dead_since >= DEAD_PID_THRESHOLD_MS) {
            // Grace period expired — mark stopped
            sessions.delete(session_id);
            try { await registry.deleteSessionFile(session_id); } catch {}
            console.log(`[daemon] Session ${session_id.slice(0,8)} marked stopped (PID dead > ${DEAD_PID_THRESHOLD_MS / 1000}s)`);
            try {
              await fetch(`${RELAY_API_URL}/api/sessions/${encodeURIComponent(session_id)}/daemon-state`, {
                method: 'PATCH',
                headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${RELAY_API_KEY}` },
                body: JSON.stringify({ status: 'stopped', stopped_reason: 'pid_dead' }),
                signal: AbortSignal.timeout(5000)
              });
            } catch {}
          }
        } else if (tracked && alive) {
          // PID is (again) reported alive: cancel any grace period in progress so an
          // earlier dead observation cannot count towards a later, unrelated one.
          const sessionData = sessions.get(session_id);
          if (sessionData._pid_dead_since) {
            delete sessionData._pid_dead_since;
          }
        } else {
          // Stale file for a dead PID we never tracked — clean up
          try { await registry.deleteSessionFile(session_id); } catch {}
        }
      } catch (recordErr) {
        console.warn(`[daemon] Session file scan failed for record ${record?.session_id ?? 'unknown'}: ${recordErr?.message ?? recordErr}`);
      }
    }
  } catch (scanErr) {
    console.warn('[daemon] Session file scan failed:', scanErr?.message ?? scanErr);
  }
};
|
|
3341
|
+
|
|
3342
|
+
// Run scan on startup to recover sessions from a daemon restart
|
|
3343
|
+
scanSessionFiles().catch(e => console.warn('[daemon] Initial session file scan failed:', e.message));
|
|
3344
|
+
// Then scan on every heartbeat cycle
|
|
3345
|
+
setInterval(scanSessionFiles, HEARTBEAT_INTERVAL_MS);
|
|
3346
|
+
console.log(`[daemon] PID-based session file scan active (${HEARTBEAT_INTERVAL_MS / 1000}s interval)`);
|
|
3347
|
+
|
|
2915
3348
|
// Start polling loop
|
|
2916
3349
|
console.log('[daemon] Starting relay API polling...');
|
|
2917
3350
|
pollRelayAPI();
|
|
@@ -2982,7 +3415,14 @@ const __test = {
|
|
|
2982
3415
|
// Stopped sessions test helpers
|
|
2983
3416
|
_getStoppedSessions: () => stoppedSessions,
|
|
2984
3417
|
_addStoppedSession: (session_id) => stoppedSessions.add(session_id),
|
|
2985
|
-
_clearStoppedSessions: () => stoppedSessions.clear()
|
|
3418
|
+
_clearStoppedSessions: () => stoppedSessions.clear(),
|
|
3419
|
+
// PID liveness test helpers (Bug 2)
|
|
3420
|
+
isPidAlive,
|
|
3421
|
+
getSessionPid,
|
|
3422
|
+
cleanupDeadSession,
|
|
3423
|
+
_getSessionPidCache: () => sessionPidCache,
|
|
3424
|
+
_getLastPidCheck: () => lastPidCheck,
|
|
3425
|
+
_getSessionActivity: () => sessionActivity,
|
|
2986
3426
|
};
|
|
2987
3427
|
|
|
2988
3428
|
// Test helper to register a session
|