@portel/photon 1.20.1 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/README.md +5 -5
  2. package/dist/ag-ui/adapter.d.ts.map +1 -1
  3. package/dist/ag-ui/adapter.js +25 -0
  4. package/dist/ag-ui/adapter.js.map +1 -1
  5. package/dist/auto-ui/beam/routes/api-browse.d.ts.map +1 -1
  6. package/dist/auto-ui/beam/routes/api-browse.js +8 -49
  7. package/dist/auto-ui/beam/routes/api-browse.js.map +1 -1
  8. package/dist/auto-ui/beam.d.ts.map +1 -1
  9. package/dist/auto-ui/beam.js +23 -31
  10. package/dist/auto-ui/beam.js.map +1 -1
  11. package/dist/auto-ui/bridge/index.d.ts.map +1 -1
  12. package/dist/auto-ui/bridge/index.js +107 -11
  13. package/dist/auto-ui/bridge/index.js.map +1 -1
  14. package/dist/auto-ui/bridge/renderers.d.ts +14 -0
  15. package/dist/auto-ui/bridge/renderers.d.ts.map +1 -1
  16. package/dist/auto-ui/bridge/renderers.js +680 -57
  17. package/dist/auto-ui/bridge/renderers.js.map +1 -1
  18. package/dist/auto-ui/frontend/index.html +3 -3
  19. package/dist/auto-ui/frontend/pure-view.html +19 -19
  20. package/dist/auto-ui/streamable-http-transport.d.ts.map +1 -1
  21. package/dist/auto-ui/streamable-http-transport.js +29 -0
  22. package/dist/auto-ui/streamable-http-transport.js.map +1 -1
  23. package/dist/auto-ui/ui-resolver.d.ts +25 -0
  24. package/dist/auto-ui/ui-resolver.d.ts.map +1 -0
  25. package/dist/auto-ui/ui-resolver.js +95 -0
  26. package/dist/auto-ui/ui-resolver.js.map +1 -0
  27. package/dist/beam-form.bundle.js +7 -7
  28. package/dist/beam-form.bundle.js.map +1 -1
  29. package/dist/beam.bundle.js +905 -185
  30. package/dist/beam.bundle.js.map +4 -4
  31. package/dist/cli/commands/build.d.ts.map +1 -1
  32. package/dist/cli/commands/build.js +9 -5
  33. package/dist/cli/commands/build.js.map +1 -1
  34. package/dist/cli/commands/init.d.ts.map +1 -1
  35. package/dist/cli/commands/init.js +90 -50
  36. package/dist/cli/commands/init.js.map +1 -1
  37. package/dist/cli/commands/publish.d.ts +14 -0
  38. package/dist/cli/commands/publish.d.ts.map +1 -0
  39. package/dist/cli/commands/publish.js +126 -0
  40. package/dist/cli/commands/publish.js.map +1 -0
  41. package/dist/cli/commands/run.d.ts.map +1 -1
  42. package/dist/cli/commands/run.js +2 -0
  43. package/dist/cli/commands/run.js.map +1 -1
  44. package/dist/cli/index.d.ts.map +1 -1
  45. package/dist/cli/index.js +3 -0
  46. package/dist/cli/index.js.map +1 -1
  47. package/dist/context.d.ts +6 -0
  48. package/dist/context.d.ts.map +1 -1
  49. package/dist/context.js +17 -5
  50. package/dist/context.js.map +1 -1
  51. package/dist/daemon/client.d.ts +9 -1
  52. package/dist/daemon/client.d.ts.map +1 -1
  53. package/dist/daemon/client.js +54 -1
  54. package/dist/daemon/client.js.map +1 -1
  55. package/dist/daemon/manager.d.ts +3 -0
  56. package/dist/daemon/manager.d.ts.map +1 -1
  57. package/dist/daemon/manager.js +88 -38
  58. package/dist/daemon/manager.js.map +1 -1
  59. package/dist/daemon/ownership.d.ts +12 -0
  60. package/dist/daemon/ownership.d.ts.map +1 -0
  61. package/dist/daemon/ownership.js +55 -0
  62. package/dist/daemon/ownership.js.map +1 -0
  63. package/dist/daemon/protocol.d.ts +3 -1
  64. package/dist/daemon/protocol.d.ts.map +1 -1
  65. package/dist/daemon/protocol.js +14 -2
  66. package/dist/daemon/protocol.js.map +1 -1
  67. package/dist/daemon/server.js +549 -83
  68. package/dist/daemon/server.js.map +1 -1
  69. package/dist/daemon/session-manager.d.ts +9 -1
  70. package/dist/daemon/session-manager.d.ts.map +1 -1
  71. package/dist/daemon/session-manager.js +54 -1
  72. package/dist/daemon/session-manager.js.map +1 -1
  73. package/dist/daemon/worker-manager.d.ts +12 -0
  74. package/dist/daemon/worker-manager.d.ts.map +1 -1
  75. package/dist/daemon/worker-manager.js +89 -6
  76. package/dist/daemon/worker-manager.js.map +1 -1
  77. package/dist/loader.d.ts +3 -9
  78. package/dist/loader.d.ts.map +1 -1
  79. package/dist/loader.js +168 -113
  80. package/dist/loader.js.map +1 -1
  81. package/dist/photon-cli-runner.d.ts.map +1 -1
  82. package/dist/photon-cli-runner.js +26 -2
  83. package/dist/photon-cli-runner.js.map +1 -1
  84. package/dist/photons/canvas/ui/canvas.photon.html +1493 -0
  85. package/dist/photons/canvas.photon.d.ts +400 -0
  86. package/dist/photons/canvas.photon.d.ts.map +1 -0
  87. package/dist/photons/canvas.photon.js +662 -0
  88. package/dist/photons/canvas.photon.js.map +1 -0
  89. package/dist/photons/canvas.photon.ts +814 -0
  90. package/dist/photons/publish.photon.d.ts +97 -0
  91. package/dist/photons/publish.photon.d.ts.map +1 -0
  92. package/dist/photons/publish.photon.js +569 -0
  93. package/dist/photons/publish.photon.js.map +1 -0
  94. package/dist/photons/publish.photon.ts +683 -0
  95. package/dist/photons/ui/canvas.photon.html +624 -0
  96. package/dist/resource-server.d.ts.map +1 -1
  97. package/dist/resource-server.js +7 -1
  98. package/dist/resource-server.js.map +1 -1
  99. package/dist/shared-utils.d.ts.map +1 -1
  100. package/dist/shared-utils.js +2 -2
  101. package/dist/shared-utils.js.map +1 -1
  102. package/dist/tsx-compiler.d.ts +23 -0
  103. package/dist/tsx-compiler.d.ts.map +1 -0
  104. package/dist/tsx-compiler.js +221 -0
  105. package/dist/tsx-compiler.js.map +1 -0
  106. package/package.json +7 -7
@@ -24,6 +24,7 @@ import { timingSafeEqual, readBody, SimpleRateLimiter } from '../shared/security
24
24
  import { audit } from '../shared/audit.js';
25
25
  import { WorkerManager } from './worker-manager.js';
26
26
  import fastJsonPatch from 'fast-json-patch';
27
+ import { getOwnerFilePath, isPidAlive, readOwnerRecord, removeOwnerRecord, waitForPidExit, writeOwnerRecord, } from './ownership.js';
27
28
  // eslint-disable-next-line @typescript-eslint/unbound-method
28
29
  const jsonPatchCompare = fastJsonPatch.compare;
29
30
  // Command line args: socketPath (global daemon only needs socket path)
@@ -37,6 +38,29 @@ if (!socketPath) {
37
38
  logger.error('Missing required argument: socketPath');
38
39
  process.exit(1);
39
40
  }
41
+ const pidFile = path.join(path.dirname(socketPath), 'daemon.pid');
42
+ const ownerFile = getOwnerFilePath(socketPath);
43
+ let daemonOwnershipConfirmed = false;
44
/**
 * Probe whether something is accepting connections on a Unix domain socket.
 *
 * Resolves `false` without connecting on Windows or when `target` does not
 * exist on disk. Otherwise a connection is attempted: `true` on connect,
 * `false` on a connection error or when the timeout elapses first.
 *
 * @param {string} target - Filesystem path of the socket to probe.
 * @param {number} [timeoutMs=1000] - Maximum time to wait for the connection.
 * @returns {Promise<boolean>} Whether a listener accepted the connection.
 */
async function isSocketResponsive(target, timeoutMs = 1000) {
  if (process.platform === 'win32' || !fs.existsSync(target)) {
    return false;
  }
  return new Promise((resolve) => {
    const client = net.createConnection(target);
    let settled = false;
    // Single exit path: whichever of connect/error/timeout happens first wins,
    // and the timer and the probe socket are always released.
    const settle = (responsive) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      client.destroy();
      resolve(responsive);
    };
    const timer = setTimeout(() => settle(false), timeoutMs);
    client.once('connect', () => settle(true));
    // Keep a persistent listener so a late error can never become unhandled.
    client.on('error', () => settle(false));
  });
}
40
64
  // ════════════════════════════════════════════════════════════════════════════════
41
65
  // IN-PROCESS BROKER
42
66
  // All photons run inside the daemon process, so pub/sub is just in-memory dispatch.
@@ -104,6 +128,38 @@ workerManager.onPublish = (channel, message) => {
104
128
  // Also forward to other workers
105
129
  workerManager.dispatchToWorkers(channel, message);
106
130
  };
131
+ // Track connected sockets for graceful shutdown broadcast
132
+ const connectedSockets = new Set();
133
+ /** Reference to the daemon server for closing the listener during shutdown */
134
+ let daemonServer = null;
135
+ /** Whether the daemon is shutting down (reject new commands) */
136
+ let isShuttingDown = false;
137
+ /** Tracks active executeTool calls per composite key */
138
+ const activeExecutions = new Map();
139
+ /** Per-key mutex to prevent concurrent reloads (format-on-save race) */
140
+ const reloadMutex = new Map();
141
/**
 * Note that one more executeTool call is in flight for `key`.
 * Creates the tracker entry on first use, otherwise bumps its counter.
 *
 * @param {string} key - Composite key identifying the photon.
 */
function trackExecution(key) {
  const entry = activeExecutions.get(key);
  if (!entry) {
    activeExecutions.set(key, { count: 1 });
    return;
  }
  entry.count += 1;
}
150
/**
 * Note that one executeTool call for `key` has finished.
 * When the counter reaches zero, any pending drain waiter is released and the
 * tracker entry is removed. Unknown keys are ignored.
 *
 * @param {string} key - Composite key identifying the photon.
 */
function untrackExecution(key) {
  const entry = activeExecutions.get(key);
  if (!entry) {
    return;
  }
  entry.count -= 1;
  if (entry.count > 0) {
    return;
  }
  // Last in-flight call finished: wake a waiting drain (if any), then drop the entry.
  if (entry.drainResolve) {
    entry.drainResolve();
    entry.drainResolve = undefined;
  }
  activeExecutions.delete(key);
}
107
163
  // Map of compositeKey -> SessionManager (lazy initialized)
108
164
  const sessionManagers = new Map();
109
165
  const photonPaths = new Map(); // compositeKey -> photonPath
@@ -484,6 +540,7 @@ async function runJob(jobId) {
484
540
  return;
485
541
  }
486
542
  logger.info('Running scheduled job', { jobId, method: job.method, photon: job.photonName });
543
+ trackExecution(key);
487
544
  try {
488
545
  const session = await sessionManager.getOrCreateSession('scheduler', 'scheduler');
489
546
  await sessionManager.loader.executeTool(session.instance, job.method, job.args || {});
@@ -511,6 +568,9 @@ async function runJob(jobId) {
511
568
  error: getErrorMessage(error),
512
569
  });
513
570
  }
571
+ finally {
572
+ untrackExecution(key);
573
+ }
514
574
  scheduleJob(job);
515
575
  }
516
576
  function unscheduleJob(jobId) {
@@ -525,13 +585,14 @@ function unscheduleJob(jobId) {
525
585
  }
526
586
  return existed;
527
587
  }
528
- /** Update persisted schedule file (from ScheduleProvider) after job execution */
588
+ /** Update persisted schedule file after job execution */
529
589
  function updatePersistedSchedule(jobId, photonName, updates) {
530
- // Only for jobs loaded from schedule files (format: photonName:sched:uuid)
531
- const match = jobId.match(/^[^:]+:sched:(.+)$/);
532
- if (!match)
590
+ // Handle both ScheduleProvider jobs (photonName:sched:uuid) and IPC jobs (photonName:*:ipc:uuid)
591
+ const schedMatch = jobId.match(/^[^:]+:sched:(.+)$/);
592
+ const ipcMatch = jobId.match(/^[^:]+(?::[^:]+)?:ipc:(.+)$/);
593
+ if (!schedMatch && !ipcMatch)
533
594
  return;
534
- const taskId = match[1];
595
+ const taskId = schedMatch ? schedMatch[1] : ipcMatch[1];
535
596
  const schedulesDir = path.join(process.env.PHOTON_SCHEDULES_DIR || path.join(os.homedir(), '.photon', 'schedules'), photonName.replace(/[^a-zA-Z0-9_-]/g, '_'));
536
597
  const filePath = path.join(schedulesDir, `${taskId}.json`);
537
598
  try {
@@ -547,6 +608,152 @@ function updatePersistedSchedule(jobId, photonName, updates) {
547
608
  // File may have been removed — ignore
548
609
  }
549
610
  }
611
/**
 * Persist an IPC-created schedule job to disk for daemon restart recovery.
 *
 * The record is written to `<schedulesDir>/<sanitizedPhotonName>/<taskId>.json`,
 * where the base directory is `PHOTON_SCHEDULES_DIR` or `~/.photon/schedules`.
 * This is best-effort: failures are logged as warnings and never thrown.
 *
 * @param {object} job - Scheduled job (`id`, `method`, `args`, `cron`,
 *   `photonName`, `workingDir`, `createdAt`, `createdBy`, `runCount`, `lastRun`).
 */
function persistIpcSchedule(job) {
  const baseDir = process.env.PHOTON_SCHEDULES_DIR || path.join(os.homedir(), '.photon', 'schedules');
  const schedulesDir = path.join(baseDir, job.photonName.replace(/[^a-zA-Z0-9_-]/g, '_'));
  // Extract taskId from job ID (format: photonName:dirHash:ipc:taskId or photonName:ipc:taskId)
  const match = job.id.match(/:ipc:(.+)$/);
  const taskId = match ? match[1] : job.id;
  const filePath = path.join(schedulesDir, `${taskId}.json`);
  // taskId originates from the IPC request, so it is untrusted: refuse any id
  // whose file would land outside this photon's schedules directory.
  if (path.dirname(filePath) !== schedulesDir) {
    logger.warn('Refusing to persist IPC schedule outside schedules directory', {
      jobId: job.id,
      path: filePath,
    });
    return;
  }
  // Invalid or missing timestamps must not throw (toISOString raises RangeError).
  const toIso = (ms) => {
    const date = new Date(ms);
    return Number.isNaN(date.getTime()) ? null : date.toISOString();
  };
  const persisted = {
    id: job.id,
    method: job.method,
    args: job.args || {},
    cron: job.cron,
    photonName: job.photonName,
    workingDir: job.workingDir,
    source: 'ipc',
    status: 'active',
    createdAt: toIso(job.createdAt) ?? new Date().toISOString(),
    createdBy: job.createdBy,
    executionCount: job.runCount,
    lastExecutionAt: job.lastRun ? toIso(job.lastRun) : null,
  };
  try {
    // recursive mkdir is a no-op when the directory already exists, so any
    // error raised here is a real failure and is reported with the write errors.
    fs.mkdirSync(schedulesDir, { recursive: true });
    fs.writeFileSync(filePath, JSON.stringify(persisted, null, 2));
    logger.debug('Persisted IPC schedule', { jobId: job.id, path: filePath });
  }
  catch (err) {
    logger.warn('Failed to persist IPC schedule', {
      jobId: job.id,
      error: getErrorMessage(err),
    });
  }
}
649
/**
 * Delete the persisted file of an IPC schedule, if there is one.
 *
 * Job IDs without an `:ipc:` segment are ignored. The file is looked up at
 * `<schedulesDir>/<sanitizedPhotonName>/<taskId>.json`. This is best-effort:
 * a missing file is not an error, and other failures are logged, not thrown.
 *
 * @param {string} jobId - Full job ID (e.g. `photonName:dirHash:ipc:taskId`).
 * @param {string} photonName - Photon that owns the job.
 */
function deletePersistedIpcSchedule(jobId, photonName) {
  const match = jobId.match(/:ipc:(.+)$/);
  if (!match) {
    return;
  }
  const baseDir = process.env.PHOTON_SCHEDULES_DIR || path.join(os.homedir(), '.photon', 'schedules');
  const schedulesDir = path.join(baseDir, photonName.replace(/[^a-zA-Z0-9_-]/g, '_'));
  const filePath = path.join(schedulesDir, `${match[1]}.json`);
  // The task id is caller-supplied; never unlink anything outside this
  // photon's schedules directory.
  if (path.dirname(filePath) !== schedulesDir) {
    logger.warn('Refusing to delete IPC schedule outside schedules directory', {
      jobId,
      path: filePath,
    });
    return;
  }
  try {
    // Unlink directly instead of exists-then-unlink to avoid a check/act race.
    fs.unlinkSync(filePath);
    logger.debug('Deleted persisted IPC schedule', { jobId, path: filePath });
  }
  catch (err) {
    // File may already be gone; anything else is worth surfacing.
    if (err?.code !== 'ENOENT') {
      logger.warn('Failed to delete persisted IPC schedule', {
        jobId,
        error: getErrorMessage(err),
      });
    }
  }
}
667
/**
 * Load all persisted IPC schedules from disk on daemon startup.
 *
 * Scans every sub-directory of the schedules base directory
 * (`PHOTON_SCHEDULES_DIR` or `~/.photon/schedules`) for `*.json` files and
 * registers each record with `source === 'ipc'` through `scheduleJob`.
 * Records whose latest activity (last execution or creation) is older than
 * 30 days are deleted instead of loaded. Every failure is logged and
 * skipped; this function never throws.
 */
function loadAllPersistedSchedules() {
  const baseDir = process.env.PHOTON_SCHEDULES_DIR || path.join(os.homedir(), '.photon', 'schedules');
  if (!fs.existsSync(baseDir)) {
    return;
  }
  const TTL_DAYS = 30;
  const ttlMs = TTL_DAYS * 24 * 60 * 60 * 1000;
  // Missing or unparsable timestamps count as "no activity" (0). Letting NaN
  // through would poison Math.max below and silently disable the TTL check.
  const toMillis = (value) => {
    if (!value) {
      return 0;
    }
    const ms = new Date(value).getTime();
    return Number.isNaN(ms) ? 0 : ms;
  };
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;
  let loadedCount = 0;
  let skippedCount = 0;

  let photonDirs = [];
  try {
    photonDirs = fs.readdirSync(baseDir, { withFileTypes: true });
  }
  catch (err) {
    logger.warn('Failed to scan schedules directory', {
      dir: baseDir,
      error: getErrorMessage(err),
    });
  }

  for (const dir of photonDirs) {
    if (!dir.isDirectory()) {
      continue;
    }
    const schedulesPath = path.join(baseDir, dir.name);
    let files;
    try {
      files = fs.readdirSync(schedulesPath).filter((f) => f.endsWith('.json'));
    }
    catch (err) {
      // One unreadable photon directory must not stop the remaining ones from loading.
      logger.warn('Failed to scan schedules directory', {
        dir: schedulesPath,
        error: getErrorMessage(err),
      });
      continue;
    }
    for (const file of files) {
      const filePath = path.join(schedulesPath, file);
      try {
        const task = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
        // Skip non-IPC jobs (ScheduleProvider handles its own)
        if (task.source !== 'ipc') {
          continue;
        }
        // Validate required fields: job IDs are later matched with string
        // methods, so they must actually be strings, not merely truthy.
        const valid = isNonEmptyString(task.id) &&
          isNonEmptyString(task.method) &&
          isNonEmptyString(task.cron) &&
          isNonEmptyString(task.photonName);
        if (!valid) {
          logger.warn('Skipping invalid persisted schedule', { file: filePath });
          skippedCount++;
          continue;
        }
        // TTL check: drop jobs with no activity in 30+ days
        const lastExec = toMillis(task.lastExecutionAt);
        const created = toMillis(task.createdAt);
        const lastActivity = Math.max(lastExec, created);
        if (lastActivity > 0 && Date.now() - lastActivity > ttlMs) {
          logger.info('Removing expired schedule (TTL)', {
            jobId: task.id,
            lastActivity: new Date(lastActivity).toISOString(),
          });
          try {
            fs.unlinkSync(filePath);
          }
          catch {
            /* ignore */
          }
          skippedCount++;
          continue;
        }
        // Skip if already registered (ScheduleProvider may have loaded it)
        if (scheduledJobs.has(task.id)) {
          continue;
        }
        const job = {
          id: task.id,
          method: task.method,
          args: task.args || {},
          cron: task.cron,
          runCount: task.executionCount || 0,
          createdAt: created || Date.now(),
          createdBy: task.createdBy,
          photonName: task.photonName,
          workingDir: task.workingDir,
        };
        // Round-trip the last execution time (persisted from job.lastRun) so it
        // is not lost across a restart.
        if (lastExec > 0) {
          job.lastRun = lastExec;
        }
        if (scheduleJob(job)) {
          loadedCount++;
        }
        else {
          logger.warn('Failed to schedule persisted job (invalid cron?)', { jobId: task.id });
          skippedCount++;
        }
      }
      catch (err) {
        logger.warn('Failed to load persisted schedule file', {
          file: filePath,
          error: getErrorMessage(err),
        });
        skippedCount++;
      }
    }
  }

  if (loadedCount > 0 || skippedCount > 0) {
    logger.info('Loaded persisted schedules', { loaded: loadedCount, skipped: skippedCount });
  }
}
550
757
  // ════════════════════════════════════════════════════════════════════════════════
551
758
  // WEBHOOK HTTP SERVER
552
759
  // ════════════════════════════════════════════════════════════════════════════════
@@ -644,6 +851,8 @@ function startWebhookServer(port) {
644
851
  }
645
852
  resolvedMethod = mapped;
646
853
  }
854
+ const webhookKey = compositeKey(photonName);
855
+ trackExecution(webhookKey);
647
856
  try {
648
857
  const session = await sessionManager.getOrCreateSession('webhook', 'webhook');
649
858
  const result = await sessionManager.loader.executeTool(session.instance, resolvedMethod, args);
@@ -665,6 +874,9 @@ function startWebhookServer(port) {
665
874
  res.writeHead(500, { 'Content-Type': 'application/json' });
666
875
  res.end(JSON.stringify({ error: getErrorMessage(error) }));
667
876
  }
877
+ finally {
878
+ untrackExecution(webhookKey);
879
+ }
668
880
  })();
669
881
  });
670
882
  webhookServer.listen(port, () => {
@@ -821,8 +1033,14 @@ async function getOrCreateSessionManager(photonName, photonPath, workingDir) {
821
1033
  const depManager = sessionManagers.get(depKey);
822
1034
  if (!depManager)
823
1035
  throw new Error(`Dependency ${depName} not loaded`);
824
- const loaded = await depManager.getOrLoadInstance('');
825
- return depManager.loader.executeTool(loaded, method, args);
1036
+ trackExecution(depKey);
1037
+ try {
1038
+ const loaded = await depManager.getOrLoadInstance('');
1039
+ return await depManager.loader.executeTool(loaded, method, args);
1040
+ }
1041
+ finally {
1042
+ untrackExecution(depKey);
1043
+ }
826
1044
  };
827
1045
  }
828
1046
  logger.info('Spawning worker thread for @worker photon', { photonName, key });
@@ -852,6 +1070,7 @@ async function getOrCreateSessionManager(photonName, photonPath, workingDir) {
852
1070
  key,
853
1071
  photonPath: pathToUse,
854
1072
  workingDir,
1073
+ ownerPid: daemonOwnershipConfirmed ? process.pid : null,
855
1074
  });
856
1075
  manager = new SessionManager(pathToUse, photonName, idleTimeout, logger.child({ scope: photonName }), workingDir);
857
1076
  // Wire @photon dependency resolver: when this photon's loader encounters
@@ -895,8 +1114,15 @@ async function getOrCreateSessionManager(photonName, photonPath, workingDir) {
895
1114
  prop !== 'on' &&
896
1115
  prop !== 'off') {
897
1116
  return async (params) => {
898
- const latest = depManager.getCurrentInstance(depInstanceKey) ?? loaded;
899
- return depManager.loader.executeTool(latest, prop, params || {});
1117
+ const depExecKey = compositeKey(depName, workingDir);
1118
+ trackExecution(depExecKey);
1119
+ try {
1120
+ const latest = depManager.getCurrentInstance(depInstanceKey) ?? loaded;
1121
+ return await depManager.loader.executeTool(latest, prop, params || {});
1122
+ }
1123
+ finally {
1124
+ untrackExecution(depExecKey);
1125
+ }
900
1126
  };
901
1127
  }
902
1128
  // Bind methods to current instance so `this` resolves correctly
@@ -1107,6 +1333,15 @@ async function handleRequest(request, socket) {
1107
1333
  if (request.type === 'ping') {
1108
1334
  return { type: 'pong', id: request.id };
1109
1335
  }
1336
+ // Reject new commands during shutdown (allow ping for health checks)
1337
+ if (isShuttingDown && request.type !== 'shutdown') {
1338
+ return {
1339
+ type: 'error',
1340
+ id: request.id,
1341
+ error: 'Daemon is shutting down',
1342
+ suggestion: 'Retry after the daemon restarts',
1343
+ };
1344
+ }
1110
1345
  if (request.type === 'status') {
1111
1346
  let totalSessions = 0;
1112
1347
  for (const sm of sessionManagers.values()) {
@@ -1403,9 +1638,16 @@ async function handleRequest(request, socket) {
1403
1638
  suggestion: 'Include photonName in the request payload',
1404
1639
  };
1405
1640
  }
1406
- const existing = scheduledJobs.get(request.jobId);
1641
+ // Generate IPC job ID with workingDir hash to prevent cross-project collisions
1642
+ const dirHash = request.workingDir
1643
+ ? crypto.createHash('sha256').update(request.workingDir).digest('hex').slice(0, 8)
1644
+ : '';
1645
+ const ipcJobId = dirHash
1646
+ ? `${photonName}:${dirHash}:ipc:${request.jobId}`
1647
+ : `${photonName}:ipc:${request.jobId}`;
1648
+ const existing = scheduledJobs.get(ipcJobId);
1407
1649
  const job = {
1408
- id: request.jobId,
1650
+ id: ipcJobId,
1409
1651
  method: request.method,
1410
1652
  args: request.args,
1411
1653
  cron: request.cron,
@@ -1416,6 +1658,9 @@ async function handleRequest(request, socket) {
1416
1658
  workingDir: request.workingDir,
1417
1659
  };
1418
1660
  const scheduled = scheduleJob(job);
1661
+ if (scheduled) {
1662
+ persistIpcSchedule(job);
1663
+ }
1419
1664
  return {
1420
1665
  type: 'result',
1421
1666
  id: request.id,
@@ -1428,8 +1673,28 @@ async function handleRequest(request, socket) {
1428
1673
  // Handle job unscheduling
1429
1674
  if (request.type === 'unschedule') {
1430
1675
  const jobId = request.jobId;
1431
- const unscheduled = unscheduleJob(jobId);
1432
- return { type: 'result', id: request.id, success: true, data: { unscheduled, jobId } };
1676
+ // Try exact match first, then look for IPC-prefixed version
1677
+ let actualJobId = jobId;
1678
+ if (!scheduledJobs.has(jobId)) {
1679
+ // Search for IPC-prefixed job
1680
+ for (const key of scheduledJobs.keys()) {
1681
+ if (key.endsWith(`:ipc:${jobId}`)) {
1682
+ actualJobId = key;
1683
+ break;
1684
+ }
1685
+ }
1686
+ }
1687
+ const job = scheduledJobs.get(actualJobId);
1688
+ const unscheduled = unscheduleJob(actualJobId);
1689
+ if (unscheduled && job) {
1690
+ deletePersistedIpcSchedule(actualJobId, job.photonName);
1691
+ }
1692
+ return {
1693
+ type: 'result',
1694
+ id: request.id,
1695
+ success: true,
1696
+ data: { unscheduled, jobId: actualJobId },
1697
+ };
1433
1698
  }
1434
1699
  // Handle list jobs
1435
1700
  if (request.type === 'list_jobs') {
@@ -1705,7 +1970,14 @@ async function handleRequest(request, socket) {
1705
1970
  // Snapshot state before execution for JSON Patch diffing
1706
1971
  const preSnapshot = await snapshotState(targetInst, photonName);
1707
1972
  const startTime = Date.now();
1708
- const result = await sessionManager.loader.executeTool(targetInst, request.method, request.args || {}, { outputHandler });
1973
+ trackExecution(cmdKey);
1974
+ let result;
1975
+ try {
1976
+ result = await sessionManager.loader.executeTool(targetInst, request.method, request.args || {}, { outputHandler });
1977
+ }
1978
+ finally {
1979
+ untrackExecution(cmdKey);
1980
+ }
1709
1981
  const durationMs = Date.now() - startTime;
1710
1982
  setPromptHandler(null);
1711
1983
  logger.info('Request completed', {
@@ -1791,7 +2063,14 @@ async function handleRequest(request, socket) {
1791
2063
  // Snapshot state before execution for JSON Patch diffing
1792
2064
  const preSnapshot = await snapshotState(session.instance, photonName);
1793
2065
  const startTime = Date.now();
1794
- const result = await sessionManager.loader.executeTool(session.instance, request.method, request.args || {}, { outputHandler });
2066
+ trackExecution(cmdKey);
2067
+ let result;
2068
+ try {
2069
+ result = await sessionManager.loader.executeTool(session.instance, request.method, request.args || {}, { outputHandler });
2070
+ }
2071
+ finally {
2072
+ untrackExecution(cmdKey);
2073
+ }
1795
2074
  const durationMs = Date.now() - startTime;
1796
2075
  setPromptHandler(null);
1797
2076
  logger.info('Request completed', {
@@ -2583,8 +2862,29 @@ function watchStateDir(workingDir) {
2583
2862
  // HOT RELOAD
2584
2863
  // ════════════════════════════════════════════════════════════════════════════════
2585
2864
  async function reloadPhoton(photonName, newPhotonPath, workingDir) {
2865
+ const key = compositeKey(photonName, workingDir);
2866
+ // Reload mutex: prevent concurrent reloads for the same photon
2867
+ // (format-on-save can trigger two rapid file change events)
2868
+ const existing = reloadMutex.get(key);
2869
+ if (existing) {
2870
+ logger.debug('Reload already in progress, waiting...', { photonName, key });
2871
+ await existing;
2872
+ }
2873
+ let mutexResolve;
2874
+ const mutexPromise = new Promise((resolve) => {
2875
+ mutexResolve = resolve;
2876
+ });
2877
+ reloadMutex.set(key, mutexPromise);
2878
+ try {
2879
+ return await doReloadPhoton(photonName, newPhotonPath, workingDir, key);
2880
+ }
2881
+ finally {
2882
+ reloadMutex.delete(key);
2883
+ mutexResolve();
2884
+ }
2885
+ }
2886
+ async function doReloadPhoton(photonName, newPhotonPath, workingDir, key) {
2586
2887
  try {
2587
- const key = compositeKey(photonName, workingDir);
2588
2888
  logger.info('Hot-reloading photon', { photonName, key, path: newPhotonPath });
2589
2889
  // If running in a worker, delegate reload to the worker
2590
2890
  if (workerManager.has(key)) {
@@ -2596,6 +2896,7 @@ async function reloadPhoton(photonName, newPhotonPath, workingDir) {
2596
2896
  worker: true,
2597
2897
  });
2598
2898
  logger.info('Worker photon reloaded', { photonName });
2899
+ workerManager.resetCrashHistory(key);
2599
2900
  }
2600
2901
  else {
2601
2902
  publishToChannel(`system:${photonName}`, {
@@ -2638,6 +2939,30 @@ async function reloadPhoton(photonName, newPhotonPath, workingDir) {
2638
2939
  });
2639
2940
  return { success: false, error: errorMessage };
2640
2941
  }
2942
+ // Drain: wait for in-flight executions to complete before swapping instances
2943
+ const DRAIN_TIMEOUT_MS = 2000;
2944
+ const tracker = activeExecutions.get(key);
2945
+ if (tracker && tracker.count > 0) {
2946
+ logger.info('Draining in-flight executions before reload', {
2947
+ photonName,
2948
+ activeCount: tracker.count,
2949
+ });
2950
+ await Promise.race([
2951
+ new Promise((resolve) => {
2952
+ tracker.drainResolve = resolve;
2953
+ }),
2954
+ new Promise((resolve) => setTimeout(() => {
2955
+ logger.warn('Drain timeout, proceeding with reload', {
2956
+ photonName,
2957
+ activeCount: tracker.count,
2958
+ timeoutMs: DRAIN_TIMEOUT_MS,
2959
+ });
2960
+ resolve();
2961
+ }, DRAIN_TIMEOUT_MS)),
2962
+ ]);
2963
+ // Clean up drain resolve if it was set but not called
2964
+ tracker.drainResolve = undefined;
2965
+ }
2641
2966
  const sessions = sessionManager.getSessions();
2642
2967
  let updatedCount = 0;
2643
2968
  for (const session of sessions) {
@@ -2891,7 +3216,11 @@ function startupWatchPhotons() {
2891
3216
  const manager = await getOrCreateSessionManager(p.name, p.path);
2892
3217
  if (manager) {
2893
3218
  await manager.getOrLoadInstance('');
2894
- logger.info('Eager-loaded lifecycle photon', { name: p.name });
3219
+ logger.info('Eager-loaded lifecycle photon', {
3220
+ name: p.name,
3221
+ photonPath: p.path,
3222
+ ownerPid: process.pid,
3223
+ });
2895
3224
  }
2896
3225
  }
2897
3226
  catch (err) {
@@ -2903,6 +3232,13 @@ function startupWatchPhotons() {
2903
3232
  }
2904
3233
  };
2905
3234
  setTimeout(() => {
3235
+ if (!daemonOwnershipConfirmed) {
3236
+ logger.warn('Skipping eager lifecycle load before exclusive ownership confirmation', {
3237
+ socketPath,
3238
+ currentPid: process.pid,
3239
+ });
3240
+ return;
3241
+ }
2906
3242
  eagerLoad().catch(() => { });
2907
3243
  }, 1000);
2908
3244
  }
@@ -2944,6 +3280,7 @@ function startupWatchPhotons() {
2944
3280
  function startServer() {
2945
3281
  const server = net.createServer((socket) => {
2946
3282
  logger.info('Client connected');
3283
+ connectedSockets.add(socket);
2947
3284
  let buffer = '';
2948
3285
  socket.on('data', (chunk) => {
2949
3286
  void (async () => {
@@ -2975,18 +3312,26 @@ function startServer() {
2975
3312
  });
2976
3313
  socket.on('end', () => {
2977
3314
  logger.info('Client disconnected');
3315
+ connectedSockets.delete(socket);
2978
3316
  cleanupSocketSubscriptions(socket);
2979
3317
  });
2980
3318
  socket.on('error', (error) => {
2981
3319
  logger.warn('Socket error', { error: getErrorMessage(error) });
3320
+ connectedSockets.delete(socket);
2982
3321
  cleanupSocketSubscriptions(socket);
2983
3322
  });
2984
3323
  socket.on('close', () => {
3324
+ connectedSockets.delete(socket);
2985
3325
  cleanupSocketSubscriptions(socket);
2986
3326
  });
2987
3327
  });
3328
+ daemonServer = server;
2988
3329
  server.listen(socketPath, () => {
2989
- logger.info('Global Photon daemon listening', { socketPath, pid: process.pid });
3330
+ logger.info('Global Photon daemon listening', {
3331
+ socketPath,
3332
+ pid: process.pid,
3333
+ ownerPid: process.pid,
3334
+ });
2990
3335
  });
2991
3336
  server.on('error', (error) => {
2992
3337
  logger.error('Server error', { error: getErrorMessage(error) });
@@ -3008,88 +3353,209 @@ function startServer() {
3008
3353
  });
3009
3354
  });
3010
3355
  }
3011
- function shutdown() {
3012
- logger.info('Shutting down global daemon');
3013
- if (idleTimer) {
3014
- clearTimeout(idleTimer);
3356
/**
 * Record this process as the owner of the daemon socket before it starts listening.
 *
 * Steps, in order:
 *  1. Read the owner record. If it names a different pid for the same socket path,
 *     either send that pid SIGTERM and wait for it to exit (when `isPidAlive` reports
 *     it alive) or just log that the record is stale; then remove the record.
 *  2. On non-Windows platforms, if the socket path exists and `isSocketResponsive`
 *     reports it unresponsive, unlink the socket file.
 *  3. Write a fresh owner record and the pid file, set `daemonOwnershipConfirmed`,
 *     and log the claim.
 *
 * NOTE(review): when the existing socket IS responsive it is left in place and
 * ownership is still claimed — confirm that the later listen/error path covers this.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the previous owner is still running after
 *   `waitForPidExit(pid, 5000)` (timeout presumably in milliseconds — confirm).
 */
async function claimExclusiveOwnership() {
    const currentPid = process.pid;
    const previous = readOwnerRecord(ownerFile);
    if (previous && previous.socketPath === socketPath && previous.pid !== currentPid) {
        const previousPid = previous.pid;
        if (!isPidAlive(previousPid)) {
            // The recorded owner is gone; only the record itself needs clearing.
            logger.warn('Removing stale daemon owner record', {
                socketPath,
                currentPid,
                ownerPid: previousPid,
            });
        }
        else {
            logger.warn('Sibling daemon detected for socket', {
                socketPath,
                currentPid,
                ownerPid: previousPid,
                action: 'terminate-stale-owner',
            });
            try {
                process.kill(previousPid, 'SIGTERM');
            }
            catch {
                // Ignore races with process exit
            }
            const previousExited = await waitForPidExit(previousPid, 5000);
            if (!previousExited) {
                logger.error('Failed to gain exclusive daemon ownership', {
                    socketPath,
                    currentPid,
                    ownerPid: previousPid,
                    action: 'startup-rejected',
                });
                throw new Error(`Could not terminate sibling daemon ${previousPid}`);
            }
        }
        removeOwnerRecord(ownerFile);
    }
    // The socket file is only probed on non-Windows platforms.
    const socketFilePresent = process.platform !== 'win32' && fs.existsSync(socketPath);
    if (socketFilePresent && !(await isSocketResponsive(socketPath))) {
        logger.warn('Removing stale daemon socket before listen', {
            socketPath,
            currentPid,
        });
        try {
            fs.unlinkSync(socketPath);
        }
        catch {
            // Ignore races with other cleanup
        }
    }
    const ownerRecord = {
        pid: currentPid,
        socketPath,
        claimedAt: Date.now(),
    };
    writeOwnerRecord(ownerFile, ownerRecord);
    fs.writeFileSync(pidFile, String(currentPid));
    daemonOwnershipConfirmed = true;
    logger.info('Daemon ownership claimed', {
        socketPath,
        currentPid,
        ownerPid: currentPid,
        pidFile,
        ownerFile,
    });
}
3422
/**
 * Shut the global daemon down and terminate the process.
 *
 * Sequence:
 *  1. Close the listening server so no new connections are accepted.
 *  2. Write a newline-terminated JSON `shutdown` message to every tracked socket.
 *  3. After a 500ms grace period, release timers, caches, pending prompts,
 *     watchers, workers and session managers, then remove the files this
 *     process still owns (socket, owner record, pid file).
 *
 * Only the first call does any work; later calls return immediately.
 * The function itself returns synchronously while cleanup continues in the
 * background. Cleanup always ends in `process.exit`: code 0 after a clean
 * teardown, code 1 if a cleanup step threw.
 *
 * @returns {void}
 */
function shutdown() {
    // Guard against multiple shutdown calls (e.g. SIGTERM + SIGINT in quick succession)
    if (isShuttingDown)
        return;
    isShuttingDown = true;
    logger.info('Shutting down global daemon');
    // Step 1: Close the listener — stop accepting new connections
    if (daemonServer) {
        daemonServer.close();
    }
    // Step 2: Broadcast shutdown signal to all connected sockets
    const shutdownMessage = JSON.stringify({
        type: 'shutdown',
        id: 'daemon-shutdown',
        reason: 'daemon-shutting-down',
    }) + '\n';
    for (const socket of connectedSockets) {
        try {
            socket.write(shutdownMessage);
        }
        catch {
            // Socket may already be closed
        }
    }
    // Step 3: Async cleanup with grace period for shutdown message flush
    void (async () => {
        let exitCode = 0;
        try {
            // Give sockets 500ms to receive the shutdown message
            await new Promise((resolve) => setTimeout(resolve, 500));
            if (idleTimer) {
                clearTimeout(idleTimer);
            }
            clearInterval(lockCleanupInterval);
            clearInterval(staleMapCleanupInterval);
            for (const timer of jobTimers.values()) {
                clearTimeout(timer);
            }
            jobTimers.clear();
            scheduledJobs.clear();
            activeLocks.clear();
            channelEventBuffers.clear();
            eventLogSeq.clear();
            stateKeysCache.clear();
            // Resolve any pending prompts so promises don't hang
            for (const pending of pendingPrompts.values()) {
                pending.resolve(null);
            }
            pendingPrompts.clear();
            socketPromptIds.clear();
            // Close file watchers and debounce timers
            for (const photonPath of fileWatchers.keys()) {
                unwatchPhotonFile(photonPath);
            }
            // Clean up poll-based watchers (bun fallback)
            for (const timer of pollTimers) {
                clearInterval(timer);
            }
            pollTimers.clear();
            // Terminate all worker threads FIRST (before session destroy,
            // so @photon deps in workers still respond during onShutdown)
            try {
                await workerManager.terminateAll();
            }
            catch (err) {
                logger.warn('Error terminating workers during shutdown', { error: getErrorMessage(err) });
            }
            // Gracefully destroy all session managers (calls onShutdown on instances)
            await Promise.allSettled(Array.from(sessionManagers.values()).map((m) => m.destroyGraceful()));
            sessionManagers.clear();
            if (webhookServer) {
                webhookServer.close();
            }
            // Only delete the socket if we still own it.
            // After `daemon stop` + `daemon start`, a new daemon may have already created
            // a new socket at this path. Deleting it would orphan the new daemon's listener.
            // This check reads the pid file, so it must run BEFORE the pid file is
            // removed below; otherwise the read always fails and the socket is never
            // cleaned up.
            if (fs.existsSync(socketPath) && process.platform !== 'win32') {
                let weOwnSocket = true;
                try {
                    const pidContent = fs.readFileSync(pidFile, 'utf-8').trim();
                    const filePid = Number.parseInt(pidContent, 10);
                    if (!Number.isNaN(filePid) && filePid !== process.pid) {
                        // PID file points to a different process — new daemon already started
                        weOwnSocket = false;
                        logger.info('Socket belongs to new daemon, skipping cleanup', {
                            ourPid: process.pid,
                            newPid: filePid,
                        });
                    }
                }
                catch {
                    // PID file missing (deleted by stop) — another process may own the socket now.
                    // If the socket still exists, it likely belongs to a new daemon. Don't delete.
                    weOwnSocket = false;
                }
                if (weOwnSocket) {
                    try {
                        fs.unlinkSync(socketPath);
                    }
                    catch {
                        // Ignore cleanup errors
                    }
                }
            }
            // Drop the owner record and pid file, but only when they still name this process.
            if (daemonOwnershipConfirmed) {
                const owner = readOwnerRecord(ownerFile);
                if (owner?.pid === process.pid && owner.socketPath === socketPath) {
                    removeOwnerRecord(ownerFile, process.pid);
                }
                try {
                    const pidContent = fs.readFileSync(pidFile, 'utf-8').trim();
                    if (Number.parseInt(pidContent, 10) === process.pid) {
                        fs.unlinkSync(pidFile);
                    }
                }
                catch {
                    // Ignore missing pid file
                }
            }
        }
        catch (err) {
            // A failing cleanup step must not leave the process alive with its
            // listener closed; record the failure and fall through to exit.
            exitCode = 1;
            logger.error('Daemon shutdown cleanup failed', { error: getErrorMessage(err) });
        }
        finally {
            process.exit(exitCode);
        }
    })();
}
3087
3542
  // Main execution
3088
- (() => {
3543
// Startup sequence. Ownership of the socket is claimed first; every later
// step runs only after that claim has resolved. Any failure along the way is
// logged and the process exits with code 1.
void (async () => {
    try {
        await claimExclusiveOwnership();
        startupWatchPhotons();
        startServer();
        loadAllPersistedSchedules();
        startWebhookServer(WEBHOOK_PORT);
        startIdleTimer();
        startHealthMonitor();
        // Notify photons that any locks from a prior daemon session are gone
        const locksResetNotice = {
            event: 'locks-reset',
            reason: 'daemon-startup',
            timestamp: Date.now(),
        };
        publishToChannel('system:*', locksResetNotice);
    }
    catch (err) {
        logger.error('Daemon startup failed', { error: getErrorMessage(err) });
        process.exit(1);
    }
})();
3095
3561
  //# sourceMappingURL=server.js.map