git-watchtower 1.12.3 → 1.12.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bin/git-watchtower.js +112 -44
  2. package/package.json +1 -1
@@ -101,7 +101,7 @@ const store = new Store();
101
101
 
102
102
  // Web dashboard server
103
103
  const { WebDashboardServer } = require('../src/server/web');
104
- const { Coordinator, Worker, generateProjectId, getActiveCoordinator, tryAcquireLock, finalizeLock, removeLock, removeSocket } = require('../src/server/coordinator');
104
+ const { Coordinator, Worker, generateProjectId, getActiveCoordinator, tryAcquireLock, finalizeLock, removeLock, removeSocket, isProcessAlive } = require('../src/server/coordinator');
105
105
 
106
106
  const PROJECT_ROOT = process.cwd();
107
107
 
@@ -802,7 +802,7 @@ const { ansi, box, truncate, sparkline: uiSparkline, visibleLength, stripAnsi, p
802
802
 
803
803
  // Error detection utilities imported from src/utils/errors.js
804
804
  const { ErrorHandler, isAuthError, isMergeConflict, isNetworkError } = require('../src/utils/errors');
805
- const { Mutex } = require('../src/utils/async');
805
+ const { Mutex, sleep } = require('../src/utils/async');
806
806
 
807
807
  // Keyboard handling utilities imported from src/ui/keybindings.js
808
808
  const { filterBranches } = require('../src/ui/keybindings');
@@ -3081,6 +3081,50 @@ async function handleWebAction(action, payload) {
3081
3081
  }
3082
3082
  }
3083
3083
 
3084
/**
 * Upper bound on how many times we try the worker handshake against an
 * existing coordinator before the caller has to either give up or reclaim
 * the lock from a dead coordinator.
 */
const WORKER_CONNECT_MAX_ATTEMPTS = 3;

/**
 * Initial backoff delay in milliseconds. The wait doubles after every failed
 * attempt, so with three attempts the waits are 200ms then 400ms (roughly
 * 600ms of added latency at worst).
 */
const WORKER_CONNECT_BASE_DELAY_MS = 200;

/**
 * Try to join an already-running coordinator as a worker, retrying a bounded
 * number of times with exponential backoff.
 *
 * A fresh Worker is built for every attempt and wired so that incoming
 * commands are forwarded to handleWebAction. Retrying stops early when
 * `isProcessAlive(existing.pid)` reports false, which lets the caller decide
 * whether to reclaim the coordinator lock.
 *
 * @param {{pid: number, port: number, socketPath: string}} existing - Coordinator lock info
 * @param {string} projectIdArg - Project ID used as the worker's id
 * @returns {Promise<Worker|null>} The connected worker, or null if no attempt succeeded
 */
async function connectWorkerWithRetry(existing, projectIdArg) {
  let attempt = 0;

  while (attempt < WORKER_CONNECT_MAX_ATTEMPTS) {
    attempt += 1;

    try {
      const candidate = new Worker({
        id: projectIdArg,
        projectPath: PROJECT_ROOT,
        projectName: path.basename(PROJECT_ROOT),
        socketPath: existing.socketPath,
      });
      candidate.onCommand = (action, payload) => handleWebAction(action, payload);
      await candidate.connect();
      return candidate;
    } catch (err) {
      // Out of attempts, or the coordinator is gone: hand control back to the
      // caller. The short-circuit keeps the liveness probe from running after
      // the final attempt.
      const outOfAttempts = attempt >= WORKER_CONNECT_MAX_ATTEMPTS;
      if (outOfAttempts || !isProcessAlive(existing.pid)) {
        return null;
      }

      // Back off before the next try: base delay doubled per failed attempt.
      const backoffMs = WORKER_CONNECT_BASE_DELAY_MS * 2 ** (attempt - 1);
      await sleep(backoffMs);
    }
  }

  return null;
}
3127
+
3084
3128
  /**
3085
3129
  * Create and start the web dashboard, with coordinator support.
3086
3130
  * @param {boolean} openBrowser - Whether to auto-open the browser
@@ -3108,51 +3152,75 @@ async function startWebDashboard(openBrowser) {
3108
3152
  // already owns the lock, connect as a worker instead. This prevents a
3109
3153
  // TOCTOU race where two instances both pass a "no coordinator" check and
3110
3154
  // then clobber each other's socket in Coordinator.start().
3111
- const lockResult = tryAcquireLock(process.pid);
3155
+ //
3156
+ // The outer loop runs at most twice so we can reclaim the coordinator
3157
+ // role if the existing coordinator dies while we're retrying the worker
3158
+ // handshake (e.g. it crashed just before we attached). Without this, a
3159
+ // transient connect failure (peer not yet accepting, EPIPE, slow fork)
3160
+ // against a coordinator that later crashes would leave us with no web
3161
+ // dashboard even though we could safely take over.
3162
+ let acquired = false;
3163
+ let existing = null;
3164
+ for (let outer = 0; outer < 2 && !acquired; outer++) {
3165
+ const lockResult = tryAcquireLock(process.pid);
3166
+ if (lockResult.acquired) {
3167
+ acquired = true;
3168
+ break;
3169
+ }
3112
3170
 
3113
- if (!lockResult.acquired) {
3114
- const existing = lockResult.existing || getActiveCoordinator();
3115
- if (existing) {
3116
- try {
3117
- worker = new Worker({
3118
- id: projectId,
3119
- projectPath: PROJECT_ROOT,
3120
- projectName: path.basename(PROJECT_ROOT),
3121
- socketPath: existing.socketPath,
3122
- });
3123
- worker.onCommand = (action, payload) => handleWebAction(action, payload);
3124
- await worker.connect();
3125
- addLog(`Joined web dashboard at ${localhostUrl(existing.port)} (tab)`, 'success');
3126
-
3127
- // Push state periodically
3128
- webStateInterval = setInterval(() => {
3129
- if (worker && worker.isConnected()) {
3130
- worker.pushState(webDashboard.getSerializableState());
3131
- } else {
3132
- clearInterval(webStateInterval);
3133
- webStateInterval = null;
3134
- }
3135
- }, 500);
3171
+ existing = lockResult.existing || getActiveCoordinator();
3172
+ if (!existing) {
3173
+ // Lock exists but we couldn't claim it and couldn't read the owner.
3174
+ // Bail out rather than race a concurrent startup.
3175
+ addLog('Web dashboard unavailable: could not acquire coordinator lock', 'error');
3176
+ webDashboard = null;
3177
+ render();
3178
+ return;
3179
+ }
3136
3180
 
3137
- // Don't start our own server — piggyback on the coordinator's.
3138
- // Don't open browser either — the existing tab will show this project automatically.
3139
- WEB_PORT = existing.port;
3140
- render();
3141
- return;
3142
- } catch (err) {
3143
- // Another coordinator owns the lock but we can't talk to it. Do NOT
3144
- // take over (that would unlink the live coordinator's socket). Run
3145
- // without a web dashboard for this instance.
3146
- worker = null;
3147
- addLog(`Could not join web dashboard at ${localhostUrl(existing.port)}: ${err.message}`, 'error');
3148
- webDashboard = null;
3149
- render();
3150
- return;
3151
- }
3181
+ // Try to connect as a worker with bounded retry + exponential backoff.
3182
+ // The coordinator may still be finishing its bind after finalizeLock()
3183
+ // writes the real socket path, or temporarily unresponsive.
3184
+ const connectedWorker = await connectWorkerWithRetry(existing, projectId);
3185
+ if (connectedWorker) {
3186
+ worker = connectedWorker;
3187
+ addLog(`Joined web dashboard at ${localhostUrl(existing.port)} (tab)`, 'success');
3188
+
3189
+ // Push state periodically
3190
+ webStateInterval = setInterval(() => {
3191
+ if (worker && worker.isConnected()) {
3192
+ worker.pushState(webDashboard.getSerializableState());
3193
+ } else {
3194
+ clearInterval(webStateInterval);
3195
+ webStateInterval = null;
3196
+ }
3197
+ }, 500);
3198
+
3199
+ // Don't start our own server — piggyback on the coordinator's.
3200
+ // Don't open browser either — the existing tab will show this project automatically.
3201
+ WEB_PORT = existing.port;
3202
+ render();
3203
+ return;
3152
3204
  }
3153
- // Lock exists but we couldn't claim it and couldn't read the owner.
3154
- // Bail out rather than race a concurrent startup.
3155
- addLog('Web dashboard unavailable: could not acquire coordinator lock', 'error');
3205
+
3206
+ // Every connect attempt failed. If the coordinator process died while
3207
+ // we were retrying, clean up the stale lock/socket and loop once to
3208
+ // claim the coordinator role ourselves. Otherwise abort — do NOT take
3209
+ // over a live coordinator's socket.
3210
+ if (!isProcessAlive(existing.pid)) {
3211
+ removeLock();
3212
+ removeSocket();
3213
+ continue;
3214
+ }
3215
+
3216
+ addLog(`Could not join web dashboard at ${localhostUrl(existing.port)}: coordinator unreachable`, 'error');
3217
+ webDashboard = null;
3218
+ render();
3219
+ return;
3220
+ }
3221
+
3222
+ if (!acquired) {
3223
+ addLog('Web dashboard unavailable: could not acquire coordinator lock after retry', 'error');
3156
3224
  webDashboard = null;
3157
3225
  render();
3158
3226
  return;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "git-watchtower",
3
- "version": "1.12.3",
3
+ "version": "1.12.4",
4
4
  "description": "Terminal-based Git branch monitor with activity sparklines and optional dev server with live reload",
5
5
  "main": "bin/git-watchtower.js",
6
6
  "bin": {