collab-runtime 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. collab/__init__.py +77 -0
  2. collab/__main__.py +11 -0
  3. collab_runtime-0.2.9.dist-info/METADATA +218 -0
  4. collab_runtime-0.2.9.dist-info/RECORD +82 -0
  5. collab_runtime-0.2.9.dist-info/WHEEL +5 -0
  6. collab_runtime-0.2.9.dist-info/entry_points.txt +3 -0
  7. collab_runtime-0.2.9.dist-info/licenses/LICENSE +21 -0
  8. collab_runtime-0.2.9.dist-info/top_level.txt +10 -0
  9. scripts/cleanup.py +395 -0
  10. scripts/collab_git_hook.py +190 -0
  11. scripts/format_code.py +594 -0
  12. scripts/generate_tests.py +560 -0
  13. scripts/validate_code.py +1397 -0
  14. src/__init__.py +4 -0
  15. src/dashboard/index.html +1131 -0
  16. src/live_locks_watcher.py +1982 -0
  17. src/lock_client.py +4268 -0
  18. src/logging_config.py +259 -0
  19. src/main.py +436 -0
  20. tests/backend/__init__.py +0 -0
  21. tests/backend/functional/__init__.py +0 -0
  22. tests/backend/functional/test_package_imports.py +43 -0
  23. tests/backend/integration/__init__.py +0 -0
  24. tests/backend/integration/test_cli_contract_parity.py +220 -0
  25. tests/backend/performance/__init__.py +0 -0
  26. tests/backend/reliability/__init__.py +0 -0
  27. tests/backend/security/__init__.py +0 -0
  28. tests/backend/unit/live_locks_watcher/__init__.py +5 -0
  29. tests/backend/unit/live_locks_watcher/_helpers.py +123 -0
  30. tests/backend/unit/live_locks_watcher/conftest.py +18 -0
  31. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_dashboard.py +188 -0
  32. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_developer.py +56 -0
  33. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_graceful_shutdown.py +459 -0
  34. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_main.py +1925 -0
  35. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_module.py +187 -0
  36. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_multi_session.py +320 -0
  37. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_notify.py +67 -0
  38. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_parsing.py +155 -0
  39. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_process_helpers.py +684 -0
  40. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_processing.py +173 -0
  41. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_prompt_abort.py +71 -0
  42. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_reconcile.py +516 -0
  43. tests/backend/unit/live_locks_watcher/test_live_locks_watcher_scan.py +296 -0
  44. tests/backend/unit/lock_client/__init__.py +1 -0
  45. tests/backend/unit/lock_client/_helpers.py +132 -0
  46. tests/backend/unit/lock_client/test_lock_client_acquire.py +214 -0
  47. tests/backend/unit/lock_client/test_lock_client_active.py +104 -0
  48. tests/backend/unit/lock_client/test_lock_client_api.py +63 -0
  49. tests/backend/unit/lock_client/test_lock_client_cli.py +682 -0
  50. tests/backend/unit/lock_client/test_lock_client_daemon.py +3730 -0
  51. tests/backend/unit/lock_client/test_lock_client_dashboard.py +438 -0
  52. tests/backend/unit/lock_client/test_lock_client_discover.py +241 -0
  53. tests/backend/unit/lock_client/test_lock_client_force_release.py +354 -0
  54. tests/backend/unit/lock_client/test_lock_client_helper_branches.py +1890 -0
  55. tests/backend/unit/lock_client/test_lock_client_history.py +301 -0
  56. tests/backend/unit/lock_client/test_lock_client_isolation.py +316 -0
  57. tests/backend/unit/lock_client/test_lock_client_pid.py +75 -0
  58. tests/backend/unit/lock_client/test_lock_client_reconcile.py +464 -0
  59. tests/backend/unit/lock_client/test_lock_client_release.py +77 -0
  60. tests/backend/unit/lock_client/test_lock_client_shutdown.py +1110 -0
  61. tests/backend/unit/lock_client/test_lock_client_utils.py +474 -0
  62. tests/backend/unit/lock_client/test_lock_client_watch.py +866 -0
  63. tests/backend/unit/scripts/__init__.py +1 -0
  64. tests/backend/unit/scripts/_helpers.py +42 -0
  65. tests/backend/unit/scripts/test_cleanup.py +285 -0
  66. tests/backend/unit/scripts/test_collab_git_hook.py +280 -0
  67. tests/backend/unit/scripts/test_collab_git_hook_ported.py +50 -0
  68. tests/backend/unit/scripts/test_format_code.py +368 -0
  69. tests/backend/unit/scripts/test_format_code_ported.py +177 -0
  70. tests/backend/unit/scripts/test_generate_tests.py +305 -0
  71. tests/backend/unit/scripts/test_hook_templates.py +357 -0
  72. tests/backend/unit/scripts/test_setup_hook_overlay.py +95 -0
  73. tests/backend/unit/scripts/test_validate_code.py +867 -0
  74. tests/backend/unit/scripts/test_validate_code_ported.py +237 -0
  75. tests/backend/unit/test_entrypoints_main_run.py +83 -0
  76. tests/backend/unit/test_logging_config.py +529 -0
  77. tests/backend/unit/test_main_watch_pid_file.py +278 -0
  78. tests/conftest.py +167 -0
  79. tests/frontend/__init__.py +0 -0
  80. tests/frontend/jest/__init__.py +0 -0
  81. tests/frontend/playwright/__init__.py +0 -0
  82. tests/packaging/test_smoke_install.py +76 -0
src/lock_client.py ADDED
@@ -0,0 +1,4268 @@
1
+ """Supabase-backed collaborative file lock client.
2
+
3
+ Provides atomic lock acquisition, release, and daemon management for preventing merge
4
+ conflicts in multi-developer workflows.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import atexit
10
+ import hashlib
11
+ import json
12
+ import logging
13
+ import os
14
+ import re
15
+ import shutil
16
+ import signal
17
+ import socket
18
+ import subprocess
19
+ import sys
20
+ import tempfile
21
+ import threading
22
+ import time
23
+ import uuid
24
+ from contextlib import contextmanager
25
+ from datetime import datetime, timedelta, timezone
26
+ from typing import Any, Dict, List, Optional, Tuple
27
+
28
+ from dotenv import load_dotenv
29
+
30
+ # CLI entrypoint (collab = "src.lock_client:main" in pyproject.toml).
31
+ # Main orchestration is in src/main.py; import here for backward compatibility.
32
+ from .main import _run_cli, main # noqa: F401
33
+
34
+
35
def _safe_now() -> datetime:
    """Return the current local time, tolerating a monkeypatched ``datetime``.

    The module-level ``datetime`` name may be swapped for a fake class or even
    a fake *instance* by tests. The lookup order is:

    1. ``datetime.now()`` on whatever the module-level name currently is.
    2. On ``TypeError``, the class-level ``now`` attribute called without
       binding; its result is accepted only if it is a genuine stdlib
       ``datetime``.
    3. The real stdlib ``datetime.now()``.

    Exceptions other than ``TypeError`` raised by step 1 propagate unchanged.
    """
    try:
        return datetime.now()
    except TypeError:
        # The patched ``now`` could not be invoked through normal binding;
        # continue with the class-level lookup below.
        pass

    # Fetched lazily so the genuine type is available for isinstance checks
    # even while the module-level ``datetime`` symbol is replaced.
    from datetime import datetime as stdlib_datetime

    try:
        owner = datetime if isinstance(datetime, type) else datetime.__class__
        class_level_now = getattr(owner, "now", None)
        if callable(class_level_now):
            try:
                candidate = class_level_now()
            except TypeError:
                # Unbound call failed as well; fall through to the stdlib clock.
                candidate = None
            if isinstance(candidate, stdlib_datetime):
                return candidate
    except Exception:
        pass

    # Last resort: the real clock.
    return stdlib_datetime.now()
70
+
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Logging configuration
74
+ # ---------------------------------------------------------------------------
75
+ logger = logging.getLogger("collab.lock_client")
76
+
77
+
78
def _emit_log_resilient(log: logging.Logger, level: int, msg: str, *args: Any) -> None:
    """Deliver one log record without ever raising, even during shutdown.

    The record is built once and offered to every handler found while walking
    from ``log`` up through its parents (stopping at the first logger whose
    ``propagate`` flag is false). A handler is skipped when the record is below
    its level, when its filters reject the record, or when it exposes a
    ``stream`` attribute that is already closed. Any exception raised by a
    handler is swallowed. If no handler accepted the record, a
    ``LEVEL: message`` line is written to ``sys.stderr`` when that stream is
    still open.

    Args:
        log: Logger whose handler chain should receive the record.
        level: Numeric logging level of the record.
        msg: %-style format string.
        *args: Arguments interpolated into ``msg``.
    """
    try:
        if log.disabled or level < log.getEffectiveLevel():
            return

        record = log.makeRecord(
            log.name, level, __file__, 0, msg, args, None, None, None
        )

        delivered = False
        node: Optional[logging.Logger] = log
        while node is not None:
            for hdlr in node.handlers:
                try:
                    if record.levelno < hdlr.level or not hdlr.filter(record):
                        continue
                    target = getattr(hdlr, "stream", None)
                    if target is not None and getattr(target, "closed", False):
                        # Stream already torn down; writing would only
                        # produce a "Logging error" traceback.
                        continue
                    hdlr.handle(record)
                    delivered = True
                except Exception:
                    # Best-effort: a failing handler must not stop the others.
                    continue
            # Climb to the parent only while propagation is enabled.
            node = node.parent if node.propagate else None

        if delivered:
            return

        # Nothing accepted the record: fall back to a plain stderr line.
        try:
            if sys.stderr is not None and not sys.stderr.closed:
                sys.stderr.write(f"{record.levelname}: {record.getMessage()}\n")
        except Exception:
            pass
    except Exception:
        pass
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # Environment
137
+ # ---------------------------------------------------------------------------
138
+ _THIS_DIR = os.path.dirname(os.path.abspath(__file__))
139
+
140
+
141
def _read_clean_env_path(name: str) -> Optional[str]:
    """Return a sanitized path-like environment override.

    Empty, whitespace-only and comment-only values are treated as unset.
    An inline comment is stripped so values like ``X=path # comment`` stay
    usable. A ``#`` only starts a comment when it is the first character or
    is preceded by whitespace, so a path that legitimately contains ``#``
    (for example ``/tmp/proj#1``) is returned intact instead of truncated.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The cleaned value, or ``None`` when the variable is missing or holds
        no usable path.
    """
    raw = os.getenv(name)
    if raw is None:
        return None

    cleaned = raw.strip()
    if not cleaned or cleaned.startswith("#"):
        return None

    # Cut at the first "<whitespace>#" — the start of an inline comment.
    comment_start = re.search(r"\s#", cleaned)
    if comment_start is not None:
        cleaned = cleaned[: comment_start.start()].rstrip()

    return cleaned or None
158
+
159
+
160
def _resolve_project_root() -> str:
    """Return the absolute project root used for runtime operations.

    The ``COLLAB_PROJECT_ROOT`` environment variable wins when it holds a
    usable value; otherwise the current working directory is used.
    """
    configured = _read_clean_env_path("COLLAB_PROJECT_ROOT")
    base = configured if configured else os.getcwd()
    return os.path.abspath(base)
171
+
172
+
173
+ # Resolve project root first — used by state-dir helpers below
174
+ _PROJECT_ROOT = _resolve_project_root()
175
+
176
+
177
def _is_test_mode() -> bool:
    """Report whether the process appears to be running under a test harness.

    True when ``COLLAB_TEST_MODE`` or ``TESTING`` equals ``"1"``, or when
    ``PYTEST_CURRENT_TEST`` is present in the environment.
    """
    env = os.environ
    if env.get("COLLAB_TEST_MODE") == "1":
        return True
    if env.get("TESTING") == "1":
        return True
    return "PYTEST_CURRENT_TEST" in env
184
+
185
+
186
def _get_state_dir() -> str:
    """Return the directory that holds transient runtime marker files.

    Resolution order:

    1. ``COLLAB_STATE_DIR`` when set to a usable value.
    2. A directory under the system temp dir named
       ``collab_runtime_<hash>``, where ``<hash>`` is the first 8 hex digits
       of the SHA-1 of the normalized project root. In test mode the name
       gains a ``_test_<pid>`` suffix.
    3. If step 2 fails: the ``_COLLAB_ROOT`` module global when defined and
       truthy, then the project root, then the current working directory.

    Directory creation is best-effort; the path is returned even when it
    could not be created.
    """

    def _ensure_exists(path: str) -> None:
        # Creation failures are tolerated on purpose.
        try:
            os.makedirs(path, exist_ok=True)
        except Exception:
            pass

    explicit_dir = _read_clean_env_path("COLLAB_STATE_DIR")
    if explicit_dir:
        _ensure_exists(explicit_dir)
        return os.path.abspath(str(explicit_dir))

    try:
        import hashlib as _hl
        import tempfile as _tf

        # Backslash-normalized, lower-cased root so every runtime that hashes
        # the same workspace arrives at the same directory name.
        canonical_root = _PROJECT_ROOT.replace("/", "\\").lower().rstrip("\\")
        digest = _hl.sha1(
            canonical_root.encode("utf-8"), usedforsecurity=False
        ).hexdigest()
        short_hash = digest[:8]
        tmp_base = _tf.gettempdir()
        namespace = "collab_runtime"
        dir_name = f"{namespace}_{short_hash}"
        if _is_test_mode():
            dir_name = f"{namespace}_{short_hash}_test_{os.getpid()}"
        derived_dir = os.path.join(tmp_base, dir_name)

        _ensure_exists(derived_dir)
        return os.path.abspath(str(derived_dir))
    except Exception:
        # Fall through to the fallback chain below.
        pass

    try:
        runtime_root = globals().get("_COLLAB_ROOT")
        if runtime_root:
            return os.path.abspath(str(runtime_root))
    except Exception:
        pass

    try:
        return os.path.abspath(_PROJECT_ROOT)
    except Exception:
        return os.getcwd()
238
+
239
+
240
def _resolve_runtime_root(project_root: str) -> str:
    """Return the persistent runtime root for the current project.

    Preference order:
        1. ``COLLAB_HOME`` env override (made absolute).
        2. ``COLLAB_STATE_DIR`` env override (made absolute), kept for
           backwards compatibility in tests/custom setups.
        3. ``project_root`` exactly as passed in.
    """
    for env_name in ("COLLAB_HOME", "COLLAB_STATE_DIR"):
        configured = _read_clean_env_path(env_name)
        if configured:
            return os.path.abspath(configured)
    return project_root
257
+
258
+
259
+ _COLLAB_ROOT = _resolve_runtime_root(_PROJECT_ROOT)
260
+ _RESOURCE_ROOT = _THIS_DIR
261
+ os.makedirs(_COLLAB_ROOT, exist_ok=True)
262
+
263
+
264
def _state_path(name: str) -> str:
    """Return the path of ``name`` inside the resolved state directory."""
    state_root = _get_state_dir()
    return os.path.join(state_root, name)
267
+
268
+
269
def _resolve_executable_path(name: str) -> Optional[str]:
    """Locate ``name`` on PATH and return its absolute path.

    ``shutil.which`` may raise ``AttributeError``, ``OSError`` or
    ``ValueError`` (for example when Windows-specific code paths are
    exercised on another platform); those are treated as "not found".

    Returns:
        The absolute path when the executable is found. When it is not
        found: the bare ``name`` in test mode (so tests can patch subprocess
        calls without relying on the host PATH), otherwise ``None``.
    """
    try:
        located = shutil.which(name)
    except (AttributeError, OSError, ValueError):
        located = None

    if located:
        return os.path.abspath(located)
    return name if _is_test_mode() else None
292
+
293
+
294
+ # Load .env from the project root (never modify .env)
295
+ load_dotenv(os.path.join(_PROJECT_ROOT, ".env"))
296
+
297
+ SUPABASE_URL = os.getenv("SUPABASE_URL")
298
+ SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY")
299
+ SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
300
+ LOCK_STRICT = os.getenv("LOCK_STRICT", "0") == "1"
301
+
302
+ # Expiry semantics: this project enforces NO automatic expiry. Locks persist
303
+ # until released explicitly. The DB RPC ignores time-based expiry; the
304
+ # expires_at column is kept for audit but is not used for automatic
305
+ # replacement. Clients do not send an expires_at value.
306
+
307
+ # Developer id prefixes treated as ephemeral (do not persist locks to the DB).
308
+ # Enforced in code (not configurable via .env) to avoid accidental skips.
309
+ EPHEMERAL_PREFIXES = ["test_dev", "ci"]
310
+
311
+ # (Intentionally no repo-level toggle) Do not expose a runtime flag to
312
+ # enable/disable runtime-path locking.
313
+
314
+ # PID file lives in the per-workspace state directory unless overridden.
315
+ # Tests can override this via COLLAB_PID_FILE env var to avoid interfering with
316
+ # the live production watcher.
317
+ # PID file location (transient state)
318
+ PID_FILE = os.getenv("COLLAB_PID_FILE") or os.path.join(_get_state_dir(), ".daemon.pid")
319
+
320
+ # Maximum retry attempts for network errors
321
+ MAX_RETRIES = 3
322
+
323
+ # ---------------------------------------------------------------------------
324
+ # Supabase client (lazy import)
325
+ # ---------------------------------------------------------------------------
326
+ _supabase_create_client = None
327
+
328
+
329
def _is_installed_package_origin(origin_abs: str) -> bool:
    """Tell whether an import origin lies inside an installed-package tree.

    The path is case-normalized with ``os.path.normcase`` and then checked
    for a ``site-packages`` or ``dist-packages`` directory component.
    """
    normalized = os.path.normcase(origin_abs)
    return any(
        f"{os.sep}{dirname}{os.sep}" in normalized
        for dirname in ("site-packages", "dist-packages")
    )
336
+
337
+
338
def _is_path_within(path: str, root: str) -> bool:
    """Return True when ``path`` is ``root`` itself or is located beneath it.

    Both paths are made absolute and case-normalized, and the comparison
    respects path-component boundaries, so ``/repo2/x`` is not considered to
    be inside ``/repo``.
    """
    path_norm = os.path.normcase(os.path.abspath(path))
    root_norm = os.path.normcase(os.path.abspath(root)).rstrip(os.sep)
    return path_norm == root_norm or path_norm.startswith(root_norm + os.sep)


def _exit_if_supabase_shadowed(supa_mod: Any) -> None:
    """Exit the process when ``supa_mod`` is a repo-local ``supabase`` shadow.

    The module's origin (``__spec__.origin`` or ``__file__``) is considered a
    shadow when it lies inside ``_COLLAB_ROOT`` and is not inside a
    ``site-packages``/``dist-packages`` tree. Any unexpected error while
    inspecting the module is ignored so odd test doubles do not break callers.
    """
    if supa_mod is None:
        return
    try:
        spec = getattr(supa_mod, "__spec__", None)
        spec_origin = getattr(spec, "origin", None) if spec else None
        origin = spec_origin or getattr(supa_mod, "__file__", None)
        if not origin:
            return
        origin_abs = os.path.abspath(origin)
        if _is_path_within(
            origin_abs, _COLLAB_ROOT
        ) and not _is_installed_package_origin(origin_abs):
            logger.error(
                "Detected local module 'supabase' at %s "
                "which shadows the installed package.",
                origin_abs,
            )
            logger.error(
                "Remove or rename this file/folder and re-run "
                "tests / watcher."
            )
            # SystemExit is not an Exception subclass, so it passes through
            # the handler below.
            sys.exit(1)
    except Exception:
        # Defensive: inspection problems must not block use of the module.
        pass


def _get_create_client():
    """Lazy-load and cache the supabase ``create_client`` function.

    Resolution:

    * A module already present in ``sys.modules['supabase']`` (commonly a
      test double) is preferred. The import machinery is still consulted
      once so a patched ``__import__`` that raises ``ImportError`` is
      honoured.
    * Otherwise the real package is imported.

    In both cases a module that originates from inside the runtime root
    (and not from an installed-package directory) is treated as accidental
    shadowing.

    Returns:
        The ``create_client`` callable (cached in ``_supabase_create_client``).

    Raises:
        SystemExit: With status 1 when the package cannot be imported, when
            the module is a local shadow, or when a preloaded module lacks
            ``create_client``.
    """
    global _supabase_create_client
    if _supabase_create_client is not None:
        return _supabase_create_client

    supa_mod = sys.modules.get("supabase")
    if supa_mod is not None:
        # Respect simulated "package missing" scenarios driven through a
        # patched import machinery.
        try:
            __import__("supabase")
        except ImportError:
            logger.error(
                "supabase-py is not installed (import failed). "
                "Install it with: pip install supabase"
            )
            sys.exit(1)

        _exit_if_supabase_shadowed(supa_mod)

        create_fn = getattr(supa_mod, "create_client", None)
        if create_fn is None:
            logger.error(
                "The 'supabase' module present in sys.modules "
                "does not expose 'create_client'."
            )
            logger.error(
                "If this is a test, ensure your fake module "
                "provides 'create_client'."
            )
            sys.exit(1)

        _supabase_create_client = create_fn
        return _supabase_create_client

    # Nothing preloaded: import the real package and fail loudly if absent.
    try:
        from supabase import create_client as create_fn
    except ImportError:
        logger.error(
            "supabase-py is not installed. Install it with: pip install supabase\n"
            "See .env.example for required environment variables."
        )
        sys.exit(1)

    # The import may have resolved to a repo-local file such as supabase.py.
    _exit_if_supabase_shadowed(sys.modules.get("supabase"))

    _supabase_create_client = create_fn
    return _supabase_create_client
457
+
458
+
459
@contextmanager
def _quiet_console_loggers(names: Optional[List[str]] = None):
    """Temporarily silence noisy console loggers for clean CLI output.

    While the context is active:

    - every logger in ``names`` is set to WARNING level;
    - propagation from the ``collab`` logger is disabled, so records reach
      only handlers attached to ``collab`` itself.

    On exit each logger's original level and the ``collab`` logger's
    original ``propagate`` flag are restored. A name listed more than once is
    restored to the level it had before the context was entered.

    Args:
        names: Logger names to quieten. Defaults to the HTTP/Supabase client
            loggers (``httpx``, ``httpcore``, ``urllib3``, ``postgrest``,
            ``supabase``).
    """
    if names is None:
        names = ["httpx", "httpcore", "urllib3", "postgrest", "supabase"]

    saved_levels: Dict[str, int] = {}
    for logger_name in names:
        target = logging.getLogger(logger_name)
        # Keep the first-seen level: a repeated name would otherwise record
        # the WARNING level this loop just applied.
        saved_levels.setdefault(logger_name, target.level)
        try:
            target.setLevel(logging.WARNING)
        except Exception:
            pass

    collab_logger = logging.getLogger("collab")
    previous_propagate = getattr(collab_logger, "propagate", True)
    try:
        # Keep collab.* records away from ancestor (console) handlers.
        collab_logger.propagate = False
        yield
    finally:
        for logger_name, level in saved_levels.items():
            try:
                logging.getLogger(logger_name).setLevel(level)
            except Exception:
                pass
        try:
            collab_logger.propagate = previous_propagate
        except Exception:
            pass
497
+
498
+
499
def _validate_credentials() -> None:
    """Exit with a clear error unless both Supabase credentials are present.

    Checks ``SUPABASE_URL`` and ``SUPABASE_ANON_KEY``. The logged message
    shows the URL value but only whether the key is set.

    Raises:
        SystemExit: With status 1 when either credential is missing.
    """
    if SUPABASE_URL and SUPABASE_ANON_KEY:
        return

    url_display = SUPABASE_URL or "(not set)"
    key_display = "(set)" if SUPABASE_ANON_KEY else "(not set)"
    logger.error(
        "Missing Supabase credentials.\n"
        " SUPABASE_URL=%s\n"
        " SUPABASE_ANON_KEY=%s\n\n"
        "Please copy .env.example to .env at the project root\n"
        "and fill in your Supabase project credentials.\n"
        "See README.md for setup instructions.",
        url_display,
        key_display,
    )
    sys.exit(1)
513
+
514
+
515
def _retry_on_network_error(func, *args, **kwargs) -> Any:
    """Call ``func`` and retry with exponential backoff on network errors.

    An exception counts as network-related when its lower-cased message
    contains ``timeout``, ``connection``, ``network`` or ``unreachable``.
    Any other exception is re-raised immediately. Between attempts the
    function sleeps 1s, 2s, 4s, ...; it does not sleep after the final
    attempt because no further retry follows.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns on the first successful attempt.

    Raises:
        Exception: The last network-related exception once ``MAX_RETRIES``
            attempts have failed, or any non-network exception right away.
        RuntimeError: If ``MAX_RETRIES`` is below 1 and no attempt was made.
    """
    network_markers = ("timeout", "connection", "network", "unreachable")
    last_error: Optional[BaseException] = None

    for attempt in range(MAX_RETRIES):
        try:
            return func(*args, **kwargs)
        except Exception as exc:
            message = str(exc).lower()
            if not any(marker in message for marker in network_markers):
                raise
            last_error = exc
            if attempt + 1 >= MAX_RETRIES:
                # Out of attempts: do not announce or wait for a retry that
                # will never happen.
                break
            wait = 2**attempt
            logger.debug(
                "Network error (attempt %d/%d), retrying in %ds: %s",
                attempt + 1,
                MAX_RETRIES,
                wait,
                exc,
            )
            time.sleep(wait)

    if last_error is None:
        raise RuntimeError(
            "_retry_on_network_error made no attempts (MAX_RETRIES < 1)"
        )

    # We are outside the ``except`` block here, so the exception must be
    # passed explicitly for the traceback to be attached to the record.
    logger.error(
        "Permanent network failure after %d attempts",
        MAX_RETRIES,
        exc_info=last_error,
    )
    raise last_error
544
+
545
+
546
+ # ---------------------------------------------------------------------------
547
+ # Supabase Lock Client
548
+ # ---------------------------------------------------------------------------
549
+ class LockClient:
550
+ """Supabase-backed file lock client.
551
+
552
+ All lock operations use the Supabase REST API with the official Python client. Lock
553
+ acquisition uses the atomic ``acquire_lock`` RPC function defined in ``schema.sql``
554
+ to prevent race conditions.
555
+ """
556
+
557
def __init__(
    self, developer_id: Optional[str] = None, local_only: bool = False
) -> None:
    """Set up client state and, unless disabled, the Supabase connection.

    Args:
        developer_id: Identity to lock under. Falls back to the
            ``COLLAB_DEVELOPER_ID`` env var, then to ``_get_git_username()``.
        local_only: When true, no credentials are validated and no remote
            client is created.

    A remote client is also skipped when the developer id starts with one
    of ``EPHEMERAL_PREFIXES``. Otherwise credentials are validated and the
    client is created with the service-role key when available, else the
    anon key.
    """
    from typing import cast

    self.local_only = local_only
    self.developer_id = (
        developer_id or os.getenv("COLLAB_DEVELOPER_ID") or self._get_git_username()
    )

    # Connection / session state, filled in later.
    self._client: Optional[Any] = None
    self._branch_name: Optional[str] = None
    self._session_token: Optional[str] = None
    self._parent_pid: Optional[int] = None

    # Heartbeat bookkeeping.
    self._heartbeat_file: Optional[str] = None
    self._heartbeat_grace_seconds: int = 10
    # One-time soft-skip flag to tolerate a short heartbeat hiccup.
    self._heartbeat_soft_skipped: bool = False

    # OS-level parent monitor status (Windows).
    self._parent_monitor_started: bool = False
    self._parent_monitor_handle: Optional[int] = None
    self._parent_monitor_thread: Optional[threading.Thread] = None

    # Admin rights follow from the presence of a service-role key.
    self._is_admin: bool = bool(SUPABASE_SERVICE_ROLE_KEY)

    # Ephemeral identities (CI/test prefixes) never persist locks to the DB.
    # The prefix list is enforced in code rather than via configuration.
    self._ephemeral_developer_ids: set[str] = set()
    try:
        self._is_ephemeral: bool = bool(
            self.developer_id
        ) and self.developer_id.startswith(tuple(EPHEMERAL_PREFIXES))
    except Exception:
        # Defensive: developer_id was not a str for some reason.
        self._is_ephemeral = False

    if self.local_only or self._is_ephemeral:
        return

    _validate_credentials()
    api_key = SUPABASE_SERVICE_ROLE_KEY or SUPABASE_ANON_KEY
    factory = cast(Any, _get_create_client())
    self._client = cast(Any, factory(SUPABASE_URL, api_key))
601
+
602
def _normalize_file_path(self, file_path: str) -> str:
    """Convert ``file_path`` to a project-relative, forward-slash path.

    Absolute paths are made relative to the project root; backslashes become
    forward slashes; a single leading ``./`` is dropped. This keeps stored
    paths in the same form as ``git status --porcelain`` output. If anything
    fails, the input is returned with only the separators converted.
    """
    try:
        relative = (
            os.path.relpath(file_path, _PROJECT_ROOT)
            if os.path.isabs(file_path)
            else file_path
        )
        # Forward slashes keep database entries consistent across platforms.
        relative = relative.replace("\\", "/")
        return relative[2:] if relative.startswith("./") else relative
    except Exception:
        return file_path.replace("\\", "/")
622
+
623
@property
def is_admin(self) -> bool:
    """Whether this client holds admin privileges (service role key)."""
    has_admin_rights = self._is_admin
    return has_admin_rights
627
+
628
def _get_session_token(self) -> str:
    """Derive the stable session token for this user, machine and project.

    The token is the first 16 hex characters of the SHA-256 of
    ``<developer id>:<hostname>:<project root>`` (all lower-cased). It must
    NEVER be random, because a random token cannot be reconstructed for
    cross-IDE re-adoption. Each component therefore has its own safe
    fallback (``unknown``, ``localhost``, ``project``) instead of the whole
    derivation being abandoned.
    """
    try:
        owner = (
            str(self.developer_id).strip().lower()
            if self.developer_id
            else "unknown"
        )
    except Exception:
        owner = "unknown"

    try:
        machine = socket.gethostname().lower()
    except Exception:
        machine = "localhost"

    try:
        root = os.path.abspath(_PROJECT_ROOT).lower().rstrip("\\/")
    except Exception:
        root = _PROJECT_ROOT.lower().rstrip("\\/") if _PROJECT_ROOT else "project"

    seed = ":".join((owner, machine, root))
    digest = hashlib.sha256(seed.encode())
    return digest.hexdigest()[:16]
655
+
656
def _is_same_machine_token(self, stored_token: str) -> bool:
    """Return True if ``stored_token`` could have been generated here.

    Recomputes the session-token hash for several plausible developer ids
    and project-root spellings, to absorb environment differences between
    IDEs/terminals. Developer-id candidates are: this client's id (raw and
    stripped) or ``unknown`` when no id is set, ``git config user.name``,
    and the ``USERNAME``/``USER``/``LOGNAME`` env vars. Hostname and project
    root use the same fallbacks as ``_get_session_token`` so a token minted
    by that method is always recognised.

    Args:
        stored_token: Token previously stored alongside a lock.

    Returns:
        True when any candidate combination hashes to ``stored_token``.
    """
    try:
        hostname = socket.gethostname().lower()
    except Exception:
        hostname = "localhost"
    try:
        p_root = os.path.abspath(_PROJECT_ROOT).lower().rstrip("\\/")
    except Exception:
        p_root = _PROJECT_ROOT.lower().rstrip("\\/") if _PROJECT_ROOT else "project"

    # Candidate developer ids (a set, so duplicates collapse for free).
    candidates: set[str] = set()
    if self.developer_id:
        lowered = str(self.developer_id).lower()
        # The stripped variant covers whitespace differences.
        candidates.update((lowered, lowered.strip()))
    else:
        # Mirrors the fallback id used when deriving the session token.
        candidates.add("unknown")

    # git config user.name as seen from the current environment.
    try:
        git_name = (
            subprocess.check_output(
                ["git", "config", "user.name"],
                stderr=subprocess.DEVNULL,
            )
            .decode()
            .strip()
            .lower()
        )
        if git_name:
            candidates.add(git_name)
    except Exception:
        pass

    # System username variables as a further fallback.
    for env_var in ("USERNAME", "USER", "LOGNAME"):
        val = os.getenv(env_var)
        if val:
            candidates.add(val.lower())

    # Project root with and without a trailing separator.
    path_variants = (p_root, p_root + "/", p_root + "\\")

    for dev_id in candidates:
        for p in path_variants:
            seed = f"{dev_id}:{hostname}:{p}"
            token = hashlib.sha256(seed.encode()).hexdigest()[:16]
            if token == stored_token:
                logger.debug(
                    "Token matched same-machine variant: dev_id=%r path=%r",
                    dev_id,
                    p,
                )
                return True
    return False
714
+
715
+ # ------------------------------------------------------------------
716
+ # Git helpers
717
+ # ------------------------------------------------------------------
718
@staticmethod
def _get_git_username() -> str:
    """Work out the developer identity from git config or the environment.

    Returns the non-empty output of ``git config user.name`` when available;
    otherwise the ``USERNAME`` env var, then ``USER``, then the literal
    ``unknown_user``.
    """
    try:
        raw = subprocess.check_output(
            ["git", "config", "user.name"], stderr=subprocess.DEVNULL
        )
        configured = raw.decode().strip()
        if configured:
            return configured
    except Exception:
        # git missing or not configured: use the environment instead.
        pass

    for env_var in ("USERNAME", "USER"):
        value = os.getenv(env_var)
        if value:
            return value
    return "unknown_user"
734
+
735
@staticmethod
def _get_current_branch() -> Optional[str]:
    """Return the output of ``git branch --show-current`` for the project.

    The command runs in the project root with stderr discarded. On Windows
    it is started with ``creationflags=0x08000000``. Any failure yields
    ``None``.
    """
    try:
        run_options: Dict[str, Any] = {
            "stderr": subprocess.DEVNULL,
            "cwd": _PROJECT_ROOT,
        }
        if sys.platform == "win32":
            run_options["creationflags"] = 0x08000000
        output = subprocess.check_output(
            ["git", "branch", "--show-current"], **run_options
        )
        return output.decode().strip()
    except Exception:
        return None
762
+
763
+ # ------------------------------------------------------------------
764
+ # Response parsing (handles varying supabase-py response shapes)
765
+ # ------------------------------------------------------------------
766
+ @staticmethod
767
+ def _parse_response(res) -> Tuple[Optional[int], Any, Any]:
768
+ """Normalize supabase-py response into (status, data, error)."""
769
+ status = getattr(res, "status_code", None) or getattr(res, "status", None)
770
+ data = getattr(res, "data", None)
771
+ error = getattr(res, "error", None)
772
+ if isinstance(res, dict):
773
+ status = status or res.get("status") or res.get("status_code")
774
+ data = data if data is not None else res.get("data")
775
+ error = error or res.get("error")
776
+ return (status, data, error)
777
+
778
+ # ------------------------------------------------------------------
779
+ # Remote lock scanning (like pycharm_watcher)
780
+ # ------------------------------------------------------------------
781
def _scan_remote_locks(self) -> None:
    """Fetch every row of ``file_locks`` and debug-log this developer's locks.

    Rows without a ``file_path`` and rows owned by someone else are ignored.
    Any failure (including a missing client) is logged at debug level and
    otherwise swallowed, so the scan never interrupts the caller.
    """

    try:
        client = self._client
        assert client is not None

        def _fetch_all():
            return client.table("file_locks").select("*").execute()

        _, rows, _ = self._parse_response(_retry_on_network_error(_fetch_all))

        for row in rows or ():
            owner = row.get("developer_id", "")
            path = row.get("file_path", "")
            # Skip rows with no path and rows that belong to other developers.
            if not path or owner != self.developer_id:
                continue
            logger.debug(
                "🔒 [LOCKED] %s — @%s (branch: %s, reason: %s)",
                path,
                owner,
                row.get("branch_name") or "main",
                row.get("reason") or "Auto-Watch Sync",
            )
    except Exception as exc:
        logger.debug("Remote lock scan failed: %s", exc)
816
+
817
+ # ------------------------------------------------------------------
818
+ # Public API
819
+ # ------------------------------------------------------------------
820
def acquire(
    self,
    file_path: str,
    reason: Optional[str] = None,
    branch_name: Optional[str] = None,
    expires_minutes: Optional[int] = None,
) -> Tuple[bool, str]:
    """Acquire a lock on *file_path* using the atomic ``acquire_lock`` RPC.

    Args:
        file_path: Project-relative or absolute path of the file to lock.
        reason: Optional free-text reason sent with the lock.
        branch_name: Branch to record; defaults to the current git branch.
        expires_minutes: Accepted for interface compatibility only; no expiry
            is sent to the backend (see the comment above ``rpc_params``).

    Returns:
        ``(True, token)`` when the lock was acquired (or the developer is
        ephemeral), otherwise ``(False, message)``.
    """
    # Local validation — accept either project-relative or absolute paths.
    full_path = (
        file_path
        if os.path.isabs(file_path)
        else os.path.join(_PROJECT_ROOT, file_path)
    )
    if not os.path.exists(full_path):
        # Deleted files can still be "in progress" (staged/unstaged delete
        # or committed-but-unpushed delete). Keep them lockable so the
        # dashboard still shows ownership until the lock is explicitly
        # released (for example on push).
        missing_norm = self._normalize_file_path(file_path)
        try:
            in_progress = missing_norm in set(self._get_modified_and_unpushed_files())
        except Exception:
            in_progress = False

        if not in_progress:
            return False, f"File or directory does not exist locally: {file_path}"

        logger.info(
            (
                "🔒 [DELETED-PATH] %s — path missing locally but "
                "tracked as in-progress"
            ),
            missing_norm,
        )

    # Locking directories creates noisy, transient dashboard rows
    # (for example runtime instance/ folders). Locks are file-oriented.
    if os.path.isdir(full_path):
        return False, f"Path is a directory and cannot be locked: {file_path}"

    # Ephemeral developer IDs do not persist locks to the backend
    # (useful for CI/test users). Short-circuit and return a local token.
    if getattr(self, "_is_ephemeral", False):
        token = f"ephemeral-{uuid.uuid4()}"
        logger.info(
            "🔒 [EPHEMERAL] %s (not persisted) — owner=%s",
            file_path,
            self.developer_id,
        )
        return True, token

    branch = branch_name or self._get_current_branch()
    token = self._get_session_token()

    # Normalize once and reuse for the RPC payload and every log line below,
    # so the watcher and dashboard see the same canonical path.
    norm_path = self._normalize_file_path(file_path)

    # Do not send expires_at: the RPC and DB intentionally ignore
    # time-based expiry. This keeps acquisition atomic while ensuring
    # locks persist until explicitly released.
    rpc_params = {
        "p_file_path": norm_path,
        "p_developer_id": self.developer_id,
        "p_branch_name": branch,
        "p_reason": reason,
        "p_lock_token": token,
        "p_is_ephemeral": bool(getattr(self, "_is_ephemeral", False)),
    }

    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        res = _retry_on_network_error(
            lambda: client.rpc("acquire_lock", rpc_params).execute()
        )
    except Exception as e:
        return False, f"API Error: {e}"

    status, data, error = self._parse_response(res)

    if error:
        msg = (
            error.get("message", str(error))
            if isinstance(error, dict)
            else str(error)
        )
        return False, f"API Error: {msg}"

    def _log_locked() -> None:
        logger.info(
            "🔒 [LOCKED] %s — @%s (branch: %s, reason: %s)",
            norm_path,
            self.developer_id,
            branch or "main",
            reason or "No reason",
        )

    # Parse RPC result. Only dict rows carry a status; anything else (for
    # example a scalar) falls through to the HTTP-status check instead of
    # raising AttributeError on ``.get``.
    if isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict):
        row = data[0]
        row_status = row.get("status")
        if row_status == "ok":
            _log_locked()
            return True, token
        if row_status == "conflict":
            owner = row.get("owner", "another developer")
            logger.warning(
                (
                    "⚠️ CONFLICT: %s is locked by @%s — your changes may "
                    "cause a merge conflict."
                ),
                norm_path,
                owner,
            )
            return False, (
                f"⚠ {file_path} is locked by @{owner}. Editing is not recommended."
            )

    if status in (200, 201):
        _log_locked()
        return True, token

    return False, f"Unexpected response: status={status}, data={data}"
948
+
949
def release(self, file_path: str) -> Tuple[bool, str]:
    """Delete this developer's lock row for *file_path*.

    Ephemeral developers never persisted a lock, so for them this is a
    logged no-op that reports success.

    Returns:
        ``(success, message)``.
    """
    if getattr(self, "_is_ephemeral", False):
        # Nothing was stored remotely, so there is nothing to delete.
        logger.info(
            "🔓 [EPHEMERAL-RELEASE] %s (no-op for %s)", file_path, self.developer_id
        )
        return True, "ephemeral-released"

    client = self._client
    assert client is not None, "Supabase client not initialized"

    try:
        norm = self._normalize_file_path(file_path)

        def _delete_own_row():
            table = client.table("file_locks")
            scoped = table.delete().eq("file_path", norm)
            return scoped.eq("developer_id", self.developer_id).execute()

        res = _retry_on_network_error(_delete_own_row)
    except Exception as e:
        return False, f"API Error: {e}"

    status, data, error = self._parse_response(res)
    if error:
        return False, f"API Error: {error}"

    if status not in (200, 204) and data is None:
        return False, "No lock released (not owner or lock does not exist)"

    logger.info(
        "🔓 [RELEASED] %s — lock released", self._normalize_file_path(file_path)
    )
    return True, "released"
986
+
987
def active(self) -> List[Dict]:
    """Return every row currently in ``file_locks``.

    Query exceptions and API errors are logged and yield an empty list.
    """
    client = self._client
    assert client is not None, "Supabase client not initialized"

    def _select_all():
        return client.table("file_locks").select("*").execute()

    try:
        res = _retry_on_network_error(_select_all)
    except Exception as e:
        logger.error("Exception in active() Supabase query: %s", e)
        return []

    _, rows, error = self._parse_response(res)
    if error:
        logger.error("Supabase error in active(): %s", error)
        return []
    if rows:
        return rows
    return []
1003
+
1004
def get_lock_status(self, file_path: str) -> Dict:
    """Report whether *file_path* (after normalization) has a lock row.

    Returns:
        ``{"is_locked": False, "error": ...}`` when the query fails,
        ``{"is_locked": False, "can_edit": True}`` when no row exists, and
        otherwise a dict describing the first matching row, with ``can_edit``
        true only when this developer owns it.
    """
    client = self._client
    assert client is not None, "Supabase client not initialized"

    try:
        norm = self._normalize_file_path(file_path)

        def _select_for_path():
            return (
                client.table("file_locks").select("*").eq("file_path", norm).execute()
            )

        res = _retry_on_network_error(_select_for_path)
    except Exception as e:
        return {"is_locked": False, "error": str(e)}

    _, data, error = self._parse_response(res)
    if error:
        return {"is_locked": False, "error": str(error)}
    if not data:
        return {"is_locked": False, "can_edit": True}

    # With server-side expiry disabled, a present row implies an active lock
    # until it is explicitly released, so expires_at is not exposed.
    row = data[0]
    holder = row.get("developer_id")
    return {
        "is_locked": True,
        "locked_by": holder,
        "acquired_at": row.get("acquired_at"),
        "reason": row.get("reason"),
        "can_edit": holder == self.developer_id,
    }
1041
+
1042
def release_all(self) -> int:
    """Release every active lock whose ``developer_id`` is this developer.

    Returns:
        The number of ``release`` calls that reported success.
    """
    own_locks = [
        entry
        for entry in self.active()
        if entry.get("developer_id") == self.developer_id
    ]
    return sum(1 for entry in own_locks if self.release(entry.get("file_path", ""))[0])
1055
+
1056
def force_release(self, file_path: str) -> Tuple[bool, str]:
    """Force-release a lock on *file_path*.

    Non-admin users can only force-release their own locks; the delete is
    additionally filtered by ``developer_id`` for them. Admin users can
    force-release any lock.

    The delete targets the normalized path, the same form ``acquire`` stores
    and ``release``/``get_lock_status`` query, so absolute or backslash paths
    match the stored row.

    Returns:
        ``(success, message)``.
    """
    if not self._is_admin:
        # Non-admin: verify the lock belongs to this developer.
        status_info = self.get_lock_status(file_path)
        if (
            status_info.get("is_locked")
            and status_info.get("locked_by") != self.developer_id
        ):
            owner = status_info.get("locked_by", "another developer")
            return False, (
                f"Permission denied: {file_path} is locked by @{owner}. "
                "Only admins can force-release other developers' locks."
            )

    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        norm = self._normalize_file_path(file_path)
        query = client.table("file_locks").delete().eq("file_path", norm)
        if not self._is_admin:
            query = query.eq("developer_id", self.developer_id)
        res = _retry_on_network_error(lambda: query.execute())
    except Exception as e:
        return False, f"API Error: {e}"

    _, data, error = self._parse_response(res)
    if error:
        return False, f"API Error: {error}"
    if data is not None:
        return True, "force-released"
    return False, "No lock removed"
1092
+
1093
def force_release_all(self) -> int:
    """Force-release all locks (admin only).

    Non-admin callers get a warning and ``0``. Deletion runs in batches of
    200 paths; a failing batch stops the run and returns the count so far.

    Returns:
        The number of locks released.
    """
    if not self._is_admin:
        logger.warning(
            "Attempted force_release_all without admin privileges (dev=%s)",
            self.developer_id,
        )
        return 0

    batch_size = 200
    try:
        # Collect the non-empty string paths of every active lock.
        candidates = (entry.get("file_path") for entry in self.active() or [])
        paths: List[str] = [p for p in candidates if isinstance(p, str) and p]
        if not paths:
            return 0

        client = self._client
        assert client is not None, "Supabase client not initialized"

        # PostgREST forbids DELETE without a WHERE clause, so delete by
        # file_path IN (<batch>) and keep batches small enough to avoid URL
        # length limits.
        deleted_total = 0
        for offset in range(0, len(paths), batch_size):
            batch = paths[offset : offset + batch_size]
            try:
                res = _retry_on_network_error(
                    lambda: client.table("file_locks")
                    .delete()
                    .in_("file_path", batch)
                    .execute()
                )
            except Exception as e:
                logger.error("force_release_all chunk delete failed: %s", e)
                return deleted_total

            _, data, error = self._parse_response(res)
            if error:
                logger.error("force_release_all API error: %s", error)
                return deleted_total

            # Prefer the returned rows; otherwise count the attempted paths.
            deleted_total += len(data) if isinstance(data, list) else len(batch)

        logger.info(
            "🔓 [FORCE-RELEASE-ALL] %d lock(s) released by admin", deleted_total
        )
        return deleted_total
    except Exception as e:
        logger.error("Failed to force_release_all: %s", e)
        return 0
1158
+
1159
def acquire_multiple(
    self,
    file_paths: List[str],
    reason: Optional[str] = None,
    branch_name: Optional[str] = None,
) -> Tuple[bool, List[str], str]:
    """Call ``acquire`` for each path in order and collect the failures.

    Every path is attempted even after an earlier one fails; each failure is
    logged as a warning.

    Returns:
        ``(all_ok, failed_paths, message)``.
    """
    rejected = []
    for path in file_paths:
        acquired, detail = self.acquire(path, reason=reason, branch_name=branch_name)
        if acquired:
            continue
        rejected.append(path)
        logger.warning("Lock conflict: %s — %s", path, detail)

    if not rejected:
        return True, [], "Success"
    return False, rejected, "Conflicts or errors"
1178
+
1179
def release_multiple(self, file_paths: List[str]) -> Tuple[bool, int, str]:
    """Call ``release`` for each path in order and count the successes.

    The first element of the result is always ``True``; callers that care
    about partial failure must compare the count with ``len(file_paths)``.

    Returns:
        ``(ok, count, message)``.
    """
    released = sum(1 for path in file_paths if self.release(path)[0])
    return True, released, "Success"
1190
+
1191
def history(self, file_path: Optional[str] = None, limit: int = 20) -> List[Dict]:
    """Fetch lock history records, newest ``id`` first.

    When *file_path* is provided, an exact match is tried first. If that
    returns nothing, a case-insensitive ``LIKE %<basename>%`` fallback query
    runs so the user does not have to remember the full stored path.

    Args:
        file_path: Optional path to filter on; ``None`` returns all files.
        limit: Maximum number of rows to request.

    Returns:
        The matching rows, or ``[]`` when the primary query fails.
    """
    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        q = client.table("file_locks_history").select("*")
        if file_path:
            q = q.eq("file_path", file_path)
        q = q.order("id", desc=True).limit(limit)
        res = q.execute()
    except Exception as exc:
        logger.error("Failed to fetch lock history: %s", exc)
        return []

    _, data, error = self._parse_response(res)
    if error:
        logger.error("History query error: %s", error)
        return []
    rows = data or []

    # Fallback: if exact match returned nothing, try a partial match on the
    # last path component (both slash styles are accepted).
    if not rows and file_path:
        try:
            basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
            q2 = (
                client.table("file_locks_history")
                .select("*")
                .ilike("file_path", f"%{basename}%")
                .order("id", desc=True)
                .limit(limit)
            )
            res2 = q2.execute()
            _, data2, error2 = self._parse_response(res2)
            if not error2 and data2:
                rows = data2
        except Exception as exc:
            # Fallback is best-effort: keep the empty exact-match result, but
            # leave a trace instead of silently discarding the failure.
            logger.debug("History fallback query failed: %s", exc)

    return rows
1235
+
1236
def prune_history(self, retention_days: int = 30) -> Tuple[bool, int, str]:
    """Delete lock history rows older than *retention_days* days.

    The ``prune_lock_history`` RPC is tried first. If it errors for any
    reason, a REST delete of rows whose ``released_at`` is before the cutoff
    is used instead.

    Returns:
        ``(ok, deleted_count, message)``.
    """
    if retention_days < 1:
        return False, 0, "retention_days must be >= 1"

    client = self._client
    assert client is not None, "Supabase client not initialized"

    def _deleted_from(payload) -> int:
        # The RPC result may be a bare number, or a list whose first item is a
        # number or a dict keyed by one of several count names.
        if isinstance(payload, list) and payload:
            first = payload[0]
            if isinstance(first, dict):
                for key in ("prune_lock_history", "deleted_count", "count"):
                    if key not in first:
                        continue
                    try:
                        return int(first[key])
                    except Exception:
                        pass
                return 0
            if isinstance(first, (int, float)):
                return int(first)
            return 0
        if isinstance(payload, (int, float)):
            return int(payload)
        return 0

    # Preferred path: RPC in schema.sql (stable, server-side retention logic).
    try:
        rpc_res = _retry_on_network_error(
            lambda: client.rpc(
                "prune_lock_history", {"p_retention_days": retention_days}
            ).execute()
        )
        _, rpc_data, rpc_error = self._parse_response(rpc_res)
        if rpc_error:
            raise RuntimeError(str(rpc_error))
        return True, _deleted_from(rpc_data), "history-pruned"
    except Exception as exc:
        # Backward-compatible fallback when RPC isn't deployed yet.
        logger.warning(
            "History prune RPC unavailable, falling back to REST delete: %s", exc
        )

    cutoff = _safe_now().astimezone(timezone.utc) - timedelta(days=retention_days)
    cutoff_iso = cutoff.isoformat()
    try:
        rest_res = _retry_on_network_error(
            lambda: (
                client.table("file_locks_history")
                .delete()
                .lt("released_at", cutoff_iso)
                .execute()
            )
        )
        _, rest_data, rest_error = self._parse_response(rest_res)
        if rest_error:
            return False, 0, f"API Error: {rest_error}"
        removed = len(rest_data) if isinstance(rest_data, list) else 0
        return True, removed, "history-pruned-fallback"
    except Exception as exc:
        return False, 0, f"API Error: {exc}"
1300
+
1301
+ # ------------------------------------------------------------------
1302
+ # Daemon management
1303
+ # ------------------------------------------------------------------
1304
def daemon_start(
    self, interval: int = 5, timeout_mins: int = 0, open_dashboard: bool = False
) -> None:
    """Start the watcher as a background daemon process.

    If the PID file points at a live watcher, nothing is started unless that
    watcher is orphaned (its recorded parent is dead), in which case it is
    terminated and replaced. Otherwise a ``src.lock_client watch --daemon``
    child is spawned, tied to the detected parent IDE when there is one, and
    the method waits up to 10 seconds for a live PID to appear in the PID file.

    Args:
        interval: Value passed to the child's ``--interval`` option.
        timeout_mins: Value passed to the child's ``--timeout`` option.
        open_dashboard: When true, ``--open-dashboard`` is added.
    """
    running_pid = self._read_pid()
    if running_pid and self._is_process_alive(running_pid):
        meta = self._read_pid_file()
        if not meta:
            # Legacy PID file without metadata - verify cmdline. An unreadable
            # cmdline (None) is treated as "running" rather than risking a
            # duplicate daemon; a readable non-matching one is stale.
            cmdline = self._get_cmdline_for_pid(running_pid)
            if (cmdline and self._cmdline_matches_watcher(cmdline)) or cmdline is None:
                print(f"Watcher already running (PID: {running_pid})")
                return
        else:
            recorded_parent = meta.get("parent_pid")
            if recorded_parent and not self._is_process_alive(recorded_parent):
                # Orphaned watcher - kill it and start fresh below.
                print(
                    f"Detected orphaned watcher (PID: {running_pid}, parent "
                    f"{recorded_parent} dead). Replacing..."
                )
                self._terminate_process(running_pid)
                time.sleep(0.5)  # Give it time to terminate
                self._remove_pid()
            else:
                # Parent is alive (or not recorded): the watcher is valid.
                entrypoint = meta.get("entrypoint", "")
                suffix = f" — {entrypoint}" if entrypoint else ""
                print(f"Watcher already running (PID: {running_pid}){suffix}")
                return

    print("Starting lock watcher in background...")

    # Defensive: a stop-request file left over from an earlier `daemon-stop`
    # would make the new watcher shut down immediately, so remove it first.
    try:
        stale_stop = _state_path(".stop_request")
        if os.path.exists(stale_stop):
            logger.debug(
                (
                    "Found stale stop request %s — removing before "
                    "starting new watcher"
                ),
                stale_stop,
            )
            try:
                os.remove(stale_stop)
            except Exception:
                logger.debug("Failed to remove stale stop request: %s", stale_stop)
    except Exception:
        # Best-effort — don't fail startup if we can't inspect/remove the file
        pass

    cmd = [sys.executable, "-m", "src.lock_client", "watch"]
    cmd += ["--interval", str(interval)]
    cmd += ["--timeout", str(timeout_mins)]
    cmd += ["--daemon", "--pid-file", PID_FILE]

    # Tie to parent PID for clean termination
    parent_pid, parent_method = self._get_parent_ide_pid()
    if not parent_pid:
        logger.debug("No parent IDE detected - watcher will run independently")
    else:
        cmd += ["--parent-pid", str(parent_pid)]
        parent_name, _ = self._get_process_info_local(parent_pid)
        parent_label = parent_name or "unknown"
        method_label = parent_method or "unknown"
        # Pass parent name + detection method to the child for its logging.
        cmd += ["--parent-name", parent_label, "--parent-method", method_label]
        # Kept at DEBUG so `collab daemon-start` console output stays quiet.
        logger.debug(
            "Tying watcher to parent process: %s (PID: %d) via %s",
            parent_label,
            parent_pid,
            method_label,
        )

    if open_dashboard:
        cmd.append("--open-dashboard")

    on_windows = sys.platform == "win32"

    # Never hand parent file handles to the child: it opens its own log files,
    # and inherited handles cause NUL corruption and file locking issues.
    spawn_kwargs = {
        "stdout": subprocess.DEVNULL,
        "stderr": subprocess.DEVNULL,
        "cwd": _PROJECT_ROOT,
    }
    if on_windows:
        pythonw = os.path.join(os.path.dirname(sys.executable), "pythonw.exe")
        if parent_pid:
            # CREATE_NO_WINDOW only: DETACHED_PROCESS would orphan the child
            # and prevent IDE shutdown detection.
            creation_flags = 0x08000000
            logger.debug(
                "Starting watcher tied to parent PID %d (no DETACHED)", parent_pid
            )
        else:
            # No parent to track - DETACHED_PROCESS + CREATE_NO_WINDOW.
            creation_flags = 0x00000008 | 0x08000000
            logger.debug("Starting detached watcher (no parent to track)")

        argv = [pythonw] + cmd[1:] if os.path.exists(pythonw) else cmd
        proc = subprocess.Popen(
            argv, creationflags=creation_flags, close_fds=True, **spawn_kwargs
        )
    else:
        if parent_pid:
            # Tied to parent - stay in same process group
            logger.debug(
                "Starting watcher tied to parent %d (same session)", parent_pid
            )
        else:
            # No parent to track - a new session detaches from the launcher.
            logger.debug("Starting detached watcher (new session)")
            spawn_kwargs["start_new_session"] = True
        proc = subprocess.Popen(cmd, **spawn_kwargs)

    if not on_windows:
        # On Linux/Mac the spawned proc.pid is the real child. Record it right
        # away; the child will soon overwrite it with its own metadata.
        self._write_pid(proc.pid)

    # Wait up to 10 seconds for the child loop to record its true PID. On a
    # Windows venv, pythonw.exe is a wrapper that exits quickly, so a PID
    # equal to the launcher's is only trusted once it has stayed stable.
    confirmed_pid = None
    for attempt in range(100):
        recorded = self._read_pid()
        if recorded and self._is_process_alive(recorded):
            maybe_launcher = on_windows and recorded == proc.pid
            if not maybe_launcher or attempt > 15:
                confirmed_pid = recorded
                break
        time.sleep(0.1)

    if confirmed_pid:
        print(f"✅ Started (PID: {confirmed_pid})")
        return

    print(
        "❌ Watcher process exited or failed to record PID. "
        f"(Launcher PID: {proc.pid})"
    )
    print(" Check logs/collab.log for details.")
    # Only drop the PID file if it still holds the launcher PID we wrote.
    if self._read_pid() == proc.pid:
        self._remove_pid()
1510
+
1511
def daemon_stop(self) -> None:
    """Stop the running watcher daemon.

    The PID from the PID file is used when it is alive; otherwise running
    watchers are discovered (skipped in test mode with the default PID file).
    Each target first gets a soft stop via the ``.stop_request`` file and is
    given about 8 seconds to exit. If it is still alive it is terminated, and
    on Unix killed if it survives a further 5 seconds.

    While this runs, the ``collab`` logger does not propagate, so records go
    to the collab log file without echoing on the terminal; the previous
    setting is restored on exit.
    """
    try:
        from .logging_config import setup_collab_logging

        setup_collab_logging(collab_dir=_COLLAB_ROOT)
    except Exception:
        # Best-effort: continue even if logging setup fails
        pass

    def _exited_within(pid: int, attempts: int, pause: float) -> bool:
        # Poll until the process is gone; False when it outlives every attempt.
        for _ in range(attempts):
            if not self._is_process_alive(pid):
                return True
            time.sleep(pause)
        return False

    def _signal_group_then_process(pid: int, sig) -> None:
        # Try the process group first, then fall back to the single process.
        try:
            os.kill(-pid, sig)
        except (ProcessLookupError, OSError):
            try:
                os.kill(pid, sig)
            except ProcessLookupError:
                pass

    def _forget_pid_file_if_matches(pid: int, failure_msg: str) -> None:
        # Remove the canonical PID file only when it refers to *pid*.
        try:
            if self._read_pid() == pid:
                self._remove_pid()
        except Exception:
            logger.debug(failure_msg, pid)

    collab_logger = logging.getLogger("collab")
    previous_propagate = getattr(collab_logger, "propagate", True)
    collab_logger.propagate = False
    try:
        # Try the PID file first; fall back to discovering running watcher
        # processes for this workspace if it is missing or stale.
        recorded_pid = self._read_pid()
        targets: List[int] = []

        if recorded_pid and self._is_process_alive(recorded_pid):
            targets = [recorded_pid]
        else:
            # Safety rail: during tests, never discover/stop external watcher
            # processes while the module still uses the production PID file.
            production_pid_file = os.path.join(_COLLAB_ROOT, ".daemon.pid")
            if _is_test_mode() and os.path.abspath(PID_FILE) == os.path.abspath(
                production_pid_file
            ):
                print("No running watcher found.")
                logger.info(
                    (
                        "Test mode with default PID file detected; "
                        "skipping watcher discovery fallback"
                    )
                )
                self._remove_pid()
                return

            try:
                discovered = self._discover_running_watchers()
                if not discovered:
                    print("No running watcher found.")
                    logger.info("No running watcher found for this workspace")
                    self._remove_pid()
                    return
                targets = discovered
            except Exception as e:
                logger.debug("Watcher discovery failed: %s", e)
                print("No running watcher found.")
                self._remove_pid()
                return

        # Stop each watcher PID (soft stop first, then force).
        for target_pid in targets:
            try:
                print(f"Stopping lock watcher (PID: {target_pid})...")
            except Exception:
                pass

            stop_file = _state_path(".stop_request")
            # Prefer a token-based stop request when the PID file has a token,
            # to avoid stopping an unrelated process after PID reuse or when
            # several watchers exist.
            try:
                meta = self._read_pid_file()
                token = meta.get("token") if meta and isinstance(meta, dict) else None
                payload = f"TOKEN:{token}" if token else f"PID:{target_pid}"

                with open(stop_file, "w", encoding="utf-8") as handle:
                    handle.write(payload)
                    handle.flush()
                    try:
                        os.fsync(handle.fileno())
                    except Exception:
                        pass
                logger.info(
                    "Wrote stop request file: %s (payload: %s)", stop_file, payload
                )
            except Exception as _e:
                logger.exception("Failed to write stop request file: %s", _e)

            # Wait up to ~8 seconds for the watcher to exit gracefully.
            _exited_within(target_pid, 16, 0.5)

            if not self._is_process_alive(target_pid):
                # Wait briefly for the shutdown marker
                shutdown_file = _state_path(".shutdown_complete")
                for _ in range(20):
                    if os.path.exists(shutdown_file):
                        break
                    time.sleep(0.1)

                # The stop request is deliberately left in place: the IDE
                # extension needs to see it to avoid an auto-restart, and the
                # next watcher startup cleans it up.
                _forget_pid_file_if_matches(
                    target_pid, "Failed to remove canonical PID after stop: %s"
                )

                logger.info("Stopped watcher (PID: %d)", target_pid)
                print("✅ Stopped.")
                continue

            # Soft stop did not work — fallback to forced termination
            if sys.platform == "win32":
                subprocess.run(
                    ["taskkill", "/F", "/T", "/PID", str(target_pid)],
                    capture_output=True,
                    creationflags=0x08000000,
                )
            else:
                _signal_group_then_process(target_pid, signal.SIGTERM)

            # Wait up to 5 seconds for a clean exit; force kill afterwards
            # (Unix only) if the process is still running.
            if not _exited_within(target_pid, 10, 0.5):
                if sys.platform != "win32":
                    _signal_group_then_process(target_pid, signal.SIGKILL)

            _forget_pid_file_if_matches(
                target_pid, "Failed to remove canonical PID after forced kill: %s"
            )

            logger.info("Stopped watcher (PID: %d) (forced)", target_pid)
            print("✅ Stopped.")

        # Final cleanup: ensure canonical PID file removed
        try:
            self._remove_pid()
        except Exception:
            pass
    finally:
        try:
            collab_logger.propagate = previous_propagate
        except Exception:
            pass
1693
+
1694
def daemon_status(self) -> bool:
    """Report whether the watcher daemon is running.

    Prints a one-line, human-readable status and returns the verdict.

    Resolution order:
      1. The canonical PID file (``PID_FILE``). A live PID is accepted when
         the file carries an ``entrypoint`` in JSON metadata, when the PID
         is this very process, when its command line matches the watcher,
         or when the command line cannot be determined at all.
      2. In local-only mode (``self.local_only``), discovery of running
         watcher processes.
      3. The legacy ``.pycharm_watcher.pid`` file under the collab root,
         kept for backward compatibility.

    Returns:
        True when a running watcher was identified, False otherwise.
    """

    def _report_discovered() -> bool:
        """Print and return True for the first live discovered watcher."""
        try:
            for found_pid in self._discover_running_watchers():
                if not self._is_process_alive(found_pid):
                    continue
                found_cmd = self._get_cmdline_for_pid(found_pid)
                if found_cmd and self._cmdline_matches_watcher(found_cmd):
                    print(
                        "✅ Lock watcher is RUNNING "
                        f"(PID: {found_pid}) — {found_cmd}"
                    )
                else:
                    print(
                        "✅ Lock watcher is RUNNING "
                        f"(PID: {found_pid}) (discovered)"
                    )
                return True
        except Exception as e:
            # Discovery is best-effort; a failure must not break `status`.
            logger.debug("Watcher discovery fallback failed: %s", e)
        return False

    pid = self._read_pid()
    local_only_mode = bool(getattr(self, "local_only", False))

    if pid and self._is_process_alive(pid):
        # Read the PID file again ourselves: the JSON form may carry an
        # ``entrypoint`` that is preferred for human-facing output, while
        # the legacy form is a plain integer with no metadata.
        entrypoint: Optional[str] = None
        had_metadata = False
        try:
            if os.path.exists(PID_FILE):
                with open(PID_FILE, "r", encoding="utf-8") as fh:
                    raw = fh.read().strip()
                if raw.startswith("{"):
                    had_metadata = True
                    entrypoint = json.loads(raw).get("entrypoint")
        except Exception:
            entrypoint = None

        if entrypoint:
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) — {entrypoint}")
            return True

        # Legacy plain-PID file: if the PID is our own process we can
        # report running without any command-line inspection.
        if not had_metadata and pid == os.getpid():
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) (cmdline unknown)")
            return True

        # Otherwise verify the command line so an unrelated process that
        # reused the PID is not mistaken for the watcher.
        cmdline = self._get_cmdline_for_pid(pid)
        if not cmdline:
            # Can't verify cmdline — assume running.
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) (cmdline unknown)")
            return True
        if self._cmdline_matches_watcher(cmdline):
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) — {cmdline}")
            return True
        logger.debug("PID %d cmdline: %s", pid, cmdline)

        # Stale or repurposed PID in the canonical file; in local-only CLI
        # mode, try process discovery before reporting NOT running.
        if local_only_mode and _report_discovered():
            return True

        # NOTE(review): this path returns False without printing the
        # NOT-running line and without consulting the legacy PID file.
        # Kept as found; confirm that the silence is intentional.
        return False

    # No canonical PID was available/alive: in local-only CLI mode fall
    # back to watcher process discovery.
    if local_only_mode and _report_discovered():
        return True

    # Fallback: check the legacy PyCharm watcher PID file.
    _legacy_pid_file = os.path.join(_COLLAB_ROOT, ".pycharm_watcher.pid")
    if os.path.exists(_legacy_pid_file):
        try:
            with open(_legacy_pid_file, "r", encoding="utf-8") as f:
                legacy_pid = int(f.read().strip())
            if self._is_process_alive(legacy_pid):
                print(f"✅ Lock watcher is RUNNING (PID: {legacy_pid})")
                return True
        except (ValueError, OSError):
            # Unreadable or malformed legacy file counts as "not running".
            pass

    print("❌ Lock watcher is NOT running.")
    return False
1814
+
1815
def cleanup_orphaned_processes(self) -> None:
    """Find and terminate orphaned lock_client watcher processes.

    Useful when log files are held open by leftover watcher processes.
    Locks are PRESERVED - only the watcher processes are terminated.

    Windows: candidate PIDs come from ``tasklist`` for the known Python
    image names; each command line is read with psutil when importable,
    and with WMIC only when psutil could not provide one. Matches are
    terminated with ``taskkill /F /T``.

    Unix: ``ps aux`` is scanned and matching processes receive SIGTERM.

    The current process is never targeted. In test mode only test
    watchers are eligible; outside test mode only non-test watchers are.
    """
    print("Scanning for orphaned lock_client processes...")
    killed = 0
    own_pid = os.getpid()
    is_test = _is_test_mode()

    def _should_kill(cmdline: str) -> bool:
        """Return True when *cmdline* is a lock_client we may terminate."""
        cmd = cmdline.lower()
        if "lock_client" not in cmd:
            return False

        # Safeguard: prevent test runs from killing production daemons
        # (and production runs from killing test watchers).
        is_test_watcher = (
            "pytest-of-" in cmd
            or "collab_test_" in cmd
            or "mockcmms_pytest_collab_" in cmd
        )
        return is_test_watcher if is_test else not is_test_watcher

    def _taskkill(target: int) -> None:
        """Force-terminate *target* and its child tree (Windows only)."""
        print(f"Killing orphaned lock_client (PID: {target})")
        subprocess.run(
            ["taskkill", "/F", "/T", "/PID", str(target)],
            capture_output=True,
            creationflags=0x08000000,  # CREATE_NO_WINDOW
        )

    if sys.platform == "win32":
        pids_to_check: set[int] = set()

        # Collect candidate PIDs for every Python executable name.
        for image in ("python.exe", "pythonw.exe", "python3.exe"):
            try:
                result = subprocess.run(
                    [
                        "tasklist",
                        "/FI",
                        f"IMAGENAME eq {image}",
                        "/FO",
                        "CSV",
                        "/NH",
                    ],
                    capture_output=True,
                    text=True,
                    creationflags=0x08000000,
                )
                for line in result.stdout.strip().split("\n"):
                    if not line.strip():
                        continue
                    parts = line.strip().strip('"').split('","')
                    if len(parts) < 2:
                        continue
                    try:
                        pid = int(parts[1])
                    except (ValueError, IndexError):
                        continue
                    # Don't kill ourselves
                    if pid != own_pid:
                        pids_to_check.add(pid)
            except Exception as e:
                logger.debug("Error scanning %s processes: %s", image, e)

        # Resolve the optional inspectors once instead of once per PID.
        try:
            import psutil
        except Exception:
            psutil = None
        wmic_available = bool(shutil.which("wmic"))

        for pid in sorted(pids_to_check):
            try:
                cmd = ""
                if psutil is not None:
                    try:
                        # Read the command line once: every call is a new
                        # OS query and the process may exit in between.
                        argv = psutil.Process(pid).cmdline()
                        cmd = (
                            " ".join(argv)
                            if isinstance(argv, (list, tuple))
                            else str(argv)
                        )
                    except psutil.NoSuchProcess:
                        continue
                    except Exception:
                        cmd = ""

                if cmd:
                    # psutil produced a command line, so its verdict is
                    # final; WMIC is only a fallback.
                    if _should_kill(cmd):
                        _taskkill(pid)
                        killed += 1
                    continue

                if not wmic_available:
                    # Cannot reliably inspect command-line on this host
                    logger.debug(
                        (
                            "Skipping command-line inspection for PID %d "
                            "(no psutil or wmic)"
                        ),
                        pid,
                    )
                    continue

                try:
                    result = subprocess.run(
                        [
                            "wmic",
                            "process",
                            "where",
                            f"ProcessId={pid}",
                            "get",
                            "CommandLine",
                            "/value",
                        ],
                        capture_output=True,
                        text=True,
                        creationflags=0x08000000,
                        errors="ignore",
                    )
                    out = (result.stdout or "").lower()
                    if _should_kill(out):
                        _taskkill(pid)
                        killed += 1
                except Exception as e:
                    logger.debug("Error checking PID %d via WMIC: %s", pid, e)
            except Exception as e:
                logger.debug("Error checking PID %d: %s", pid, e)
    else:
        # Unix: scan the `ps aux` listing.
        try:
            result = subprocess.run(
                ["ps", "aux"],
                capture_output=True,
                text=True,
            )
            for line in result.stdout.split("\n"):
                if "python" not in line.lower() or not _should_kill(line):
                    continue
                parts = line.split()
                if len(parts) < 2:
                    continue
                try:
                    pid = int(parts[1])  # second column of `ps aux`
                except ValueError:
                    continue
                # Don't kill ourselves
                if pid == own_pid:
                    continue
                print(f" Killing orphaned process (PID: {pid})")
                try:
                    os.kill(pid, signal.SIGTERM)
                    killed += 1
                except ProcessLookupError:
                    # Already gone between the listing and the signal.
                    pass
                except OSError as e:
                    # e.g. PermissionError for another user's process:
                    # skip it instead of aborting the whole scan.
                    logger.debug("Could not signal PID %d: %s", pid, e)
        except Exception as e:
            logger.warning("Error scanning for orphaned processes: %s", e)

    if killed > 0:
        print(f"✅ Killed {killed} orphaned process(es).")
        print("Log files should now be unlocked.")
        # Also clean up PID file if present
        self._remove_pid()
        return

    print("No orphaned lock_client processes found.")
    # Try to identify what's holding the log files
    if sys.platform == "win32":
        print("\nChecking what's holding log files...")
        for log_file in ["application.log", "errors.log"]:
            log_path = os.path.join(_COLLAB_ROOT, "logs", log_file)
            if not os.path.exists(log_path):
                continue
            try:
                # Opening for append fails when another process holds
                # the file exclusively.
                with open(log_path, "a"):
                    pass  # File is accessible
            except PermissionError:
                print(f" {log_file} is LOCKED by another process")
                print(f" Run: handle.exe {log_path} (from Sysinternals)")
            except Exception as e:
                print(f" {log_file}: {e}")
1996
+ # ------------------------------------------------------------------
1997
+ # Dashboard
1998
+ # ------------------------------------------------------------------
1999
def dashboard(self) -> None:
    """Open the collaborative dashboard in the default browser.

    Starts the local dashboard server via ``_prepare_dashboard_server``
    and asks the ``webbrowser`` module to open the resulting URL. When no
    URL could be prepared the method returns silently. When the browser
    cannot be launched the URL is printed so the user can open it by hand.
    """
    url, _ = self._prepare_dashboard_server()
    if not url:
        return
    try:
        import webbrowser

        # webbrowser.open() reports failure by returning False rather than
        # raising, so the return value has to be checked as well.
        opened = webbrowser.open(url)
    except Exception:
        opened = False
    if not opened:
        print(f"Open in browser manually: {url}")
2010
+
2011
def _prepare_dashboard_server(self) -> Tuple[Optional[str], Optional[str]]:
    """Create a temp HTML page with injected config and serve it locally.

    The dashboard template is copied into a fresh temporary file, prefixed
    with a ``<script>`` element that defines ``window.__SUPABASE_CONFIG__``
    (URL, anon key, optional service key, developer id). The file is then
    served by a threaded HTTP server bound to 127.0.0.1 on an ephemeral
    port; the server is shut down by an ``atexit`` hook.

    Returns:
        ``(url, tmp_path)`` on success, ``(None, None)`` on any error
        (missing/unreadable template, temp-file failure, server failure).
    """
    html_path = os.path.join(_RESOURCE_ROOT, "dashboard", "index.html")
    if not os.path.exists(html_path):
        logger.error("Dashboard file not found at %s", html_path)
        return None, None

    try:
        with open(html_path, "r", encoding="utf-8") as fh:
            content = fh.read()
    except Exception as e:
        logger.error("Error reading dashboard template: %s", e)
        return None, None

    injected = {
        "url": SUPABASE_URL or "",
        "anonKey": SUPABASE_ANON_KEY or "",
        "serviceKey": SUPABASE_SERVICE_ROLE_KEY or None,
        "user": self.developer_id or "",
    }
    # JSON placed inside a <script> element must not contain a literal
    # "</script>" (or "<!--"): the HTML parser would end the element there.
    # The \uXXXX escapes are still valid JSON/JS and decode to the same
    # characters.
    config_json = (
        json.dumps(injected)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )
    inject_script = (
        f"<script>window.__SUPABASE_CONFIG__ = {config_json};</script>\n"
    )

    # Serve from a private directory: the handler exposes every file in
    # its directory, and the shared system temp dir may hold unrelated
    # files, while this page itself embeds credentials.
    serve_dir: Optional[str] = None
    try:
        serve_dir = tempfile.mkdtemp(prefix="collab_dashboard_")
        tmp = tempfile.NamedTemporaryFile(
            mode="w",
            delete=False,
            suffix=".html",
            encoding="utf-8",
            dir=serve_dir,
        )
        try:
            tmp.write(inject_script)
            tmp.write(content)
            tmp.flush()
        finally:
            tmp.close()
    except Exception as e:
        if serve_dir:
            shutil.rmtree(serve_dir, ignore_errors=True)
        logger.error("Error creating temp dashboard file: %s", e)
        return None, None

    server = None
    try:
        import http.server
        from functools import partial

        tmp_dir = os.path.dirname(tmp.name)
        filename = os.path.basename(tmp.name)

        class _QuietHandler(http.server.SimpleHTTPRequestHandler):
            """Request handler that does not log requests.

            A subclass is used so the stdlib handler class is not patched
            for the whole process.
            """

            def log_message(self, format, *args):  # noqa: A002
                return None

        server = http.server.ThreadingHTTPServer(
            ("127.0.0.1", 0), partial(_QuietHandler, directory=tmp_dir)
        )
        port = server.server_address[1]  # port 0 -> OS-assigned port

        thread = threading.Thread(target=server.serve_forever, daemon=True)
        thread.start()

        def _safe_shutdown() -> None:
            """Best-effort dashboard server shutdown for process exit."""
            try:
                server.shutdown()
            except BaseException:
                pass
            try:
                server.server_close()
            except Exception:
                pass
            # The page holds credentials; don't leave it behind.
            shutil.rmtree(serve_dir, ignore_errors=True)

        atexit.register(_safe_shutdown)

        url = f"http://127.0.0.1:{port}/{filename}"

        # Probe until the listener accepts connections (about 1s at most).
        import socket as _socket

        for _ in range(20):
            try:
                with _socket.create_connection(("127.0.0.1", port), timeout=0.3):
                    break
            except Exception:
                time.sleep(0.05)

        return url, tmp.name
    except Exception as e:
        if server is not None:
            # Release the listening socket of a half-started server.
            try:
                server.server_close()
            except Exception:
                pass
        try:
            os.unlink(tmp.name)
        except Exception:
            pass
        shutil.rmtree(serve_dir, ignore_errors=True)
        logger.error("Failed to start local dashboard server: %s", e)
        return None, None
2102
+
2103
+ # ------------------------------------------------------------------
2104
+ # Watcher (foreground process)
2105
+ # ------------------------------------------------------------------
2106
def watch(
    self,
    interval: int = 5,
    timeout_mins: int = 0,
    open_dashboard: bool = False,
    daemon_mode: bool = False,
    parent_pid: Optional[int] = None,
    parent_name: Optional[str] = None,
    parent_method: Optional[str] = None,
    heartbeat_file: Optional[str] = None,
    heartbeat_grace_seconds: int = 10,
) -> None:
    """Run the file-watching loop (foreground).

    Called by daemon_start. On every iteration the loop compares the set
    of locally modified/unpushed files with the previous set, acquires
    locks for new files and releases locks for files that are no longer
    modified. Roughly every two seconds it also checks whether it should
    stop: a ``.stop_request`` file addressed to this watcher, a missing or
    stale heartbeat file, a dead/unresolvable/changed parent process, or
    adoption by a system process.

    Args:
        interval: Seconds to sleep between iterations.
        timeout_mins: Idle minutes after which the loop exits; 0 disables.
        open_dashboard: Also open the dashboard URL in a browser.
        daemon_mode: When True the parent PID is taken from *parent_pid*
            only (no fallback to ``os.getppid()``), so without an explicit
            *parent_pid* the tracked-parent liveness check is skipped.
        parent_pid: PID of the IDE/parent process to monitor.
        parent_name: Last known name of that process (diagnostics only).
        parent_method: How the parent PID was inferred; detected when None.
        heartbeat_file: File whose mtime the IDE extension keeps fresh.
        heartbeat_grace_seconds: Maximum heartbeat age before it is stale.

    ``_graceful_shutdown`` is always invoked from the ``finally`` clause,
    including after the early-return shutdown paths.
    """
    # Ensure file-based logging is wired so watch output goes to logs/
    from .logging_config import setup_collab_logging

    setup_collab_logging(collab_dir=_COLLAB_ROOT)

    if not daemon_mode:
        self._parent_pid = parent_pid or os.getppid()
    else:
        self._parent_pid = parent_pid

    self._heartbeat_file = heartbeat_file
    self._heartbeat_grace_seconds = heartbeat_grace_seconds
    # Reset soft-skip on (re)start of the watch loop
    self._heartbeat_soft_skipped = False

    # Include a short session token in PID metadata so stop requests can
    # target the intended watcher instance instead of relying solely on PIDs.
    try:
        token = self._get_session_token()
    except Exception:
        token = None
    self._write_pid(os.getpid(), parent_pid=self._parent_pid, token=token)
    logger.info("Wrote PID metadata to %s (PID: %d)", PID_FILE, os.getpid())

    # Defensive: remove any stale stop-request file on startup so we don't
    # immediately shut down. The IDE extension or CLI may have left this
    # behind from a previous session.
    try:
        stop_file = _state_path(".stop_request")
        if os.path.exists(stop_file):
            os.remove(stop_file)
            logger.info("Removed stale stop request on watch loop entry.")
    except Exception:
        pass

    self._register_signal_handlers()
    # Start a low-latency OS-level parent monitor (Windows) to detect
    # parent termination without relying on WMIC/tasklist polling.
    try:
        self._start_parent_monitor_thread()
    except Exception:
        # Best-effort: continue if monitor can't be started
        logger.debug("Parent monitor thread not started or failed to initialize")

    # NOTE: Job Object is disabled to allow graceful shutdown
    # The Job Object kills the process immediately when parent dies,
    # preventing signal handlers and atexit from running.
    # We rely on parent death detection and signal handlers instead.

    # Startup banner matching pycharm_watcher format exactly
    timeout_label = f"{timeout_mins}m" if timeout_mins > 0 else "disabled"
    logger.info("=" * 60)
    logger.info("Collab Locks -- Lock Client Watcher")
    logger.info("Developer: %s", self.developer_id)
    logger.info("Interval: %ds | Timeout: %s", interval, timeout_label)
    # Dashboard URL or command (like pycharm_watcher)
    dashboard_url, _ = self._prepare_dashboard_server()
    if dashboard_url:
        logger.info("Dashboard: %s", dashboard_url)
    else:
        logger.info("Dashboard: collab dashboard")
    # Optionally open the dashboard in the default browser when requested.
    if open_dashboard:
        try:
            if dashboard_url:
                # Reuse the server started above; self.dashboard() would
                # start a second server and write a second temp page.
                import webbrowser

                if not webbrowser.open(dashboard_url):
                    logger.warning(
                        "Could not launch a browser; open %s manually",
                        dashboard_url,
                    )
            else:
                self.dashboard()
        except Exception:
            logger.exception("Failed to open dashboard")
    logger.info("=" * 60)

    # Log session token (truncated) for debugging cross-IDE token divergence.
    # A token failure must not prevent the watcher from starting (the PID
    # metadata lookup above tolerates it in the same way).
    try:
        session_token = self._get_session_token()
    except Exception:
        session_token = None
    logger.debug(
        "Session token: %s... (dev=%s, host=%s)",
        (session_token or "")[:8],
        self.developer_id,
        socket.gethostname(),
    )

    # Initialize parent PID tracking for adoption detection (debug only)
    self._initial_ppid = os.getppid()
    logger.debug(
        "Initial parent PID recorded for adoption detection: %d", self._initial_ppid
    )

    last_change_time = _safe_now()
    last_parent_check = _safe_now()

    # Consecutive-failure counter for parent name resolution, used to
    # detect a parent that is reported alive but can no longer be named.
    _parent_name_unknown_streak = 0
    _last_known_parent_name = parent_name

    # Initial remote lock scan (logs [LOCKED] for existing locks)
    self._scan_remote_locks()

    # Startup reconciliation: sync Supabase lock state with local git
    last_modified = self._reconcile() or set()

    # Short grace window after startup where a missing heartbeat should
    # not immediately trigger shutdown. This avoids a race where the
    # extension spawns the watcher and the heartbeat file is created
    # a few milliseconds later.
    startup_time = time.time()

    # Normalize parent detection method if not provided by caller. This
    # ensures logs can state how the parent PID was inferred.
    if parent_method is None:
        parent_method = self._watch_resolve_parent_method(parent_pid)

    try:
        while True:
            try:
                # Parent process liveness check every 2 seconds
                # (faster zombie detection)
                if (_safe_now() - last_parent_check).total_seconds() > 2:
                    last_parent_check = _safe_now()

                    # Soft-stop request support: if a .stop_request file
                    # addressed to this watcher is present, perform a
                    # graceful shutdown instead of being forcibly killed.
                    try:
                        stop_file = _state_path(".stop_request")
                        if self._watch_stop_requested(stop_file):
                            logger.info(
                                (
                                    "Stop request detected (%s). "
                                    "Initiating graceful shutdown."
                                ),
                                stop_file,
                            )
                            # Do NOT remove the stop_file here. The IDE
                            # extension needs to see it after the process
                            # exits to avoid an automatic restart.
                            # The next watcher startup (via daemon_start)
                            # will clean it up.
                            self._graceful_shutdown(reason="stop_requested")
                            return
                    except Exception as exc:
                        # Best-effort - don't crash the watcher over the stop file
                        logger.debug("Stop-request polling failed: %s", exc)

                    # VSCode heartbeat support: if the heartbeat stops updating,
                    # treat it as IDE/window termination and shut down.
                    # NOTE: Check heartbeat even when an OS-level parent monitor
                    # exists. Some IDE reloads may not terminate the parent PID
                    # but will stop the extension/heartbeat; checking the
                    # heartbeat makes the watcher more robust to fast reloads.
                    if self._heartbeat_file:
                        try:
                            failure = self._watch_heartbeat_failure(startup_time)
                            if failure:
                                self._graceful_shutdown(reason=failure)
                                return
                        except Exception as e:
                            logger.debug("Heartbeat check exception: %s", e)

                    # Parent diagnostics are useful during debugging but too noisy
                    # for normal collab.log operation, so keep them at DEBUG.
                    parent_alive = (
                        self._is_process_alive(self._parent_pid)
                        if self._parent_pid
                        else False
                    )
                    parent_name = "unknown"
                    if self._parent_pid:
                        try:
                            name, _ = self._get_process_info_local(self._parent_pid)
                            if name:
                                parent_name = name
                        except Exception:
                            pass

                    # Track name-resolution failures for zombie process detection
                    if parent_name == "unknown":
                        _parent_name_unknown_streak += 1
                        # First transient failure: log at DEBUG
                        # to avoid noisy warnings
                        if (
                            _last_known_parent_name
                            and _parent_name_unknown_streak == 1
                        ):
                            logger.debug(
                                (
                                    "Parent PID %d name no longer resolvable "
                                    "(was '%s'). Streak: %d"
                                ),
                                self._parent_pid,
                                _last_known_parent_name,
                                _parent_name_unknown_streak,
                            )
                        # Escalate to WARNING on the second consecutive failure
                        elif (
                            _last_known_parent_name
                            and _parent_name_unknown_streak == 2
                        ):
                            logger.warning(
                                (
                                    "Parent PID %d name unresolvable for %d "
                                    "consecutive checks (was '%s'). May indicate "
                                    "IDE is shutting down."
                                ),
                                self._parent_pid,
                                _parent_name_unknown_streak,
                                _last_known_parent_name,
                            )
                    else:
                        if _parent_name_unknown_streak > 0:
                            logger.info(
                                (
                                    "Parent PID %d name resolved again as '%s'. "
                                    "Resetting streak."
                                ),
                                self._parent_pid,
                                parent_name,
                            )
                        _parent_name_unknown_streak = 0
                        _last_known_parent_name = parent_name

                    # If parent is reported alive but name has been
                    # unresolvable for 2+ checks,
                    # treat it as a zombie process and shut down
                    # (2 checks @ 2s interval = 4s max wait)
                    if parent_alive and _parent_name_unknown_streak >= 2:
                        parent_name_str = _last_known_parent_name or "unknown"
                        logger.info(
                            (
                                "Parent process %s (PID: %d) confirmed "
                                "terminated after %d unresolvable checks. "
                                "Initiating shutdown."
                            ),
                            parent_name_str,
                            self._parent_pid,
                            _parent_name_unknown_streak,
                        )
                        logger.info(
                            (
                                "Parent PID %d name unresolvable for %d "
                                "consecutive checks — treating as terminated. "
                                "Shutting down..."
                            ),
                            self._parent_pid,
                            _parent_name_unknown_streak,
                        )
                        # Console printing is redundant with logging; keep it in
                        # the logs only to avoid duplicate terminal lines.
                        self._graceful_shutdown()
                        return

                    current_ppid = os.getppid()

                    # DEBUG: Always log the comparison
                    logger.debug(
                        "adoption check: initial=%d current=%d match=%s",
                        self._initial_ppid,
                        current_ppid,
                        current_ppid == self._initial_ppid,
                    )

                    # Check if adopted by a new parent (original parent died)
                    if current_ppid != self._initial_ppid:
                        logger.info(
                            (
                                "Detected adoption by new parent (was %d, now %d). "
                                "Original parent died. Shutting down..."
                            ),
                            self._initial_ppid,
                            current_ppid,
                        )
                        # avoid printing duplicate messages to console
                        self._graceful_shutdown()
                        return

                    # Resolve immediate parent process name for clearer logs
                    immediate_parent_name = None
                    try:
                        if current_ppid:
                            immediate_parent_name, _ = self._get_process_info_local(
                                current_ppid
                            )
                    except Exception:
                        immediate_parent_name = None

                    # Include detection method for clarity
                    if self._parent_pid:
                        logger.debug(
                            (
                                "Parent check — detected IDE: %s (PID: %s) via=%s "
                                "alive=%s; immediate parent: %s (PID: %d)"
                            ),
                            parent_name or "unknown",
                            self._parent_pid,
                            parent_method or "unknown",
                            parent_alive,
                            immediate_parent_name or "unknown",
                            current_ppid,
                        )
                    else:
                        logger.debug(
                            (
                                "Parent check — immediate parent: %s (PID: %d) "
                                "via=%s alive=%s"
                            ),
                            immediate_parent_name or "unknown",
                            current_ppid,
                            parent_method or "unknown",
                            parent_alive,
                        )

                    # Check if we have a parent PID and it's dead
                    if self._parent_pid:
                        if not self._is_process_alive(self._parent_pid):
                            logger.info(
                                "Parent process (PID: %d) terminated. "
                                "Shutting down...",
                                self._parent_pid,
                            )
                            # Avoid duplicate console prints;
                            # logging is authoritative
                            self._graceful_shutdown()
                            return
                    else:
                        # No explicit parent PID - check for orphan status
                        current_ppid = os.getppid()
                        # On Windows, orphaned processes may get
                        # adopted by system processes
                        # On Unix, they get adopted by init (PID 1)
                        if sys.platform == "win32":
                            # Windows: check if adopted by a low-PID system process
                            # (System, smss.exe, csrss.exe, etc.)
                            if current_ppid <= 4:
                                logger.info(
                                    (
                                        "Detected orphaned watcher (adopted "
                                        "by system PID: %d). "
                                        "Shutting down..."
                                    ),
                                    current_ppid,
                                )
                                # Avoid printing to console redundantly
                                self._graceful_shutdown()
                                return
                        else:
                            # Unix: check if adopted by init
                            if current_ppid == 1:
                                logger.info(
                                    (
                                        "Detected orphaned watcher (adopted "
                                        "by init). Shutting down..."
                                    ),
                                )
                                # Avoid printing to console redundantly
                                self._graceful_shutdown()
                                return

                out = self._get_modified_and_unpushed_files()
                current_modified = set(out)

                if current_modified != last_modified:
                    last_change_time = _safe_now()
                    new_files = current_modified - last_modified
                    if new_files:
                        logger.info("Detected local changes: %s", list(new_files))
                        branch = self._get_current_branch()
                        ok, failed, msg = self.acquire_multiple(
                            list(new_files),
                            branch_name=branch,
                            reason="Auto-Watch Sync",
                        )
                        if not ok:
                            logger.warning("⚠️ CONFLICT ALERT: %s", msg)

                    released = last_modified - current_modified
                    if released:
                        ok, count, _ = self.release_multiple(list(released))
                        if ok and count > 0:
                            logger.info("🔓 [RELEASED] %d file(s) released", count)

                    last_modified = current_modified
                else:
                    # Idle timeout
                    idle = _safe_now() - last_change_time
                    if timeout_mins > 0 and idle > timedelta(minutes=timeout_mins):
                        logger.info(
                            "Watcher timed out after %dm inactivity.", timeout_mins
                        )
                        break

                time.sleep(interval)
            except Exception as e:
                # Keep the watcher alive across transient failures.
                logger.error("Error in watcher loop: %s", e, exc_info=True)
                time.sleep(interval)
    except KeyboardInterrupt:
        logger.info("Watcher stopped by user.")
    finally:
        self._graceful_shutdown()


def _watch_resolve_parent_method(self, parent_pid: Optional[int]) -> str:
    """Return a label describing how the parent IDE PID was inferred."""
    try:
        # If VSCODE_PID matches the provided parent_pid, mark accordingly
        vspid = os.getenv("VSCODE_PID")
        if (
            vspid
            and vspid.isdigit()
            and parent_pid
            and int(vspid) == int(parent_pid)
        ):
            return "vscode_pid"
        if os.getenv("PYCHARM_HOSTED") == "1":
            return "pycharm_hosted"
        _, detected_method = self._get_parent_ide_pid()
        return detected_method or "unknown"
    except Exception:
        return "unknown"


def _watch_stop_requested(self, stop_file: str) -> bool:
    """Return True when *stop_file* holds a stop request for this watcher.

    Supported payloads: ``TOKEN:<session token>``, ``PID:<pid>`` and a bare
    numeric PID (backwards compatible). An empty, unreadable or unparsable
    payload never matches.
    """
    if not os.path.exists(stop_file):
        return False

    try:
        with open(stop_file, "r", encoding="utf-8") as sf:
            txt = sf.read().strip()
    except Exception:
        txt = ""

    # PID recorded in the PID file, falling back to the running process.
    try:
        actual_pid = self._read_pid() or os.getpid()
    except Exception:
        actual_pid = os.getpid()

    # TOKEN:<token> takes precedence
    if txt.startswith("TOKEN:"):
        requested_token = txt.split(":", 1)[1]
        try:
            my_token = self._get_session_token()
        except Exception:
            my_token = None
        return bool(requested_token and my_token and requested_token == my_token)

    payload = txt.split(":", 1)[1] if txt.startswith("PID:") else txt
    if not payload:
        return False
    try:
        return int(payload) in (actual_pid, os.getpid())
    except Exception:
        return False


def _watch_heartbeat_failure(self, startup_time: float) -> Optional[str]:
    """Return a shutdown reason if the heartbeat is missing or stale, else None.

    *startup_time* is the ``time.time()`` value taken when the watch loop
    started; a missing heartbeat file is tolerated for the first 3 seconds.
    A stale heartbeat gets one extra pass per watch run while the parent
    process is still alive; otherwise it fails once its age exceeds the
    grace period plus 5 seconds.
    """
    now_ts = time.time()
    heartbeat_path = self._heartbeat_file
    exists = os.path.exists(heartbeat_path)
    logger.debug(
        "Heartbeat check: file=%s exists=%s",
        heartbeat_path,
        exists,
    )

    if not exists:
        # Allow a short startup grace window to avoid races with the
        # extension creating the heartbeat right after spawning us.
        if now_ts - startup_time < 3.0:
            logger.debug(
                (
                    "Heartbeat missing but within startup "
                    "grace (%.2fs) — ignoring"
                ),
                now_ts - startup_time,
            )
            return None
        logger.info(
            ("Heartbeat file missing (%s). " "Shutting down..."),
            heartbeat_path,
        )
        return "heartbeat_missing"

    # The file exists: make sure it was updated recently enough.
    age = now_ts - os.path.getmtime(heartbeat_path)
    logger.debug(
        "Heartbeat age: %.1fs (threshold: %ss)",
        age,
        self._heartbeat_grace_seconds,
    )
    grace = float(self._heartbeat_grace_seconds)
    if age <= grace:
        return None

    # Allow a small one-time soft skip when the parent IDE process is
    # still alive. This helps tolerate brief extension-host hiccups
    # (file system delays, quick reloads) while preserving safety.
    soft_extra = 5.0
    parent_alive = bool(
        self._parent_pid and self._is_process_alive(self._parent_pid)
    )
    if parent_alive and not getattr(self, "_heartbeat_soft_skipped", False):
        logger.warning(
            (
                "Heartbeat stale (%.1fs > %ss). "
                "Parent alive; allowing "
                "one-time extra %.1fs grace."
            ),
            age,
            self._heartbeat_grace_seconds,
            soft_extra,
        )
        self._heartbeat_soft_skipped = True
        return None

    if age > grace + soft_extra:
        # Final failure: log file contents for debugging
        try:
            with open(heartbeat_path, "r", encoding="utf-8") as hf:
                content = hf.read().strip()
            logger.debug("Heartbeat file content: %s", content)
        except Exception:
            pass
        logger.info(
            ("Heartbeat stale (%.1fs > %ss) at %s. " "Shutting down..."),
            age,
            self._heartbeat_grace_seconds,
            heartbeat_path,
        )
        return "heartbeat_stale"

    return None
2681
+
2682
+ # ------------------------------------------------------------------
2683
+ # Internal helpers
2684
+ # ------------------------------------------------------------------
2685
def _register_signal_handlers(self) -> None:
    """Route process termination events to ``_graceful_shutdown``.

    Installs, in order:

    * an ``atexit`` hook (skipped when ``COLLAB_TEST_MODE`` is ``"1"``),
    * a ``SIGTERM`` handler on non-Windows platforms,
    * a ``SIGINT`` handler,
    * on Windows, a ``SIGBREAK`` handler (when the constant exists) and a
      console control handler registered via ``SetConsoleCtrlHandler``.

    Failures while installing the Windows-specific handlers are logged at
    debug level and otherwise ignored.
    """
    logger.debug("_register_signal_handlers called")

    if os.getenv("COLLAB_TEST_MODE") != "1":
        logger.debug("Registering atexit handler")
        atexit.register(self._graceful_shutdown)

    def _handle_signal(signum, frame):
        # Run the shutdown routine, then exit with status 0 even when the
        # shutdown routine itself raised.
        logger.debug("Signal handler called: signum=%d", signum)
        logger.info("Received signal %d, shutting down...", signum)
        try:
            self._graceful_shutdown(reason=f"signal_{signum}")
        except Exception:
            logger.exception("Error during graceful shutdown for signal %s", signum)
        sys.exit(0)

    if sys.platform != "win32":
        logger.debug("Registering SIGTERM handler")
        signal.signal(signal.SIGTERM, _handle_signal)
    logger.debug("Registering SIGINT handler")
    signal.signal(signal.SIGINT, _handle_signal)

    # Windows-specific handlers: SIGBREAK and a console control handler.
    # These improve the chance that we run graceful shutdown when the
    # extension host or window closes (CTRL_CLOSE_EVENT, SHUTDOWN, etc.).
    if sys.platform == "win32":
        if hasattr(signal, "SIGBREAK"):
            try:
                logger.debug("Registering SIGBREAK handler")
                signal.signal(signal.SIGBREAK, _handle_signal)
            except Exception as _e:
                logger.debug("Failed to register SIGBREAK handler: %s", _e)

        try:
            import ctypes
            from ctypes import wintypes

            HandlerRoutine = ctypes.WINFUNCTYPE(wintypes.BOOL, wintypes.DWORD)

            def _console_handler(dwCtrlType):
                try:
                    logger.debug("Console control event: %s", dwCtrlType)
                    # Attempt graceful shutdown
                    try:
                        self._graceful_shutdown(reason=f"console_ctrl_{dwCtrlType}")
                    except Exception:
                        logger.exception(
                            "Exception during graceful shutdown in console handler"
                        )
                except Exception:
                    logger.exception("Exception in console handler")
                # Report the event as handled.
                return True

            # ctypes callback objects must stay referenced for as long as
            # native code may call them; a temporary passed straight to
            # SetConsoleCtrlHandler can be garbage-collected, leaving the
            # OS with a dangling function pointer. Pin it on the instance.
            console_callback = HandlerRoutine(_console_handler)
            self._console_ctrl_handler = console_callback
            ctypes.windll.kernel32.SetConsoleCtrlHandler(console_callback, True)
            logger.debug("Registered Windows console ctrl handler")
        except Exception as _e:
            logger.debug("Failed to register console ctrl handler: %s", _e)

    logger.debug("Signal handlers registered")
2747
+
2748
def _start_parent_monitor_thread(self) -> None:
    """Start a background thread that waits on the parent process handle (Windows).

    This uses OpenProcess + WaitForSingleObject so we can be notified the instant
    the parent process exits, avoiding fragile polling or WMIC queries. The thread
    is daemonized so it won't block shutdown.

    Does nothing on non-Windows platforms or when ``self._parent_pid`` is
    unset/falsy. On success the handle, the thread and a "started" flag are
    stored on ``self._parent_monitor_handle``, ``self._parent_monitor_thread``
    and ``self._parent_monitor_started``. Any exception raised during setup is
    logged at debug level and those three attributes are reset.
    """
    if sys.platform != "win32":
        return
    parent = getattr(self, "_parent_pid", None)
    if not parent:
        return
    try:
        import ctypes

        # SYNCHRONIZE | PROCESS_QUERY_LIMITED_INFORMATION
        SYNCHRONIZE = 0x00100000
        PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
        desired_access = SYNCHRONIZE | PROCESS_QUERY_LIMITED_INFORMATION

        handle = ctypes.windll.kernel32.OpenProcess(
            desired_access, False, int(parent)
        )
        if not handle:
            # OpenProcess returned a null handle: record the error code for
            # diagnostics and give up without starting a thread.
            try:
                err = ctypes.windll.kernel32.GetLastError()
            except Exception:
                err = None
            logger.debug(
                "OpenProcess failed for parent PID %s: err=%s", parent, err
            )
            return

        def _waiter(hndl, ppid):
            # Thread body: block until the handle is signaled, close it,
            # clear the monitor bookkeeping, then trigger shutdown.
            try:
                INFINITE = 0xFFFFFFFF
                res = ctypes.windll.kernel32.WaitForSingleObject(hndl, INFINITE)
                logger.info(
                    (
                        "Parent PID %s handle signaled "
                        "(WaitForSingleObject returned %s). "
                        "Initiating shutdown."
                    ),
                    ppid,
                    res,
                )
                try:
                    ctypes.windll.kernel32.CloseHandle(hndl)
                except Exception as exc:
                    logger.debug("CloseHandle failed for parent monitor: %s", exc)
                # mark monitor as stopped to avoid races
                self._parent_monitor_started = False
                self._parent_monitor_handle = None
                self._parent_monitor_thread = None
                # Trigger graceful shutdown with a reason
                try:
                    self._graceful_shutdown(reason=f"parent_exit_{ppid}")
                except Exception:
                    logger.exception("Error while shutting down after parent exit")
            except Exception as e:
                logger.debug("Parent monitor waiter failed: %s", e)

        th = threading.Thread(
            target=_waiter, args=(handle, int(parent)), daemon=True
        )
        # Record diagnostics before starting
        self._parent_monitor_handle = handle
        self._parent_monitor_started = True
        self._parent_monitor_thread = th
        logger.info("Parent monitor listening for parent PID %s", parent)
        th.start()
    except Exception as e:
        # Setup failed part-way: leave the instance in the "not monitoring" state.
        logger.debug("Failed to start parent monitor thread: %s", e)
        self._parent_monitor_started = False
        self._parent_monitor_handle = None
        self._parent_monitor_thread = None
2824
+
2825
def _graceful_shutdown(self, reason: Optional[str] = None) -> None:
    """Cleanup the local daemon state on shutdown.

    IMPORTANT: This handler strictly DOES NOT release any Supabase locks.
    Locks are preserved to ensure they persist across IDE restarts and
    terminal sessions. They are only released automatically during 'git push'
    (via pre-push hook) or manual release-all.

    Steps performed, each on a best-effort basis:

    1. Return immediately if ``self._shutdown_done`` is already set;
       otherwise set it.
    2. Return without side effects when ``COLLAB_TEST_MODE`` is ``"1"``.
    3. Log every active lock owned by ``self.developer_id`` as preserved.
    4. Print a one-line summary to stdout and write the preserved-lock count
       to the ``.shutdown_complete`` file resolved by ``_state_path``.
    5. Delete stray marker files under ``_COLLAB_ROOT`` and the PID file.
    6. Flush/fsync logging handlers, call ``logging.shutdown()``, flush
       stdout and sleep for half a second.

    Args:
        reason: Optional label describing what triggered the shutdown; it is
            only used in log messages.
    """
    logger.debug("_graceful_shutdown called (reason=%s)", reason)

    # Flush immediately so we see this even if process dies
    for handler in logging.getLogger().handlers:
        try:
            handler.flush()
        except Exception:
            pass

    # Guard flag: only the first call does the work below.
    if getattr(self, "_shutdown_done", False):
        logger.debug("shutdown already done, returning (reason=%s)", reason)
        return
    self._shutdown_done = True

    # Never touch real Supabase OR local PID file in test mode
    if os.getenv("COLLAB_TEST_MODE") == "1":
        logger.debug("COLLAB_TEST_MODE=1 - skipping real shutdown actions")
        return

    # Log shutdown start (clear, stepwise messages)
    if reason:
        logger.info(
            (
                "Shutdown initiated — received shutdown signal (%s). "
                "Beginning graceful shutdown."
            ),
            reason,
        )
    else:
        logger.info(
            (
                "Shutdown initiated — received shutdown signal. "
                "Beginning graceful shutdown."
            )
        )

    # Flush again
    for handler in logging.getLogger().handlers:
        try:
            handler.flush()
        except Exception:
            pass

    # Log kept locks (matching pycharm_watcher format)
    n_kept = 0
    try:
        active_locks = self.active()
        logger.debug(
            "Graceful shutdown: fetched %d active locks from Supabase. "
            "My dev ID: %s",
            len(active_locks),
            self.developer_id,
        )
        my_locks = [
            lk for lk in active_locks if lk.get("developer_id") == self.developer_id
        ]
        for lock in sorted(my_locks, key=lambda x: x.get("file_path", "")):
            fp = lock.get("file_path", "")
            # Locks without a file path are neither counted nor logged.
            if fp:
                n_kept += 1
                logger.info(
                    "🔒 [PRESERVED] %s — still has local edits, lock preserved", fp
                )
    except Exception as e:
        # Enumeration failure is not fatal; n_kept keeps whatever was counted.
        logger.error(
            "Exception while enumerating active locks during shutdown: %s", e
        )

    logger.info(
        "Shutdown complete. Preserved %d lock(s); released 0 lock(s).", n_kept
    )
    # Emit a concise stdout marker for the extension to detect.
    try:
        print(
            f"Shutdown complete. Preserved {n_kept} lock(s); released 0 lock(s).",
            flush=True,
        )
    except Exception:
        pass

    # Write shutdown marker early into the per-workspace state dir so
    # external tools can detect shutdown without placing transient files
    # inside the repository working tree.
    try:
        shutdown_file = _state_path(".shutdown_complete")
        with open(shutdown_file, "w") as f:
            f.write(f"{n_kept}\n")
            f.flush()
            try:
                os.fsync(f.fileno())
            except Exception:
                pass
        logger.info("Wrote shutdown marker to %s", shutdown_file)
        # Remove any stray shutdown/startup markers that may exist in the
        # repository runtime root from older runs.
        try:
            repo_shutdown = os.path.join(_COLLAB_ROOT, ".shutdown_complete")
            repo_summary = os.path.join(_COLLAB_ROOT, ".startup_summary.json")
            for p in (repo_shutdown, repo_summary):
                try:
                    if os.path.exists(p):
                        os.remove(p)
                        logger.info("Removed stray runtime marker in repo: %s", p)
                except Exception as _e:
                    logger.debug("Failed to remove stray repo marker %s: %s", p, _e)
        except Exception:
            pass
    except Exception as _e:
        logger.debug("Failed to write shutdown marker early: %s", _e)

    # Remove PID file with logging (matching pycharm_watcher)
    # Up to three attempts, pausing 0.1s after each failed one but the last.
    for _attempt in range(3):
        try:
            if os.path.exists(PID_FILE):
                os.remove(PID_FILE)
                logger.info("Removed PID file: %s", PID_FILE)
            break
        except OSError:
            if _attempt < 2:
                time.sleep(0.1)
            pass

    # Flush all logging handlers to ensure shutdown logs are written
    # Flush handlers attached to the 'collab' logger (file handlers)
    try:
        collab_logger = logging.getLogger("collab")
        for handler in getattr(collab_logger, "handlers", []):
            try:
                handler.flush()
            except Exception:
                pass
    except Exception:
        pass

    # Also flush and fsync file-backed handlers as a best-effort so that
    # logs are persisted to disk even if the parent IDE reloads quickly.
    try:
        # First, handle collab-specific handlers
        collab_logger = logging.getLogger("collab")
        for handler in getattr(collab_logger, "handlers", []):
            try:
                handler.flush()
            except Exception:
                pass
            try:
                # Only handlers exposing a stream with a file descriptor
                # can be fsync'ed; everything else is skipped silently.
                stream = getattr(handler, "stream", None)
                if stream and hasattr(stream, "fileno"):
                    try:
                        os.fsync(stream.fileno())
                    except Exception:
                        pass
            except Exception:
                pass
    except Exception:
        pass

    # Then root handlers
    try:
        for handler in logging.getLogger().handlers:
            try:
                handler.flush()
            except Exception:
                pass
            try:
                stream = getattr(handler, "stream", None)
                if stream and hasattr(stream, "fileno"):
                    try:
                        os.fsync(stream.fileno())
                    except Exception:
                        pass
            except Exception:
                pass
    except Exception:
        pass

    # Ensure all logging resources are flushed and closed before exit.
    try:
        logging.shutdown()
    except Exception:
        pass

    # Ensure stdout is flushed for console output
    try:
        sys.stdout.flush()
    except Exception:
        pass

    # Small delay to ensure file writes complete before process exit
    time.sleep(0.5)
3022
+
3023
def _reconcile(self) -> set:
    """Sync Supabase locks with local git status and upstream state.

    Compares the files reported by ``_get_modified_and_unpushed_files`` with
    the active locks owned by ``self.developer_id`` and then:

    * releases locks whose file is no longer modified (stale),
    * rewrites ``lock_token`` for locks carrying this session's token or a
      token from the same machine (resumed),
    * only warns about locks whose token belongs to another machine,
    * re-acquires locks that have no stored token (refreshed),
    * acquires locks for modified files that have none (missing).

    Afterwards a startup marker is printed to stdout and, unless
    ``COLLAB_SILENT_DAEMON`` is set, a JSON summary is written to the state
    directory plus a short-lived copy under ``_COLLAB_ROOT``.

    Returns:
        The set of modified file paths. If the modified files cannot be
        determined, the paths of the locks currently held are returned
        instead (or an empty set when that lookup fails as well).
    """
    try:
        modified_files = self._get_modified_and_unpushed_files()
        git_modified = set(modified_files)
    except Exception as e:
        logger.error("Error identifying modified files (skipping reconcile): %s", e)
        # DANGEROUS: Returning set() here would cause it to think we should
        # release EVERYTHING we currently have. Instead, return our currently
        # known locks so reconciliation essentially becomes a no-op for this cycle.
        try:
            active = self.active()
            return {
                lk["file_path"]
                for lk in active
                if lk.get("developer_id") == self.developer_id
            }
        except Exception:
            return set()

    try:
        active = self.active()
        my_locks = {
            lk["file_path"]
            for lk in active
            if lk.get("developer_id") == self.developer_id
        }
        # Build lock_map for token checking
        lock_map: dict[str, dict] = {}
        for lk in active:
            if lk.get("developer_id") == self.developer_id:
                fp = lk.get("file_path", "")
                if fp:
                    lock_map[fp] = lk
    except Exception as e:
        logger.error("Error getting Supabase locks: %s", e)
        return git_modified

    # Calculate lock categories
    stale = my_locks - git_modified
    missing = git_modified - my_locks
    still_valid = my_locks & git_modified

    # Count categories for summary
    current_token = self._get_session_token()
    resumed_locks = []
    refreshed_locks = []
    multi_session_locks = []

    for fp in sorted(still_valid):
        lock = lock_map.get(fp, {})
        stored_token = lock.get("lock_token", "")

        if stored_token and stored_token != current_token:
            # Foreign token: same machine means a resumable earlier session,
            # anything else is treated as a concurrent session elsewhere.
            if self._is_same_machine_token(stored_token):
                resumed_locks.append(fp)
            else:
                multi_session_locks.append(fp)
        elif stored_token == current_token:
            resumed_locks.append(fp)
        else:
            # No token stored on the lock at all.
            refreshed_locks.append(fp)

    # Calculate counts for summary
    n_released = len(stale)
    n_newly_locked = len(missing)
    n_readopted = len(resumed_locks)
    n_refreshed = len(refreshed_locks)
    n_multi = len(multi_session_locks)

    # Only log start message if there's work to do
    if any([n_released, n_newly_locked, n_readopted, n_refreshed, n_multi]):
        logger.debug("Starting lock reconciliation...")

    # Process stale locks
    if stale:
        for fp in sorted(stale):
            logger.info(
                "🔓 [STALE-RELEASED] %s — locked but file is now clean, releasing",
                fp,
            )
        self.release_multiple(list(stale))

    # Process RESUMED locks: use direct table update (preserves acquired_at)
    # This prevents the timer from resetting when switching IDEs
    if resumed_locks:
        for fp in sorted(resumed_locks):
            logger.info("🔒 [RESUMED] %s — lock re-adopted from this machine", fp)
            try:
                # Use direct update to ONLY change lock_token, NOT acquired_at
                client = self._client
                assert client is not None
                client.table("file_locks").update({"lock_token": current_token}).eq(
                    "file_path", fp
                ).eq("developer_id", self.developer_id).execute()
            except Exception:
                logger.debug("Failed to update lock_token for %s (non-fatal)", fp)

    # Process multi-session locks (different machine) - just log, don't touch
    if multi_session_locks:
        for fp in sorted(multi_session_locks):
            lock = lock_map.get(fp, {})
            stored_token = lock.get("lock_token", "")
            logger.warning(
                (
                    "⚠️ [MULTI-SESSION] %s — token mismatch (stored: %s..., "
                    "current: %s...). "
                    "Lock left untouched — use 'collab release-all' "
                    "if stale."
                ),
                fp,
                stored_token[:8] if stored_token else "none",
                current_token[:8],
            )

    # Process REFRESHED locks (no stored token) - use acquire RPC
    if refreshed_locks:
        for fp in sorted(refreshed_locks):
            logger.info("🔒 [REFRESHED] %s — token refreshed", fp)
        branch = self._get_current_branch()
        self.acquire_multiple(
            list(refreshed_locks), branch_name=branch, reason="Auto-Watch Sync"
        )

    # Process NEW locks (missing) - use acquire RPC
    if missing:
        branch = self._get_current_branch()
        self.acquire_multiple(
            list(missing), branch_name=branch, reason="Auto-Watch Sync"
        )

    # Always log startup reconciliation summary for notification detection
    # Ensure a clear stdout marker so the VS Code extension (which
    # monitors the watcher's stdout) reliably detects startup completion.
    # The marker is flushed explicitly because stdout is block-buffered when
    # it is a pipe, and it is guarded so that a closed or broken stdout
    # cannot abort reconciliation (same treatment as the shutdown marker).
    try:
        print("Startup reconciliation complete.", flush=True)
    except Exception:
        logger.debug("Could not emit startup marker on stdout", exc_info=True)
    logger.info("Startup reconciliation complete.")
    if n_readopted:
        logger.info("  Re-adopted: %d lock(s)", n_readopted)
    if n_released:
        logger.info("  Stale released: %d lock(s)", n_released)
    if n_newly_locked:
        logger.info("  Newly locked: %d file(s)", n_newly_locked)
    if n_multi:
        logger.info("  Conflicts: %d file(s)", n_multi)
    if n_refreshed:
        logger.info("  Token refresh: %d lock(s)", n_refreshed)

    # Write startup summary to file for VSCode extension notification
    # Skip if silencing is requested (e.g., during tests)
    if os.environ.get("COLLAB_SILENT_DAEMON"):
        logger.debug("Skipping startup summary (COLLAB_SILENT_DAEMON set)")
        return git_modified

    try:
        import json

        summary_file = _state_path(".startup_summary.json")
        summary_data = {
            "readopted": n_readopted,
            "stale_released": n_released,
            "newly_locked": n_newly_locked,
            "conflicts": n_multi,
            "refreshed": n_refreshed,
            "timestamp": time.time(),
        }
        with open(summary_file, "w") as f:
            json.dump(summary_data, f)

        # For backward compatibility with older extension instances that
        # expect `.startup_summary.json` inside the repository root,
        # also write a short-lived copy there. Schedule its removal after
        # a short grace period so the git tree is not polluted long-term.
        try:
            repo_summary = os.path.join(_COLLAB_ROOT, ".startup_summary.json")
            try:
                with open(repo_summary, "w") as rf:
                    json.dump(summary_data, rf)
            except Exception as _e:
                logger.debug("Failed to write repo startup summary: %s", _e)

            def _cleanup_repo_markers(paths, delay=30):
                # Delete ``paths`` from a daemon thread after ``delay`` seconds.
                def _worker():
                    try:
                        time.sleep(delay)
                        for p in paths:
                            try:
                                if os.path.exists(p):
                                    os.remove(p)
                                    _emit_log_resilient(
                                        logger,
                                        logging.INFO,
                                        "Removed stray repo marker: %s",
                                        p,
                                    )
                            except Exception:
                                _emit_log_resilient(
                                    logger,
                                    logging.DEBUG,
                                    "Failed to remove stray repo marker: %s",
                                    p,
                                )
                    except Exception:
                        pass

                th = threading.Thread(target=_worker, daemon=True)
                th.start()

            # Schedule removal of both startup and shutdown markers (if present)
            repo_shutdown = os.path.join(_COLLAB_ROOT, ".shutdown_complete")
            _cleanup_repo_markers([repo_summary, repo_shutdown], delay=30)
        except Exception:
            pass
    except Exception:
        pass

    return git_modified
3239
+
3240
@staticmethod
def _run_git_status() -> str:
    """Run ``git status --porcelain`` and return its decoded output.

    Only trailing whitespace is removed. Each porcelain line starts with a
    two-character status field whose first character is a space for
    unstaged changes (for example ``" M path"``); stripping leading
    whitespace would shift the first line by one column and make callers
    that slice the path at a fixed offset drop its first character.

    On Windows the child process is started with ``creationflags=0x08000000``
    (``CREATE_NO_WINDOW``).

    Returns:
        The command output, or an empty string when nothing is reported.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
        OSError: If the git executable cannot be started.
    """
    args = ["git", "status", "--porcelain"]
    run_kwargs = {"stderr": subprocess.DEVNULL}
    if sys.platform == "win32":
        run_kwargs["creationflags"] = 0x08000000
    return subprocess.check_output(args, **run_kwargs).decode().rstrip()
3258
+
3259
def _get_modified_and_unpushed_files(self) -> List[str]:
    """Collect paths that are dirty locally or differ from the upstream branch.

    Two sources are merged into one de-duplicated list:

    1. entries of ``git status --porcelain``;
    2. entries of ``git diff --name-status @{u}..HEAD`` (only attempted when
       an upstream branch resolves).

    Paths ending in ``/`` and paths rejected by ``_should_ignore_path`` are
    left out. A failure in either source is tolerated: the status failure is
    logged at debug level, the upstream failure is silent.
    """
    collected = set()

    # Source 1: working-tree / index changes.
    try:
        status_text = self._run_git_status()
        for status_line in status_text.splitlines() if status_text else ():
            if len(status_line) <= 3:
                continue
            candidate = self._normalize_file_path(
                self._parse_git_status_path(status_line)
            )
            if candidate.endswith("/"):
                continue
            if not self._should_ignore_path(candidate):
                collected.add(candidate)
    except Exception as e:
        logger.debug("Git status failed: %s", e)

    # Source 2: commits not yet on the upstream branch.
    try:
        spawn_opts = {"stderr": subprocess.DEVNULL}
        if sys.platform == "win32":
            spawn_opts["creationflags"] = 0x08000000

        # Raises when no upstream is configured, which skips the diff below.
        subprocess.check_output(
            ["git", "rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"],
            **spawn_opts,
        )

        # Deleted paths stay in the result so their lock ownership remains
        # visible in the dashboard until an explicit release.
        diff_bytes = subprocess.check_output(
            ["git", "diff", "--name-status", "@{u}..HEAD"], **spawn_opts
        )
        diff_text = diff_bytes.decode().strip()

        for diff_line in diff_text.splitlines():
            entry = diff_line.strip()
            if not entry:
                continue
            fields = entry.split(None, 1)
            if len(fields) != 2:
                continue
            target = fields[1].strip()
            # Multi-path entries: keep the last path on the line.
            if "\t" in target:
                target = target.split("\t")[-1].strip()
            if " -> " in target:
                target = target.split(" -> ")[-1].strip()
            resolved = self._normalize_file_path(target)
            if resolved.endswith("/"):
                continue
            if resolved and not self._should_ignore_path(resolved):
                collected.add(resolved)
    except Exception:
        # No upstream or command failed - fallback to status-only
        pass

    return list(collected)
3338
+
3339
@staticmethod
def _parse_git_status_path(line: str) -> str:
    """Extract the file path from one ``git status --porcelain`` line.

    The first three characters (status field plus separator) are dropped.
    For rename entries (``old -> new``) the new path is returned. Paths that
    git wrapped in double quotes are unquoted and their C-style escapes are
    decoded; octal escapes such as ``\\303\\251`` denote raw bytes, so the
    decoded text is re-interpreted as UTF-8 to recover non-ASCII names.

    Args:
        line: A single porcelain status line, e.g. ``" M src/app.py"``.

    Returns:
        The path portion of the line. If escape decoding fails, the unquoted
        text is returned undecoded.
    """
    p = line[3:].strip()
    if " -> " in p:
        p = p.split(" -> ")[-1].strip()
    if p.startswith('"') and p.endswith('"'):
        p = p[1:-1]
        try:
            unescaped = p.encode("utf-8").decode("unicode_escape")
        except UnicodeError:
            # Malformed escape sequence: keep the unquoted text as-is.
            return p
        try:
            # ``unicode_escape`` maps every byte to the code point of the
            # same value; turn those back into bytes and decode as UTF-8.
            p = unescaped.encode("latin-1").decode("utf-8")
        except UnicodeError:
            # Not valid UTF-8 (or not byte-sized): keep the escape-decoded text.
            p = unescaped
    return p
3352
+
3353
@staticmethod
def _should_ignore_path(path: str) -> bool:
    """Tell whether the watcher must skip ``path``.

    A path is skipped when, after converting backslashes to forward slashes:

    * it has a ``.git`` directory component followed by something else,
    * any of its components is exactly ``instance`` (runtime instance
      folders are environment artifacts, not collaborative files), or
    * it mentions one of the marker files the watcher itself writes
      (``.startup_summary.json`` / ``.shutdown_complete``).
    """
    unified = path.replace("\\", "/")
    components = unified.split("/")

    # ``.git`` only counts as a directory, i.e. never as the final component.
    inside_git_dir = ".git" in components[:-1]
    if inside_git_dir:
        return True

    if "instance" in components:
        return True

    own_markers = (".startup_summary.json", ".shutdown_complete")
    if any(marker in unified for marker in own_markers):
        return True

    # Every other project path is eligible for locking.
    return False
3373
+
3374
@staticmethod
def _read_pid() -> Optional[int]:
    """Return the daemon PID recorded in ``PID_FILE``, if any.

    Two on-disk layouts are understood:

    - a JSON object with an integer ``"pid"`` member (current layout);
    - a bare integer (legacy layout).

    ``None`` is returned when the file is absent, empty, unreadable, or does
    not hold a usable PID.
    """
    if not os.path.exists(PID_FILE):
        return None
    try:
        with open(PID_FILE, "r", encoding="utf-8") as pid_fh:
            content = pid_fh.read().strip()
        if not content:
            return None

        # Legacy layout: the whole file is the number.
        if not content.startswith("{"):
            return int(content)

        # Current layout: JSON metadata with a "pid" member.
        try:
            candidate = json.loads(content).get("pid")
        except Exception:
            logger.debug("PID file contains invalid JSON: %s", content)
            return None
        return candidate if isinstance(candidate, int) else None
    except ValueError:
        logger.debug("PID file does not contain an integer: %s", PID_FILE)
        return None
    except OSError as e:
        logger.debug("Could not read PID file %s: %s", PID_FILE, e)
        return None
3409
+
3410
@staticmethod
def _get_cmdline_for_pid(pid: int) -> Optional[str]:
    """Return the command-line string for a process, or None if unavailable.

    Lookup order:

    1. ``psutil`` when it can be imported and the query succeeds;
    2. on Windows, ``wmic`` (only when present on PATH) and then a
       PowerShell ``Get-CimInstance`` query;
    3. elsewhere, ``/proc/<pid>/cmdline``.

    Args:
        pid: Process id to inspect.

    Returns:
        The arguments joined by single spaces, or ``None`` when no method
        produced a command line.
    """
    # Prefer psutil when available (robust cross-platform). If unavailable,
    # fall back to lightweight platform-specific methods (procfs on Unix,
    # WMIC/PowerShell on Windows) so we can verify PID command-lines even
    # in minimal environments.
    try:
        import psutil

        try:
            p = psutil.Process(pid)
            cmd = p.cmdline()
            if isinstance(cmd, (list, tuple)):
                return " ".join(cmd)
            return str(cmd)
        except Exception:
            pass
    except Exception:
        # psutil not installed — continue to platform fallbacks
        pass

    # Platform-specific fallbacks
    if sys.platform == "win32":
        # Same flag the git invocations in this file pass on Windows
        # (CREATE_NO_WINDOW): keeps the helper processes from opening a
        # console window when the watcher itself runs without one.
        no_window = 0x08000000
        # Prefer modern PowerShell CIM query when WMIC is not present.
        # Only call WMIC if it is actually available on PATH to avoid
        # repeated FileNotFoundError/WinError logs on newer Windows.
        try:
            if shutil.which("wmic"):
                try:
                    out = subprocess.check_output(
                        [
                            "wmic",
                            "process",
                            "where",
                            f"ProcessId={pid}",
                            "get",
                            "CommandLine",
                        ],
                        stderr=subprocess.DEVNULL,
                        text=True,
                        creationflags=no_window,
                    )
                    lines = [
                        line.strip() for line in out.splitlines() if line.strip()
                    ]
                    # First non-empty line is the column header.
                    if len(lines) >= 2:
                        return " ".join(lines[1:]).strip()
                except Exception:
                    # If WMIC fails, continue to PowerShell fallback
                    logger.debug("WMIC command-line query failed for PID %d", pid)
            # PowerShell CIM fallback (works on recent Windows)
            try:
                cmd_str = (
                    "(Get-CimInstance Win32_Process -Filter "
                    '"ProcessId=%d").CommandLine'
                ) % pid
                ps_cmd = ("-NoProfile", "-Command", cmd_str)
                out = subprocess.check_output(
                    ["powershell", *ps_cmd],
                    stderr=subprocess.DEVNULL,
                    text=True,
                    creationflags=no_window,
                )
                out = out.strip()
                if out:
                    return out
            except Exception:
                logger.debug("PowerShell command-line query failed for PID %d", pid)
        except Exception:
            # Defensive: if shutil or other checks fail, give up gracefully
            logger.debug("Windows cmdline fallback failed for PID %d", pid)
        # As a last resort on Windows we cannot reliably get a cmdline
        return None
    else:
        # Unix-like systems: read /proc/<pid>/cmdline if available
        proc_path = f"/proc/{pid}/cmdline"
        try:
            if os.path.exists(proc_path):
                with open(proc_path, "rb") as fh:
                    data = fh.read()
                if not data:
                    return None
                # cmdline entries are null-separated
                raw_parts = data.split(b"\x00")
                parts = [
                    part.decode(errors="replace") for part in raw_parts if part
                ]
                return " ".join(parts)
        except Exception:
            pass
    return None
3502
+
3503
@staticmethod
def _cmdline_matches_watcher(cmdline: str) -> bool:
    """Guess whether ``cmdline`` belongs to one of our watcher processes.

    The comparison is case-insensitive. A command line qualifies when it
    mentions ``live_locks_watcher``, or when it contains ``watch`` together
    with one of ``lock_client.py``, ``collab.core.lock_client`` or
    ``collab``. Empty input never matches.
    """
    if not cmdline:
        return False
    lowered = cmdline.lower()

    # The dedicated watcher module is recognised on its own.
    if "live_locks_watcher" in lowered:
        return True

    # Every other supported entrypoint needs the "watch" sub-command.
    if "watch" not in lowered:
        return False
    entrypoint_markers = ("lock_client.py", "collab.core.lock_client", "collab")
    return any(marker in lowered for marker in entrypoint_markers)
3519
+
3520
@staticmethod
def _extract_pid_file_from_cmdline(cmdline: str) -> Optional[str]:
    """Pull the value of a ``--pid-file`` option out of a command line.

    Accepted spellings:
        --pid-file VALUE
        --pid-file=VALUE
        --pid-file="VALUE" / --pid-file='VALUE'

    Returns the value with one pair of surrounding quotes removed, or
    ``None`` when the command line is empty or has no such option.
    """
    found = (
        re.search(r"--pid-file(?:=|\s+)(\"[^\"]+\"|'[^']+'|\S+)", cmdline)
        if cmdline
        else None
    )
    if not found:
        return None

    value = found.group(1).strip()
    for quote in ('"', "'"):
        # Strip a matching pair of quotes, double quotes checked first.
        if value.startswith(quote) and value.endswith(quote):
            return value[1:-1]
    return value
3541
+
3542
def _cmdline_matches_current_pid_namespace(self, cmdline: str) -> bool:
    """Decide whether a watcher command line shares this client's PID file.

    - With a ``--pid-file`` option: its absolute path must equal the
      absolute path of the current ``PID_FILE``.
    - Without one (legacy watcher): accepted only outside test mode and
      only when the current ``PID_FILE`` is the default ``.daemon.pid``
      under ``_COLLAB_ROOT``.
    """
    explicit_pid_file = self._extract_pid_file_from_cmdline(cmdline)
    own_pid_file = os.path.abspath(PID_FILE)
    legacy_default = os.path.abspath(os.path.join(_COLLAB_ROOT, ".daemon.pid"))

    if not explicit_pid_file:
        # Legacy watcher without explicit namespace tag.
        return (not _is_test_mode()) and own_pid_file == legacy_default

    try:
        return os.path.abspath(explicit_pid_file) == own_pid_file
    except Exception:
        return False
3562
+
3563
@staticmethod
def _write_pid(
    pid: int, parent_pid: Optional[int] = None, token: Optional[str] = None
) -> None:
    """Write daemon PID metadata to the PID file as JSON.

    Historically this file contained a plain integer. Newer clients write a small
    JSON object with fields useful for diagnostics. The reader already supports both
    formats for backward compatibility.

    The payload is written to ``PID_FILE + ".tmp"`` and moved into place with
    ``os.replace``. If the move fails, the PID file is written directly and
    the temporary file is removed so it does not linger next to it.

    Args:
        pid: Process id of the daemon.
        parent_pid: Optional parent process id; stored only when truthy.
        token: Optional session token; stored (as ``str``) only when truthy.

    An ``OSError`` during writing is logged as a warning, not raised.
    """
    meta = {
        "pid": int(pid),
        # Use _safe_now to accommodate tests that monkeypatch the module
        # level `datetime` symbol. Ensure the stored time is in UTC.
        "started_at": _safe_now().astimezone(timezone.utc).isoformat(),
        # Use a human-friendly entrypoint string so other tools can display
        # a concise description without reconstructing the full cmdline.
        "entrypoint": "python lock_client.py",
        "cmdline": " ".join([sys.executable] + sys.argv),
        "cwd": os.getcwd(),
    }
    if parent_pid:
        meta["parent_pid"] = parent_pid
    if token:
        # Small session token to uniquely identify this watcher instance
        meta["token"] = str(token)

    payload = json.dumps(meta)
    try:
        # Write atomically where possible
        tmp = PID_FILE + ".tmp"
        with open(tmp, "w", encoding="utf-8") as f:
            f.write(payload)
            f.flush()
            try:
                os.fsync(f.fileno())
            except Exception:
                # fsync is best-effort; the data is already flushed.
                pass
        try:
            os.replace(tmp, PID_FILE)
        except Exception:
            try:
                # Fallback to non-atomic write
                with open(PID_FILE, "w", encoding="utf-8") as f2:
                    f2.write(payload)
            finally:
                # The move did not consume the temporary file; drop it.
                try:
                    os.remove(tmp)
                except OSError:
                    pass
    except OSError as e:
        logger.warning("Could not write PID file: %s", e)
3608
+
3609
@staticmethod
def _remove_pid() -> None:
    """Delete the PID file when it is present.

    Nothing is done when the ``COLLAB_TEST_MODE`` environment variable equals
    ``"1"``, so that test processes cannot accidentally delete the production
    watcher's PID file. Any ``OSError`` raised while checking for or removing
    the file is ignored.
    """
    test_mode_active = os.getenv("COLLAB_TEST_MODE") == "1"
    if not test_mode_active:
        try:
            pid_file_present = os.path.exists(PID_FILE)
            if pid_file_present:
                os.remove(PID_FILE)
        except OSError:
            # Best-effort cleanup: a missing or locked file is not an error here.
            pass
3624
+
3625
@staticmethod
def _assign_to_job_object() -> None:
    """Put the current process into a Windows Job Object with kill-on-close.

    Does nothing on platforms other than Windows. On Windows a new unnamed
    job is created, configured with ``JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE``
    and the current process is assigned to it. The job handle is deliberately
    left open on success so that it is only released when the process exits.

    Every failure is logged at debug level and otherwise ignored.

    NOTE(review): the stated intent is that the watcher does not outlive its
    parent IDE; confirm that a job created and held by the watcher itself
    provides that guarantee.
    """
    if sys.platform != "win32":
        return

    try:
        import ctypes
        from ctypes import wintypes

        kernel32 = ctypes.windll.kernel32

        # Windows constants
        kill_on_job_close_flag = 0x2000  # JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE
        extended_limit_info_class = 9  # JobObjectExtendedLimitInformation

        # ctypes mirrors of the Win32 structures passed to SetInformationJobObject.
        class JOBOBJECT_BASIC_LIMIT_INFORMATION(ctypes.Structure):
            _fields_ = [
                ("PerProcessUserTimeLimit", wintypes.LARGE_INTEGER),
                ("PerJobUserTimeLimit", wintypes.LARGE_INTEGER),
                ("LimitFlags", wintypes.DWORD),
                ("MinimumWorkingSetSize", ctypes.c_size_t),
                ("MaximumWorkingSetSize", ctypes.c_size_t),
                ("ActiveProcessLimit", wintypes.DWORD),
                ("Affinity", ctypes.c_void_p),
                ("PriorityClass", wintypes.DWORD),
                ("SchedulingClass", wintypes.DWORD),
            ]

        class IO_COUNTERS(ctypes.Structure):
            _fields_ = [
                ("ReadOperationCount", wintypes.ULARGE_INTEGER),
                ("WriteOperationCount", wintypes.ULARGE_INTEGER),
                ("OtherOperationCount", wintypes.ULARGE_INTEGER),
                ("ReadTransferCount", wintypes.ULARGE_INTEGER),
                ("WriteTransferCount", wintypes.ULARGE_INTEGER),
                ("OtherTransferCount", wintypes.ULARGE_INTEGER),
            ]

        class JOBOBJECT_EXTENDED_LIMIT_INFORMATION(ctypes.Structure):
            _fields_ = [
                ("BasicLimitInformation", JOBOBJECT_BASIC_LIMIT_INFORMATION),
                ("IoInfo", IO_COUNTERS),
                ("ProcessMemoryLimit", ctypes.c_size_t),
                ("JobMemoryLimit", ctypes.c_size_t),
                ("PeakProcessMemoryUsed", ctypes.c_size_t),
                ("PeakJobMemoryUsed", ctypes.c_size_t),
            ]

        # Create a job object
        job = kernel32.CreateJobObjectW(None, None)
        if not job:
            logger.debug("Failed to create Job Object")
            return

        # Ask Windows to kill the job's processes when the job handle is closed.
        limits = JOBOBJECT_EXTENDED_LIMIT_INFORMATION()
        limits.BasicLimitInformation.LimitFlags = kill_on_job_close_flag

        configured = kernel32.SetInformationJobObject(
            job,
            extended_limit_info_class,
            ctypes.byref(limits),
            ctypes.sizeof(limits),
        )
        if not configured:
            logger.debug("Failed to set Job Object information")
            kernel32.CloseHandle(job)
            return

        # Assign current process to the job
        this_process = kernel32.GetCurrentProcess()
        assigned = kernel32.AssignProcessToJobObject(job, this_process)
        if not assigned:
            logger.debug(
                "Failed to assign process to Job Object (may already be in a job)"
            )
        else:
            logger.info(
                "Assigned watcher to Job Object for automatic cleanup "
                "on parent exit"
            )

        # The job handle is intentionally not closed here; it stays open
        # until the process exits.
    except Exception as e:
        logger.debug("Job Object setup failed (non-critical): %s", e)
3721
+
3722
+ @staticmethod
3723
+ def _is_process_alive(pid: int) -> bool:
3724
+ """Check if a process with the given PID is currently running."""
3725
+ if sys.platform == "win32":
3726
+ # Try psutil first for most accurate status check
3727
+ try:
3728
+ import psutil
3729
+ except ImportError:
3730
+ pass
3731
+ else:
3732
+ try:
3733
+ p = psutil.Process(pid)
3734
+ status = p.status()
3735
+ if status in (psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD):
3736
+ return False
3737
+ return True
3738
+ except psutil.NoSuchProcess:
3739
+ return False
3740
+ except psutil.AccessDenied:
3741
+ return True # exists but we can't query it
3742
+ except Exception as exc:
3743
+ logger.debug("psutil status check failed for PID %s: %s", pid, exc)
3744
+
3745
+ # Win32 API with GetExitCodeProcess to detect zombies
3746
+ try:
3747
+ import ctypes
3748
+
3749
+ # PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
3750
+ process_handle = ctypes.windll.kernel32.OpenProcess(0x1000, False, pid)
3751
+ if process_handle:
3752
+ try:
3753
+ exit_code = ctypes.c_ulong(0)
3754
+ result = ctypes.windll.kernel32.GetExitCodeProcess(
3755
+ process_handle, ctypes.byref(exit_code)
3756
+ )
3757
+ # STILL_ACTIVE = 259
3758
+ if result and exit_code.value != 259:
3759
+ return False # Process has exited
3760
+ return True
3761
+ finally:
3762
+ ctypes.windll.kernel32.CloseHandle(process_handle)
3763
+ else:
3764
+ # Access denied (5) often means the process exists but
3765
+ # is a high-privileged system process.
3766
+ error = ctypes.windll.kernel32.GetLastError()
3767
+ if error == 5:
3768
+ return True
3769
+ return False
3770
+ except Exception as exc:
3771
+ logger.debug("Win32 API process check failed for PID %s: %s", pid, exc)
3772
+
3773
+ # Fallback: psutil pid_exists only (no status check)
3774
+ try:
3775
+ import psutil
3776
+
3777
+ return bool(psutil.pid_exists(pid))
3778
+ except ImportError:
3779
+ pass
3780
+ except Exception as exc:
3781
+ logger.debug("psutil pid_exists failed for PID %s: %s", pid, exc)
3782
+
3783
+ # Final Fallback: tasklist (slow but usually present)
3784
+ try:
3785
+ tasklist_exe = _resolve_executable_path("tasklist")
3786
+ if not tasklist_exe:
3787
+ return False
3788
+ out = subprocess.check_output(
3789
+ [tasklist_exe, "/FI", f"PID eq {pid}", "/NH"],
3790
+ text=True,
3791
+ creationflags=0x08000000,
3792
+ )
3793
+ return str(pid) in out
3794
+ except Exception as exc:
3795
+ logger.debug("tasklist process check failed for PID %s: %s", pid, exc)
3796
+ return False
3797
+ else:
3798
+ try:
3799
+ os.kill(pid, 0)
3800
+ return True
3801
+ except (ProcessLookupError, OSError):
3802
+ return False
3803
+
3804
def _discover_running_watchers(self) -> List[int]:
    """Discover running watcher PIDs that appear to belong to this workspace.

    A process qualifies when its command line (1) matches
    ``_cmdline_matches_watcher``, (2) matches
    ``_cmdline_matches_current_pid_namespace`` and (3) mentions the project
    root, the collab root or the literal ``.collab`` (case-insensitive).

    psutil is tried first. When it is unavailable or enumeration fails, the
    process list is taken from ``tasklist`` (Windows, Python images only) or
    ``ps`` (elsewhere) and each candidate's command line is fetched with
    ``_get_cmdline_for_pid``. The current process is always excluded.

    Returns:
        Sorted list of matching PIDs (may be empty).
    """
    own_pid = os.getpid()

    def belongs_to_workspace(cmd: str) -> bool:
        """Return True when *cmd* is a watcher command line for this workspace."""
        if not self._cmdline_matches_watcher(cmd):
            return False
        if not self._cmdline_matches_current_pid_namespace(cmd):
            return False
        # Ensure the process references this repo (cwd or path)
        lowered = cmd.lower()
        return (
            _PROJECT_ROOT.lower() in lowered
            or _COLLAB_ROOT.lower() in lowered
            or ".collab" in lowered
        )

    candidates: set[int] = set()

    # Fast path: psutil if available
    try:
        import psutil

        for proc in psutil.process_iter(attrs=("pid", "cmdline")):
            try:
                pid = int(proc.info.get("pid") or 0)
                if pid == own_pid:
                    continue
                cmdline = proc.info.get("cmdline")
                if not cmdline:
                    continue
                cmd_str = (
                    " ".join(cmdline)
                    if isinstance(cmdline, (list, tuple))
                    else str(cmdline)
                )
                if belongs_to_workspace(cmd_str):
                    candidates.add(pid)
            except Exception:
                # A single unreadable process must not abort discovery.
                continue
        return sorted(candidates)
    except Exception as exc:
        # No psutil - fallback to platform enumeration
        logger.debug("psutil process_iter unavailable/failed: %s", exc)

    if sys.platform == "win32":
        tasklist_exe = _resolve_executable_path("tasklist")
        if not tasklist_exe:
            logger.debug("tasklist executable not found; skipping Windows fallback")
            return sorted(candidates)
        for image in ("python.exe", "pythonw.exe", "python3.exe"):
            try:
                result = subprocess.run(
                    [
                        tasklist_exe,
                        "/FI",
                        f"IMAGENAME eq {image}",
                        "/FO",
                        "CSV",
                        "/NH",
                    ],
                    capture_output=True,
                    text=True,
                    creationflags=0x08000000,  # CREATE_NO_WINDOW
                )
                for line in (result.stdout or "").splitlines():
                    line = line.strip()
                    if not line:
                        continue
                    # CSV row: "Image Name","PID",...
                    parts = line.strip('"').split('","')
                    if len(parts) >= 2:
                        try:
                            pid = int(parts[1])
                            if pid != own_pid:
                                candidates.add(pid)
                        except Exception as exc:
                            logger.debug(
                                "Failed parsing tasklist row for image %s: %s",
                                image,
                                exc,
                            )
            except Exception as exc:
                logger.debug(
                    "tasklist fallback failed for image %s: %s", image, exc
                )
                continue
    else:
        try:
            ps_exe = _resolve_executable_path("ps") or "ps"
            # "args" is the portable keyword for the full command line;
            # "cmd" is only understood by Linux procps.
            result = subprocess.run(
                [ps_exe, "-eo", "pid,args"], capture_output=True, text=True
            )
            for line in (result.stdout or "").splitlines():
                line = line.strip()
                if not line:
                    continue
                parts = line.split(None, 1)
                if len(parts) >= 2:
                    try:
                        pid = int(parts[0])
                        if pid != own_pid:
                            candidates.add(pid)
                    except Exception as exc:
                        # The header row lands here as well.
                        logger.debug("Failed parsing ps output row: %s", exc)
        except Exception as exc:
            logger.debug("ps fallback failed: %s", exc)

    # The fallbacks only collected PIDs; verify each one by its command line.
    found: List[int] = []
    for pid in sorted(candidates):
        try:
            cmd = self._get_cmdline_for_pid(pid)
            if cmd and belongs_to_workspace(cmd):
                found.append(pid)
        except Exception:
            continue
    return found
3932
+
3933
def _read_pid_file(self) -> Optional[Dict[str, Any]]:
    """Load the JSON metadata stored in the PID file.

    Returns:
        The decoded dictionary when the file exists, its stripped content
        starts with ``{`` and it decodes to a ``dict``; ``None`` in every
        other case (missing file, non-JSON content, non-dict JSON, or any
        error while reading, which is logged at debug level).
    """
    if not os.path.exists(PID_FILE):
        return None

    parsed: Optional[Dict[str, Any]] = None
    try:
        with open(PID_FILE, "r", encoding="utf-8") as handle:
            content = handle.read().strip()
        if content.startswith("{"):
            candidate = json.loads(content)
            if isinstance(candidate, dict):
                parsed = candidate
    except Exception as exc:
        logger.debug("Failed reading PID metadata file %s: %s", PID_FILE, exc)
    return parsed
3947
+
3948
+ def _terminate_process(self, pid: int) -> None:
3949
+ """Forcefully terminate a process by PID."""
3950
+ if sys.platform == "win32":
3951
+ taskkill_exe = _resolve_executable_path("taskkill")
3952
+ if not taskkill_exe:
3953
+ logger.debug("taskkill not found while terminating PID %s", pid)
3954
+ return
3955
+ subprocess.run(
3956
+ [taskkill_exe, "/F", "/PID", str(pid)],
3957
+ capture_output=True,
3958
+ creationflags=0x08000000,
3959
+ )
3960
+ else:
3961
+ try:
3962
+ # Use getattr or numeric 9 for SIGKILL fallback on Windows
3963
+ sig = getattr(signal, "SIGKILL", 9)
3964
+ os.kill(pid, sig)
3965
+ except ProcessLookupError:
3966
+ pass
3967
+
3968
+ def _get_process_info_local(self, pid: int) -> Tuple[Optional[str], Optional[int]]:
3969
+ """Fetch process name and parent PID via various Windows tools."""
3970
+ if sys.platform != "win32":
3971
+ return None, None
3972
+ # Prefer psutil when available - it's the most reliable cross-platform
3973
+ try:
3974
+ import psutil
3975
+
3976
+ try:
3977
+ p = psutil.Process(pid)
3978
+ name = p.name()
3979
+ ppid = p.ppid()
3980
+ if name and not name.lower().endswith(".exe"):
3981
+ name = name + ".exe"
3982
+ return name, ppid
3983
+ except psutil.NoSuchProcess:
3984
+ return None, None
3985
+ except Exception:
3986
+ # psutil present but failed for this PID; fall through to fallbacks
3987
+ pass
3988
+ except Exception:
3989
+ # psutil not available - continue to platform fallbacks
3990
+ pass
3991
+
3992
+ # If WMIC is available, prefer it for name+PPID. Otherwise fall back
3993
+ # to tasklist for a name-only result.
3994
+ try:
3995
+ wmic_exe = _resolve_executable_path("wmic")
3996
+ if wmic_exe:
3997
+ result = subprocess.run(
3998
+ [
3999
+ wmic_exe,
4000
+ "process",
4001
+ "where",
4002
+ f"ProcessId={pid}",
4003
+ "get",
4004
+ "Name,ParentProcessId",
4005
+ "/value",
4006
+ ],
4007
+ capture_output=True,
4008
+ text=True,
4009
+ creationflags=0x08000000,
4010
+ timeout=5,
4011
+ errors="ignore",
4012
+ )
4013
+ logger.debug(
4014
+ "WMIC result for PID %d: rc=%d stdout=%r stderr=%r",
4015
+ pid,
4016
+ result.returncode,
4017
+ result.stdout[:200] if result.stdout else None,
4018
+ result.stderr[:200] if result.stderr else None,
4019
+ )
4020
+ if result.returncode == 0 and result.stdout:
4021
+ name_match = re.search(r"Name=(\S+)", result.stdout)
4022
+ parent_match = re.search(r"ParentProcessId=(\d+)", result.stdout)
4023
+ logger.debug(
4024
+ "WMIC parse for PID %d: name_match=%s parent_match=%s",
4025
+ pid,
4026
+ name_match.group(0) if name_match else None,
4027
+ parent_match.group(0) if parent_match else None,
4028
+ )
4029
+ if name_match:
4030
+ name = name_match.group(1)
4031
+ parent_id = int(parent_match.group(1)) if parent_match else None
4032
+ if not name.lower().endswith(".exe"):
4033
+ name += ".exe"
4034
+ logger.info(
4035
+ "WMIC success: PID %d = %s, parent = %s",
4036
+ pid,
4037
+ name,
4038
+ parent_id,
4039
+ )
4040
+ return name, parent_id
4041
+ except Exception as e:
4042
+ logger.debug("WMIC query failed for PID %d: %s", pid, e)
4043
+
4044
+ # Fallback: tasklist for name only
4045
+ try:
4046
+ tasklist_exe = _resolve_executable_path("tasklist")
4047
+ if not tasklist_exe:
4048
+ return None, None
4049
+ args = [tasklist_exe, "/FI", f"PID eq {pid}", "/NH", "/FO", "CSV"]
4050
+ out = (
4051
+ subprocess.check_output(
4052
+ args, stderr=subprocess.DEVNULL, creationflags=0x08000000, timeout=5
4053
+ )
4054
+ .decode("utf-8", errors="ignore")
4055
+ .strip()
4056
+ )
4057
+ # Format: "Image Name","PID","Session Name","Session#","Mem Usage"
4058
+ if out.startswith('"'):
4059
+ parts = [p.strip('"') for p in out.split(",")]
4060
+ if len(parts) >= 2:
4061
+ name = parts[0]
4062
+ return name, None
4063
+ except Exception as e:
4064
+ logger.debug("tasklist query failed for PID %d: %s", pid, e)
4065
+
4066
+ return None, None
4067
+
4068
def _get_parent_ide_pid(self) -> Tuple[Optional[int], Optional[str]]:
    """Identify the IDE or terminal process that owns this session.

    Returns a tuple: (pid, detection_method).

    Detection order (first hit wins):
        - ``VSCODE_PID`` env var names a live process -> "vscode_pid"
        - ``PYCHARM_HOSTED == "1"`` and the parent is alive -> "pycharm_hosted"
        - Process-tree walk using ``_get_process_info_local``:
            * ``node.exe`` whose parent name contains a VS Code-like
              name -> "node_parent" (returns that parent)
            * PyCharm / IDEA executable -> "pycharm_process"
            * first VS Code-like executable met on the way up, reported
              once the walk has finished -> "process_tree"
        - Immediate parent has a VS Code-like or PyCharm name according to
          ``tasklist`` -> "simple_walk"
        - Immediate parent is alive -> "immediate_parent"
        - Otherwise -> (None, "unknown")
    """
    vscode_like = ("code.exe", "antigravity.exe", "cursor.exe", "vscodium.exe")

    # Priority 1: VSCODE_PID environment variable (most reliable)
    vspid = os.getenv("VSCODE_PID")
    logger.debug("VSCODE_PID env var: %s", vspid)
    if vspid and vspid.isdigit():
        vspid_int = int(vspid)
        if self._is_process_alive(vspid_int):
            logger.info("Detected VSCode via VSCODE_PID: %d", vspid_int)
            return vspid_int, "vscode_pid"
        logger.debug("VSCODE_PID %d is not alive", vspid_int)

    if os.getenv("PYCHARM_HOSTED") == "1":
        hosted_ppid = os.getppid()
        if self._is_process_alive(hosted_ppid):
            logger.debug("Tying to PyCharm hosted session (PID: %d)", hosted_ppid)
            return hosted_ppid, "pycharm_hosted"

    # Priority 2: Walk up the process tree looking for the IDE window process.
    try:
        current_pid: Optional[int] = os.getpid()
        visited: set[int] = set()
        code_exe_pid: Optional[int] = None
        process_chain = []  # For debugging

        logger.debug("Walking process tree starting from PID: %d", current_pid)
        while current_pid and current_pid not in visited:
            visited.add(current_pid)
            name, ppid = self._get_process_info_local(current_pid)

            if not name:
                logger.debug("PID %d: no name found, stopping walk", current_pid)
                break

            name_lower = name.lower()
            process_chain.append(f"{name}({current_pid})")
            logger.debug("PID %d: %s (parent: %s)", current_pid, name, ppid)

            # Remember only the FIRST VS Code-like process met (the one
            # closest to this process); keep walking because a node.exe or
            # PyCharm ancestor further up still takes precedence.
            if name_lower in vscode_like and code_exe_pid is None:
                code_exe_pid = current_pid
                logger.debug(
                    "Found outermost Code.exe in process tree (PID: %d)",
                    current_pid,
                )

            # node.exe extension host - its parent may be the IDE itself
            if name_lower == "node.exe" and ppid:
                next_name, _ = self._get_process_info_local(ppid)
                if next_name and any(
                    x in next_name.lower()
                    for x in ("code", "antigravity", "cursor", "vscodium")
                ):
                    logger.debug(
                        "Detected VSCode-like IDE via node.exe parent (PID: %d)",
                        ppid,
                    )
                    return ppid, "node_parent"

            # Found PyCharm
            if name_lower in (
                "pycharm64.exe",
                "pycharm.exe",
                "idea64.exe",
                "idea.exe",
            ):
                logger.debug("Detected %s (PID: %d)", name, current_pid)
                return current_pid, "pycharm_process"

            if not ppid or ppid == current_pid:
                break
            current_pid = ppid

        logger.debug("Process chain: %s", " -> ".join(process_chain))

        if code_exe_pid:
            logger.debug("Tying to VSCode Code.exe (PID: %d)", code_exe_pid)
            return code_exe_pid, "process_tree"

    except Exception as e:
        logger.debug("Process tree walk failed: %s", e)

    # Fallback: name check on the immediate parent via tasklist.
    # os.getppid() can only report this process's own parent, so there is
    # nothing further up the chain to walk to from here; one lookup is all
    # this fallback can do.
    try:
        logger.debug("Using simple parent chain fallback")
        current = os.getpid()
        parent = os.getppid()
        if current and parent > 0 and parent != current:
            # Get process name using tasklist (simpler than WMIC)
            name = self._get_process_name_via_tasklist(parent)
            logger.info(
                "Simple walk: PID %d -> parent %d (%s)",
                current,
                parent,
                name or "unknown",
            )
            if name:
                name_lower = name.lower()
                if name_lower in vscode_like:
                    logger.info(
                        "Found VSCode-like IDE %s via simple walk (PID: %d)",
                        name,
                        parent,
                    )
                    return parent, "simple_walk"
                if name_lower in ("pycharm64.exe", "pycharm.exe"):
                    logger.info("Found PyCharm via simple walk (PID: %d)", parent)
                    return parent, "simple_walk"
    except Exception as e:
        logger.debug("Simple parent walk failed: %s", e)

    # Fallback 2: Return immediate parent if alive (last resort)
    try:
        ppid = os.getppid()
        if ppid > 0 and self._is_process_alive(ppid):
            logger.info("Falling back to immediate parent (PID: %d)", ppid)
            return ppid, "immediate_parent"
    except Exception as e:
        logger.debug("Immediate parent fallback failed: %s", e)

    logger.warning("Could not determine parent IDE/terminal PID")
    return None, "unknown"
4239
+
4240
def _get_process_name_via_tasklist(self, pid: int) -> Optional[str]:
    """Look up a process image name by running ``tasklist`` filtered on *pid*.

    Returns:
        The first CSV field of the first quoted output row that has at least
        two fields, or ``None`` when ``tasklist`` cannot be resolved, exits
        non-zero, prints no such row, or raises (logged at debug level).
    """
    try:
        exe = _resolve_executable_path("tasklist")
        if exe:
            completed = subprocess.run(
                [exe, "/FI", f"PID eq {pid}", "/NH", "/FO", "CSV"],
                capture_output=True,
                text=True,
                creationflags=0x08000000,
                timeout=3,
                errors="ignore",
            )
            if completed.returncode == 0 and completed.stdout:
                # Format: "Image Name","PID","Session Name","Session#","Mem Usage"
                for row in completed.stdout.strip().split("\n"):
                    if not row.startswith('"'):
                        continue
                    fields = [cell.strip('"') for cell in row.split(",")]
                    if len(fields) >= 2:
                        return fields[0]
    except Exception as exc:
        logger.debug("tasklist name lookup failed for PID %s: %s", pid, exc)
    return None
4265
+
4266
+
4267
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()