collab-runtime 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- collab/__init__.py +77 -0
- collab/__main__.py +11 -0
- collab_runtime-0.2.9.dist-info/METADATA +218 -0
- collab_runtime-0.2.9.dist-info/RECORD +82 -0
- collab_runtime-0.2.9.dist-info/WHEEL +5 -0
- collab_runtime-0.2.9.dist-info/entry_points.txt +3 -0
- collab_runtime-0.2.9.dist-info/licenses/LICENSE +21 -0
- collab_runtime-0.2.9.dist-info/top_level.txt +10 -0
- scripts/cleanup.py +395 -0
- scripts/collab_git_hook.py +190 -0
- scripts/format_code.py +594 -0
- scripts/generate_tests.py +560 -0
- scripts/validate_code.py +1397 -0
- src/__init__.py +4 -0
- src/dashboard/index.html +1131 -0
- src/live_locks_watcher.py +1982 -0
- src/lock_client.py +4268 -0
- src/logging_config.py +259 -0
- src/main.py +436 -0
- tests/backend/__init__.py +0 -0
- tests/backend/functional/__init__.py +0 -0
- tests/backend/functional/test_package_imports.py +43 -0
- tests/backend/integration/__init__.py +0 -0
- tests/backend/integration/test_cli_contract_parity.py +220 -0
- tests/backend/performance/__init__.py +0 -0
- tests/backend/reliability/__init__.py +0 -0
- tests/backend/security/__init__.py +0 -0
- tests/backend/unit/live_locks_watcher/__init__.py +5 -0
- tests/backend/unit/live_locks_watcher/_helpers.py +123 -0
- tests/backend/unit/live_locks_watcher/conftest.py +18 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_dashboard.py +188 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_developer.py +56 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_graceful_shutdown.py +459 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_main.py +1925 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_module.py +187 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_multi_session.py +320 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_notify.py +67 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_parsing.py +155 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_process_helpers.py +684 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_processing.py +173 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_prompt_abort.py +71 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_reconcile.py +516 -0
- tests/backend/unit/live_locks_watcher/test_live_locks_watcher_scan.py +296 -0
- tests/backend/unit/lock_client/__init__.py +1 -0
- tests/backend/unit/lock_client/_helpers.py +132 -0
- tests/backend/unit/lock_client/test_lock_client_acquire.py +214 -0
- tests/backend/unit/lock_client/test_lock_client_active.py +104 -0
- tests/backend/unit/lock_client/test_lock_client_api.py +63 -0
- tests/backend/unit/lock_client/test_lock_client_cli.py +682 -0
- tests/backend/unit/lock_client/test_lock_client_daemon.py +3730 -0
- tests/backend/unit/lock_client/test_lock_client_dashboard.py +438 -0
- tests/backend/unit/lock_client/test_lock_client_discover.py +241 -0
- tests/backend/unit/lock_client/test_lock_client_force_release.py +354 -0
- tests/backend/unit/lock_client/test_lock_client_helper_branches.py +1890 -0
- tests/backend/unit/lock_client/test_lock_client_history.py +301 -0
- tests/backend/unit/lock_client/test_lock_client_isolation.py +316 -0
- tests/backend/unit/lock_client/test_lock_client_pid.py +75 -0
- tests/backend/unit/lock_client/test_lock_client_reconcile.py +464 -0
- tests/backend/unit/lock_client/test_lock_client_release.py +77 -0
- tests/backend/unit/lock_client/test_lock_client_shutdown.py +1110 -0
- tests/backend/unit/lock_client/test_lock_client_utils.py +474 -0
- tests/backend/unit/lock_client/test_lock_client_watch.py +866 -0
- tests/backend/unit/scripts/__init__.py +1 -0
- tests/backend/unit/scripts/_helpers.py +42 -0
- tests/backend/unit/scripts/test_cleanup.py +285 -0
- tests/backend/unit/scripts/test_collab_git_hook.py +280 -0
- tests/backend/unit/scripts/test_collab_git_hook_ported.py +50 -0
- tests/backend/unit/scripts/test_format_code.py +368 -0
- tests/backend/unit/scripts/test_format_code_ported.py +177 -0
- tests/backend/unit/scripts/test_generate_tests.py +305 -0
- tests/backend/unit/scripts/test_hook_templates.py +357 -0
- tests/backend/unit/scripts/test_setup_hook_overlay.py +95 -0
- tests/backend/unit/scripts/test_validate_code.py +867 -0
- tests/backend/unit/scripts/test_validate_code_ported.py +237 -0
- tests/backend/unit/test_entrypoints_main_run.py +83 -0
- tests/backend/unit/test_logging_config.py +529 -0
- tests/backend/unit/test_main_watch_pid_file.py +278 -0
- tests/conftest.py +167 -0
- tests/frontend/__init__.py +0 -0
- tests/frontend/jest/__init__.py +0 -0
- tests/frontend/playwright/__init__.py +0 -0
- tests/packaging/test_smoke_install.py +76 -0
src/lock_client.py
ADDED
|
@@ -0,0 +1,4268 @@
|
|
|
1
|
+
"""Supabase-backed collaborative file lock client.
|
|
2
|
+
|
|
3
|
+
Provides atomic lock acquisition, release, and daemon management for preventing merge
|
|
4
|
+
conflicts in multi-developer workflows.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import atexit
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
import shutil
|
|
16
|
+
import signal
|
|
17
|
+
import socket
|
|
18
|
+
import subprocess
|
|
19
|
+
import sys
|
|
20
|
+
import tempfile
|
|
21
|
+
import threading
|
|
22
|
+
import time
|
|
23
|
+
import uuid
|
|
24
|
+
from contextlib import contextmanager
|
|
25
|
+
from datetime import datetime, timedelta, timezone
|
|
26
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
27
|
+
|
|
28
|
+
from dotenv import load_dotenv
|
|
29
|
+
|
|
30
|
+
# CLI entrypoint (collab = "src.lock_client:main" in pyproject.toml).
|
|
31
|
+
# Main orchestration is in src/main.py; import here for backward compatibility.
|
|
32
|
+
from .main import _run_cli, main # noqa: F401
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _safe_now() -> datetime:
    """Return the current naive local time, tolerating a patched ``datetime``.

    The module-level ``datetime`` name may have been replaced (for example by
    a test double) with an object whose ``now`` cannot be called normally.
    Resolution order:

    1. ``datetime.now()`` on whatever ``datetime`` currently refers to.
    2. If that raises ``TypeError``, the ``now`` attribute looked up on the
       class of the replacement, accepted only when it yields a genuine
       stdlib ``datetime``.
    3. The stdlib ``datetime.now()``.

    Only ``TypeError`` from step 1 triggers the fallbacks; any other exception
    from the first call propagates to the caller.
    """
    try:
        return datetime.now()
    except TypeError:
        pass

    # Import the real type under a private alias so isinstance checks are not
    # fooled by a monkeypatched module-level ``datetime``.
    from datetime import datetime as stdlib_datetime

    try:
        owner = datetime if isinstance(datetime, type) else datetime.__class__
        class_level_now = getattr(owner, "now", None)
        if callable(class_level_now):
            try:
                candidate = class_level_now()
            except TypeError:
                # Unbound call failed as well; fall through to the stdlib.
                candidate = None
            if isinstance(candidate, stdlib_datetime):
                return candidate
    except Exception:
        pass

    return stdlib_datetime.now()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Logging configuration
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
logger = logging.getLogger("collab.lock_client")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _emit_log_resilient(log: logging.Logger, level: int, msg: str, *args: Any) -> None:
    """Deliver one log record without ever raising or printing logging errors.

    The record is built once and offered to each handler of ``log`` and of its
    ancestors, stopping at the first logger whose ``propagate`` is false.
    A handler is skipped when the record is below its level, when its filters
    reject the record, or when it exposes a ``stream`` that is already closed.
    Exceptions raised by an individual handler are swallowed so remaining
    handlers still run.

    If no handler accepted the record, a ``LEVEL: message`` line is written to
    ``sys.stderr`` when that stream is still open.

    Args:
        log: Logger whose handler chain is walked.
        level: Numeric logging level of the record.
        msg: ``%``-style format string.
        *args: Arguments interpolated into ``msg``.
    """
    try:
        if log.disabled or level < log.getEffectiveLevel():
            return

        record = log.makeRecord(
            log.name, level, __file__, 0, msg, args, None, None, None
        )

        delivered = False
        node: Optional[logging.Logger] = log
        while node is not None:
            for hdlr in node.handlers:
                try:
                    if record.levelno < hdlr.level or not hdlr.filter(record):
                        continue
                    target = getattr(hdlr, "stream", None)
                    if target is not None and getattr(target, "closed", False):
                        continue
                    hdlr.handle(record)
                    delivered = True
                except Exception:
                    # Best-effort: never let late-shutdown logging fail noisily.
                    continue

            # Mirror the stdlib walk: climb only while propagation is enabled.
            node = node.parent if node.propagate else None

        if delivered:
            return

        # Last fallback for debugging sessions with no available handlers.
        try:
            if sys.stderr is not None and not sys.stderr.closed:
                sys.stderr.write(f"{record.levelname}: {record.getMessage()}\n")
        except Exception:
            pass
    except Exception:
        pass
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# Environment
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _read_clean_env_path(name: str) -> Optional[str]:
    """Return a sanitized path-like environment override.

    Empty, whitespace-only and comment-only values (those whose first
    non-blank character is ``#``) are treated as unset. A trailing inline
    comment is removed only when the ``#`` is preceded by whitespace, so
    ``X=path # comment`` yields ``path`` while a path that legitimately
    contains ``#`` (``/home/me/c#-projects``) is returned intact.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The cleaned value, or ``None`` when the variable is unset or carries
        no usable path.
    """
    raw = os.getenv(name)
    if raw is None:
        return None

    cleaned = raw.strip()
    if not cleaned or cleaned.startswith("#"):
        return None

    # Follow the dotenv convention: only whitespace followed by "#" starts an
    # inline comment. Splitting on a bare "#" would truncate valid paths.
    cleaned = re.split(r"\s+#", cleaned, maxsplit=1)[0].strip()
    return cleaned or None
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _resolve_project_root() -> str:
    """Return the absolute project root used for runtime operations.

    The sanitized ``COLLAB_PROJECT_ROOT`` environment variable wins when it
    holds a usable value; otherwise the current working directory is used.
    """
    configured = _read_clean_env_path("COLLAB_PROJECT_ROOT")
    chosen = configured if configured else os.getcwd()
    return os.path.abspath(chosen)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# Resolve project root first — used by state-dir helpers below
|
|
174
|
+
_PROJECT_ROOT = _resolve_project_root()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _is_test_mode() -> bool:
    """Report whether the process appears to run under a test harness.

    True when ``COLLAB_TEST_MODE`` or ``TESTING`` equals ``"1"``, or when
    ``PYTEST_CURRENT_TEST`` is present in the environment with any value.
    """
    for flag in ("COLLAB_TEST_MODE", "TESTING"):
        if os.getenv(flag) == "1":
            return True
    return "PYTEST_CURRENT_TEST" in os.environ
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _get_state_dir() -> str:
    """Return the absolute directory used for transient runtime state files.

    Resolution order:

    1. ``COLLAB_STATE_DIR`` (sanitized) when set.
    2. ``<tempdir>/collab_runtime_<hash>`` where ``<hash>`` is the first eight
       hex digits of the SHA-1 of the project root after it has been
       lower-cased, converted to backslashes and stripped of trailing
       backslashes. In test mode ``_test_<pid>`` is appended so each test
       process gets its own directory.
    3. If step 2 raises: the module-level ``_COLLAB_ROOT`` when already
       defined, then the project root, then the current working directory.

    Directory creation is best-effort; a failure to create the directory does
    not prevent its path from being returned.
    """
    explicit = _read_clean_env_path("COLLAB_STATE_DIR")
    if explicit:
        try:
            os.makedirs(explicit, exist_ok=True)
        except Exception:
            pass
        return os.path.abspath(str(explicit))

    try:
        import hashlib as _hashlib
        import tempfile as _tempfile

        # Normalize slashes and case for cross-runtime consistency (CLI vs Extension)
        canonical_root = _PROJECT_ROOT.replace("/", "\\").lower().rstrip("\\")
        full_digest = _hashlib.sha1(
            canonical_root.encode("utf-8"), usedforsecurity=False
        ).hexdigest()
        short = full_digest[:8]
        tmp_base = _tempfile.gettempdir()

        # Use a collab-specific namespace for runtime state dirs.
        prefix = "collab_runtime"
        leaf = (
            f"{prefix}_{short}_test_{os.getpid()}"
            if _is_test_mode()
            else f"{prefix}_{short}"
        )
        target = os.path.join(tmp_base, leaf)

        try:
            os.makedirs(target, exist_ok=True)
        except Exception:
            pass
        return os.path.abspath(str(target))
    except Exception:
        # Prefer the configured runtime root when it already exists at module
        # level (it is assigned after this function may first be called).
        try:
            runtime_root = globals().get("_COLLAB_ROOT")
            if runtime_root:
                return os.path.abspath(str(runtime_root))
        except Exception:
            pass
        try:
            return os.path.abspath(_PROJECT_ROOT)
        except Exception:
            return os.getcwd()
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _resolve_runtime_root(project_root: str) -> str:
    """Pick the persistent runtime root for the current project.

    Preference order:
        1. ``COLLAB_HOME`` environment override (made absolute).
        2. ``COLLAB_STATE_DIR`` environment override (made absolute), kept for
           backwards compatibility with tests and custom setups.
        3. ``project_root``, returned exactly as passed in.

    Args:
        project_root: Fallback location when neither override is usable.
    """
    for env_name in ("COLLAB_HOME", "COLLAB_STATE_DIR"):
        override = _read_clean_env_path(env_name)
        if override:
            return os.path.abspath(override)
    return project_root
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# Persistent runtime root: COLLAB_HOME, else COLLAB_STATE_DIR, else the project
# root (see _resolve_runtime_root).
_COLLAB_ROOT = _resolve_runtime_root(_PROJECT_ROOT)
# Directory that contains this module.
_RESOURCE_ROOT = _THIS_DIR
# Import-time side effect: make sure the runtime root exists. Unlike the
# state-dir helpers this call is not guarded, so a failure propagates to
# whoever imports the module.
os.makedirs(_COLLAB_ROOT, exist_ok=True)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _state_path(name: str) -> str:
    """Join ``name`` onto the directory returned by ``_get_state_dir()``."""
    state_dir = _get_state_dir()
    return os.path.join(state_dir, name)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _resolve_executable_path(name: str) -> Optional[str]:
    """Locate ``name`` on ``PATH`` and return its absolute path.

    ``shutil.which`` does the lookup. ``AttributeError``, ``OSError`` and
    ``ValueError`` raised by it (for example when platform-specific code paths
    are exercised on the wrong platform) are treated as "not found".

    Returns:
        The absolute path when found. When not found: ``name`` itself in test
        mode, so tests can stub subprocess calls without depending on the
        host ``PATH``; otherwise ``None``.
    """
    try:
        located = shutil.which(name)
    except (AttributeError, OSError, ValueError):
        # Degrade gracefully: behave as if the executable was not found.
        located = None

    if located:
        return os.path.abspath(located)
    return name if _is_test_mode() else None
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# Load .env from the project root (never modify .env). This runs at import
# time, before the constants below read the environment.
load_dotenv(os.path.join(_PROJECT_ROOT, ".env"))

# Supabase connection settings. LockClient prefers the service-role key over
# the anon key when both are present and treats its presence as admin access.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY")
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
# True only when LOCK_STRICT is exactly "1".
LOCK_STRICT = os.getenv("LOCK_STRICT", "0") == "1"

# Expiry semantics: this project enforces NO automatic expiry. Locks persist
# until released explicitly. The DB RPC ignores time-based expiry; the
# expires_at column is kept for audit but is not used for automatic
# replacement. Clients do not send an expires_at value.

# Developer id prefixes treated as ephemeral (do not persist locks to the DB).
# Enforced in code (not configurable via .env) to avoid accidental skips.
# Matched with str.startswith in LockClient.__init__.
EPHEMERAL_PREFIXES = ["test_dev", "ci"]

# (Intentionally no repo-level toggle) Do not expose a runtime flag to
# enable/disable runtime-path locking.

# PID file location (transient state). Defaults to ".daemon.pid" inside the
# state directory returned by _get_state_dir(), not the project root.
# Tests can override this via COLLAB_PID_FILE env var to avoid interfering with
# the live production watcher.
PID_FILE = os.getenv("COLLAB_PID_FILE") or os.path.join(_get_state_dir(), ".daemon.pid")

# Maximum number of attempts (first call included) made by
# _retry_on_network_error for network errors.
MAX_RETRIES = 3
|
|
322
|
+
|
|
323
|
+
# ---------------------------------------------------------------------------
|
|
324
|
+
# Supabase client (lazy import)
|
|
325
|
+
# ---------------------------------------------------------------------------
|
|
326
|
+
_supabase_create_client = None
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _is_installed_package_origin(origin_abs: str) -> bool:
    """Tell whether an import origin lies inside an installed-package tree.

    The path is passed through ``os.path.normcase`` and then checked for a
    ``site-packages`` or ``dist-packages`` directory component, delimited by
    ``os.sep`` on both sides.
    """
    normalized = os.path.normcase(origin_abs)
    install_markers = (
        f"{os.sep}site-packages{os.sep}",
        f"{os.sep}dist-packages{os.sep}",
    )
    for marker in install_markers:
        if marker in normalized:
            return True
    return False
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _exit_if_supabase_shadowed(supa_mod: Any) -> None:
    """Exit with status 1 when ``supa_mod`` is a local file shadowing supabase.

    The module's origin is taken from ``__spec__.origin`` and, failing that,
    ``__file__``. It counts as a shadow when its absolute path starts with
    ``_COLLAB_ROOT`` and is not inside ``site-packages``/``dist-packages``.
    Modules without an origin (typical for injected fakes) are accepted.

    Unexpected errors while inspecting the module are swallowed so a test
    double with unusual attributes does not break the caller. ``SystemExit``
    is not an ``Exception`` and therefore still propagates.
    """
    try:
        spec = getattr(supa_mod, "__spec__", None)
        spec_origin = getattr(spec, "origin", None) if spec else None
        origin = spec_origin or getattr(supa_mod, "__file__", None)
    except Exception:
        origin = None

    try:
        if origin:
            origin_abs = os.path.abspath(origin)
            # NOTE(review): plain prefix match; a sibling directory whose name
            # merely starts with _COLLAB_ROOT would also match. Confirm whether
            # a path-component comparison is wanted.
            is_repo_shadow = origin_abs.startswith(
                _COLLAB_ROOT
            ) and not _is_installed_package_origin(origin_abs)
            if is_repo_shadow:
                logger.error(
                    "Detected local module 'supabase' at %s "
                    "which shadows the installed package.",
                    origin_abs,
                )
                logger.error(
                    "Remove or rename this file/folder and re-run "
                    "tests / watcher."
                )
                sys.exit(1)
    except Exception:
        # Defensive: any unexpected error inspecting the module should not
        # break tests; fall through and let the caller use it.
        pass


def _get_create_client():
    """Lazy-load and cache the supabase ``create_client`` function.

    The result is stored in the module-level ``_supabase_create_client`` and
    returned directly on later calls.

    When ``sys.modules`` already holds a ``supabase`` entry (the real package
    or a fake injected by a test harness) that module is used, after
    confirming ``__import__("supabase")`` does not raise ``ImportError`` so a
    patched import machinery simulating a missing package is honoured.
    Otherwise the package is imported normally. In both cases the module is
    checked for accidental local shadowing.

    Returns:
        The ``create_client`` callable.

    Raises:
        SystemExit: With status 1 when the package cannot be imported, when a
            local ``supabase`` module shadows the installed one, or when a
            preloaded module lacks ``create_client``.
    """
    global _supabase_create_client
    if _supabase_create_client is not None:
        return _supabase_create_client

    supa_mod = sys.modules.get("supabase")
    if supa_mod is not None:
        # Respect a monkeypatched import machinery that reports the package
        # as missing, so such tests behave deterministically.
        try:
            __import__("supabase")
        except ImportError:
            logger.error(
                "supabase-py is not installed (import failed). "
                "Install it with: pip install supabase"
            )
            sys.exit(1)

        _exit_if_supabase_shadowed(supa_mod)

        create_fn = getattr(supa_mod, "create_client", None)
        if create_fn is None:
            logger.error(
                "The 'supabase' module present in sys.modules "
                "does not expose 'create_client'."
            )
            logger.error(
                "If this is a test, ensure your fake module "
                "provides 'create_client'."
            )
            sys.exit(1)

        _supabase_create_client = create_fn
        return _supabase_create_client

    # No preloaded module in sys.modules: import the real package and fail
    # loudly with a helpful message when it is missing.
    try:
        from supabase import create_client as create_fn
    except ImportError:
        logger.error(
            "supabase-py is not installed. Install it with: pip install supabase\n"
            "See .env.example for required environment variables."
        )
        sys.exit(1)

    # After a successful import, make sure what was resolved is not a local
    # file (e.g. supabase.py) shadowing the real package.
    supa_mod = sys.modules.get("supabase")
    if supa_mod is not None:
        _exit_if_supabase_shadowed(supa_mod)

    _supabase_create_client = create_fn
    return _supabase_create_client
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
@contextmanager
def _quiet_console_loggers(names: Optional[List[str]] = None):
    """Temporarily quieten noisy loggers for clean CLI output.

    While the context is active:

    - every logger in ``names`` (default: the HTTP/Supabase client loggers) is
      set to ``WARNING``;
    - the ``collab`` logger stops propagating to its parent, so handlers
      attached further up no longer see ``collab.*`` records while handlers
      attached to ``collab`` itself still do.

    Previous levels and the previous propagate flag are restored on exit,
    including when the body raises.

    Args:
        names: Logger names to quieten; ``None`` selects the default list.
    """
    targets = (
        ["httpx", "httpcore", "urllib3", "postgrest", "supabase"]
        if names is None
        else names
    )

    saved_levels: Dict[str, int] = {}
    for logger_name in targets:
        noisy = logging.getLogger(logger_name)
        saved_levels[logger_name] = noisy.level
        try:
            noisy.setLevel(logging.WARNING)
        except Exception:
            pass

    collab_root = logging.getLogger("collab")
    saved_propagate = getattr(collab_root, "propagate", True)
    try:
        # Keep collab.* records away from handlers on ancestor loggers while
        # leaving handlers on the collab logger itself untouched.
        collab_root.propagate = False
        yield
    finally:
        for logger_name, previous_level in saved_levels.items():
            try:
                logging.getLogger(logger_name).setLevel(previous_level)
            except Exception:
                pass
        try:
            collab_root.propagate = saved_propagate
        except Exception:
            pass
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def _validate_credentials() -> None:
    """Ensure ``SUPABASE_URL`` and ``SUPABASE_ANON_KEY`` are both configured.

    Returns silently when both are truthy. Otherwise logs a setup hint and
    exits the process with status 1. The URL is logged as-is; the key is only
    reported as "(set)" or "(not set)".

    Raises:
        SystemExit: With status 1 when either credential is missing.
    """
    if SUPABASE_URL and SUPABASE_ANON_KEY:
        return

    url_display = SUPABASE_URL or "(not set)"
    key_display = "(set)" if SUPABASE_ANON_KEY else "(not set)"
    logger.error(
        "Missing Supabase credentials.\n"
        "  SUPABASE_URL=%s\n"
        "  SUPABASE_ANON_KEY=%s\n\n"
        "Please copy .env.example to .env at the project root\n"
        "and fill in your Supabase project credentials.\n"
        "See README.md for setup instructions.",
        url_display,
        key_display,
    )
    sys.exit(1)
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def _retry_on_network_error(func, *args, **kwargs) -> Any:
    """Call ``func`` and retry with exponential backoff on network errors.

    An exception counts as network-related when its lower-cased message
    contains "timeout", "connection", "network" or "unreachable". Any other
    exception is re-raised immediately without retrying.

    ``func`` is attempted ``MAX_RETRIES`` times in total (at least once),
    sleeping 1s, 2s, 4s, ... between attempts. There is no sleep after the
    final attempt: the failure is logged with its traceback and re-raised.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns on the first successful attempt.

    Raises:
        Exception: The non-network error raised by ``func``, or the last
            network error once all attempts are exhausted.
    """
    # Guard against a misconfigured MAX_RETRIES <= 0, which would otherwise
    # skip the call entirely and end in "raise None".
    attempts = max(1, MAX_RETRIES)
    last_error: Optional[Exception] = None

    for attempt in range(attempts):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            err_str = str(e).lower()
            # Only retry on network-related errors
            if not any(
                kw in err_str
                for kw in ("timeout", "connection", "network", "unreachable")
            ):
                raise
            last_error = e
            if attempt + 1 >= attempts:
                # Out of attempts: sleeping again would only delay the error.
                break
            wait = 2**attempt
            logger.debug(
                "Network error (attempt %d/%d), retrying in %ds: %s",
                attempt + 1,
                attempts,
                wait,
                e,
            )
            time.sleep(wait)

    # We are outside the except block here, so pass the exception explicitly;
    # logger.exception() would have no active traceback to attach.
    logger.error(
        "Permanent network failure after %d attempts",
        attempts,
        exc_info=last_error,
    )
    raise last_error  # type: ignore[misc]
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
# ---------------------------------------------------------------------------
|
|
547
|
+
# Supabase Lock Client
|
|
548
|
+
# ---------------------------------------------------------------------------
|
|
549
|
+
class LockClient:
|
|
550
|
+
"""Supabase-backed file lock client.
|
|
551
|
+
|
|
552
|
+
All lock operations use the Supabase REST API with the official Python client. Lock
|
|
553
|
+
acquisition uses the atomic ``acquire_lock`` RPC function defined in ``schema.sql``
|
|
554
|
+
to prevent race conditions.
|
|
555
|
+
"""
|
|
556
|
+
|
|
557
|
+
def __init__(
    self, developer_id: Optional[str] = None, local_only: bool = False
) -> None:
    """Set up client state and, when needed, the Supabase connection.

    Args:
        developer_id: Identity to lock under. Falls back to the
            ``COLLAB_DEVELOPER_ID`` environment variable and then to
            ``self._get_git_username()``.
        local_only: When true, no Supabase client is created.

    A Supabase client is also skipped for ephemeral identities, i.e. when
    the developer id starts with one of ``EPHEMERAL_PREFIXES``. Otherwise
    credentials are validated (which may exit the process) and a client is
    created with the service-role key when available, else the anon key.
    """
    from typing import cast

    self.local_only = local_only
    self.developer_id = (
        developer_id or os.getenv("COLLAB_DEVELOPER_ID") or self._get_git_username()
    )

    # Connection and session bookkeeping, filled in later.
    self._client: Optional[Any] = None
    self._branch_name: Optional[str] = None
    self._session_token: Optional[str] = None
    self._parent_pid: Optional[int] = None

    # Heartbeat tracking.
    self._heartbeat_file: Optional[str] = None
    self._heartbeat_grace_seconds: int = 10
    # One-time soft-skip flag to tolerate a short heartbeat hiccup
    self._heartbeat_soft_skipped: bool = False

    # OS-level parent monitor status (Windows)
    self._parent_monitor_started: bool = False
    self._parent_monitor_handle: Optional[int] = None
    self._parent_monitor_thread: Optional[threading.Thread] = None

    # The presence of a service-role key is what grants admin rights.
    self._is_admin: bool = bool(SUPABASE_SERVICE_ROLE_KEY)

    # Ephemeral identities (CI/test prefixes) never persist locks to the DB.
    # The prefix list is enforced in code rather than through configuration.
    self._ephemeral_developer_ids: set[str] = set()
    self._is_ephemeral: bool = False
    if self.developer_id:
        try:
            self._is_ephemeral = any(
                self.developer_id.startswith(prefix)
                for prefix in EPHEMERAL_PREFIXES
            )
        except Exception:
            # Defensive: if developer_id is not a str for any reason
            self._is_ephemeral = False

    if self.local_only or self._is_ephemeral:
        return

    _validate_credentials()
    api_key = SUPABASE_SERVICE_ROLE_KEY or SUPABASE_ANON_KEY
    client_factory = cast(Any, _get_create_client())
    self._client = cast(Any, client_factory(SUPABASE_URL, api_key))
|
|
601
|
+
|
|
602
|
+
def _normalize_file_path(self, file_path: str) -> str:
    """Convert ``file_path`` to a project-relative, forward-slash path.

    Absolute paths are made relative to the project root; relative paths are
    taken as given. Backslashes become forward slashes and one leading
    ``./`` is dropped, so stored paths line up with the relative,
    forward-slash paths printed by ``git status --porcelain``.

    If anything goes wrong, the input is returned with only the slash
    conversion applied.
    """
    try:
        relative = (
            os.path.relpath(file_path, _PROJECT_ROOT)
            if os.path.isabs(file_path)
            else file_path
        )
        # Forward slashes keep DB entries consistent across platforms.
        unix_style = relative.replace("\\", "/")
        return unix_style[2:] if unix_style.startswith("./") else unix_style
    except Exception:
        return file_path.replace("\\", "/")
|
|
622
|
+
|
|
623
|
+
@property
def is_admin(self) -> bool:
    """Whether this client has admin privileges (service role key present)."""
    has_admin_rights: bool = self._is_admin
    return has_admin_rights
|
|
627
|
+
|
|
628
|
+
def _get_session_token(self) -> str:
    """Derive a stable 16-hex-character token for user, machine and project.

    The token is the first 16 hex digits of the SHA-256 of
    ``"<developer id>:<hostname>:<project root>"``, each part lower-cased.
    The developer id is additionally stripped of surrounding whitespace, and
    the absolute project root has trailing slashes and backslashes removed.

    The value is never random: a random token could not be reconstructed by
    another IDE session, which would break cross-IDE re-adoption. When one
    component cannot be derived, a fixed placeholder is used for that
    component only.
    """
    try:
        owner = (
            str(self.developer_id).strip().lower()
            if self.developer_id
            else "unknown"
        )
    except Exception:
        owner = "unknown"

    try:
        machine = socket.gethostname().lower()
    except Exception:
        machine = "localhost"

    try:
        root_key = os.path.abspath(_PROJECT_ROOT).lower().rstrip("\\/")
    except Exception:
        root_key = (
            _PROJECT_ROOT.lower().rstrip("\\/") if _PROJECT_ROOT else "project"
        )

    material = ":".join((owner, machine, root_key))
    digest = hashlib.sha256(material.encode()).hexdigest()
    return digest[:16]
|
|
655
|
+
|
|
656
|
+
def _is_same_machine_token(self, stored_token: str) -> bool:
    """Return True if stored_token looks like it was generated on this machine.

    Re-derives the token for several plausible developer IDs and project-root
    spellings and compares each against *stored_token*. The variants account
    for environment differences between IDEs (e.g. VSCode vs PyCharm terminals
    may yield slightly different git config outputs or working directories).

    The hostname, project-root and developer-id fallbacks mirror the ones in
    ``_get_session_token`` so that a token minted through those fallbacks is
    still recognised here instead of raising or silently never matching.

    Args:
        stored_token: 16-character hex token previously stored with a lock.

    Returns:
        True when any candidate seed hashes to *stored_token*, else False.
    """
    try:
        hostname = socket.gethostname().lower()
    except Exception:
        hostname = "localhost"
    try:
        p_root = os.path.abspath(_PROJECT_ROOT).lower().rstrip("\\/")
    except Exception:
        p_root = _PROJECT_ROOT.lower().rstrip("\\/") if _PROJECT_ROOT else "project"

    # Gather candidate developer IDs to try
    candidates: list[str] = []
    if self.developer_id:
        candidates.append(str(self.developer_id).lower())
        # Also try stripped variants in case of whitespace differences
        candidates.append(str(self.developer_id).strip().lower())
    else:
        # Placeholder used by _get_session_token when no developer id is set
        candidates.append("unknown")

    # Also try git config user.name directly from the current environment
    try:
        git_name = (
            subprocess.check_output(
                ["git", "config", "user.name"],
                stderr=subprocess.DEVNULL,
            )
            .decode()
            .strip()
            .lower()
        )
        if git_name:
            candidates.append(git_name)
    except Exception:
        # Best-effort: git may be missing or unconfigured in this environment.
        pass

    # Also try the system username as fallback
    for env_var in ("USERNAME", "USER", "LOGNAME"):
        val = os.getenv(env_var)
        if val:
            candidates.append(val.lower())

    # Path variants: p_root is already stripped of trailing separators, so
    # only the bare form and the two trailing-separator forms are distinct.
    path_variants = [p_root, p_root + "/", p_root + "\\"]

    # dict.fromkeys de-duplicates while keeping a deterministic order.
    for dev_id in dict.fromkeys(candidates):
        for p in path_variants:
            seed = f"{dev_id}:{hostname}:{p}"
            token = hashlib.sha256(seed.encode()).hexdigest()[:16]
            if token == stored_token:
                logger.debug(
                    "Token matched same-machine variant: dev_id=%r path=%r",
                    dev_id,
                    p,
                )
                return True
    return False
|
|
714
|
+
|
|
715
|
+
# ------------------------------------------------------------------
|
|
716
|
+
# Git helpers
|
|
717
|
+
# ------------------------------------------------------------------
|
|
718
|
+
@staticmethod
|
|
719
|
+
def _get_git_username() -> str:
|
|
720
|
+
"""Derive developer identity from git config or environment."""
|
|
721
|
+
try:
|
|
722
|
+
name = (
|
|
723
|
+
subprocess.check_output(
|
|
724
|
+
["git", "config", "user.name"], stderr=subprocess.DEVNULL
|
|
725
|
+
)
|
|
726
|
+
.decode()
|
|
727
|
+
.strip()
|
|
728
|
+
)
|
|
729
|
+
if name:
|
|
730
|
+
return name
|
|
731
|
+
except Exception:
|
|
732
|
+
pass
|
|
733
|
+
return os.getenv("USERNAME") or os.getenv("USER") or "unknown_user"
|
|
734
|
+
|
|
735
|
+
@staticmethod
def _get_current_branch() -> Optional[str]:
    """Ask git for the branch checked out in the project root.

    Runs ``git branch --show-current`` with the project root as working
    directory and returns its stripped output. Any failure yields ``None``.
    """
    try:
        run_options = {"stderr": subprocess.DEVNULL, "cwd": _PROJECT_ROOT}
        if sys.platform == "win32":
            # 0x08000000 == CREATE_NO_WINDOW: keep a console from flashing up.
            run_options["creationflags"] = 0x08000000
        output = subprocess.check_output(
            ["git", "branch", "--show-current"], **run_options
        )
        return output.decode().strip()
    except Exception:
        return None
|
|
762
|
+
|
|
763
|
+
# ------------------------------------------------------------------
|
|
764
|
+
# Response parsing (handles varying supabase-py response shapes)
|
|
765
|
+
# ------------------------------------------------------------------
|
|
766
|
+
@staticmethod
|
|
767
|
+
def _parse_response(res) -> Tuple[Optional[int], Any, Any]:
|
|
768
|
+
"""Normalize supabase-py response into (status, data, error)."""
|
|
769
|
+
status = getattr(res, "status_code", None) or getattr(res, "status", None)
|
|
770
|
+
data = getattr(res, "data", None)
|
|
771
|
+
error = getattr(res, "error", None)
|
|
772
|
+
if isinstance(res, dict):
|
|
773
|
+
status = status or res.get("status") or res.get("status_code")
|
|
774
|
+
data = data if data is not None else res.get("data")
|
|
775
|
+
error = error or res.get("error")
|
|
776
|
+
return (status, data, error)
|
|
777
|
+
|
|
778
|
+
# ------------------------------------------------------------------
|
|
779
|
+
# Remote lock scanning (like pycharm_watcher)
|
|
780
|
+
# ------------------------------------------------------------------
|
|
781
|
+
def _scan_remote_locks(self) -> None:
    """Log every lock in ``file_locks`` that belongs to this developer.

    Intended to run before reconciliation so existing locks show up as
    [LOCKED] messages, matching pycharm_watcher behavior. Failures are logged
    at debug level and otherwise ignored.
    """
    try:
        supabase = self._client
        assert supabase is not None
        response = _retry_on_network_error(
            lambda: supabase.table("file_locks").select("*").execute()
        )
        rows = self._parse_response(response)[1]
        if not rows:
            return

        for row in rows:
            holder = row.get("developer_id", "")
            path = row.get("file_path", "")
            # Ignore rows without a path and locks held by someone else.
            if not path or holder != self.developer_id:
                continue
            logger.debug(
                "🔒 [LOCKED] %s — @%s (branch: %s, reason: %s)",
                path,
                holder,
                row.get("branch_name") or "main",
                row.get("reason") or "Auto-Watch Sync",
            )
    except Exception as exc:
        logger.debug("Remote lock scan failed: %s", exc)
|
|
816
|
+
|
|
817
|
+
# ------------------------------------------------------------------
|
|
818
|
+
# Public API
|
|
819
|
+
# ------------------------------------------------------------------
|
|
820
|
+
def acquire(
    self,
    file_path: str,
    reason: Optional[str] = None,
    branch_name: Optional[str] = None,
    expires_minutes: Optional[int] = None,
) -> Tuple[bool, str]:
    """Take a lock on *file_path* through the atomic ``acquire_lock`` RPC.

    Args:
        file_path: Project-relative or absolute path of the file to lock.
        reason: Optional free-text reason forwarded to the RPC.
        branch_name: Branch to record; defaults to the current git branch.
        expires_minutes: Accepted for interface compatibility; it is not sent
            to the backend (locks persist until explicitly released).

    Returns:
        ``(True, token)`` on success, otherwise ``(False, message)``.
    """
    # Local validation: both project-relative and absolute paths are accepted.
    if os.path.isabs(file_path):
        disk_path = file_path
    else:
        disk_path = os.path.join(_PROJECT_ROOT, file_path)

    if not os.path.exists(disk_path):
        # A deleted file may still be "in progress" (staged/unstaged delete or
        # a committed-but-unpushed delete). It stays lockable so the dashboard
        # keeps showing ownership until the lock is explicitly released.
        canonical = self._normalize_file_path(file_path)
        try:
            pending = set(self._get_modified_and_unpushed_files())
            still_tracked = canonical in pending
        except Exception:
            still_tracked = False

        if not still_tracked:
            return False, f"File or directory does not exist locally: {file_path}"

        logger.info(
            (
                "🔒 [DELETED-PATH] %s — path missing locally but "
                "tracked as in-progress"
            ),
            canonical,
        )

    # Locks are file-oriented; directory locks only produce noisy, transient
    # dashboard rows (for example runtime instance/ folders).
    if os.path.isdir(disk_path):
        return False, f"Path is a directory and cannot be locked: {file_path}"

    # Ephemeral developers (CI/test users) never persist locks: hand back a
    # purely local token and stop here.
    if getattr(self, "_is_ephemeral", False):
        local_token = f"ephemeral-{uuid.uuid4()}"
        logger.info(
            "🔒 [EPHEMERAL] %s (not persisted) — owner=%s",
            file_path,
            self.developer_id,
        )
        return True, local_token

    branch = branch_name or self._get_current_branch()
    token = self._get_session_token()

    def announce_locked() -> None:
        # Shared success log for both accepted response shapes.
        logger.info(
            "🔒 [LOCKED] %s — @%s (branch: %s, reason: %s)",
            self._normalize_file_path(file_path),
            self.developer_id,
            branch or "main",
            reason or "No reason",
        )

    # expires_at is deliberately not sent: the RPC and DB ignore time-based
    # expiry, which keeps acquisition atomic and locks alive until released.
    # The path is normalised so the watcher and dashboard see the same
    # canonical (project-relative, forward-slash) value.
    rpc_params = {
        "p_file_path": self._normalize_file_path(file_path),
        "p_developer_id": self.developer_id,
        "p_branch_name": branch,
        "p_reason": reason,
        "p_lock_token": token,
        "p_is_ephemeral": bool(getattr(self, "_is_ephemeral", False)),
    }

    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        res = _retry_on_network_error(
            lambda: client.rpc("acquire_lock", rpc_params).execute()
        )
    except Exception as e:
        return False, f"API Error: {e}"

    status, data, error = self._parse_response(res)

    if error:
        if isinstance(error, dict):
            detail = error.get("message", str(error))
        else:
            detail = str(error)
        return False, f"API Error: {detail}"

    # Preferred shape: a non-empty list whose first row carries a status.
    if isinstance(data, list) and len(data) > 0:
        outcome = data[0]
        verdict = outcome.get("status")
        if verdict == "ok":
            announce_locked()
            return True, token
        if verdict == "conflict":
            owner = outcome.get("owner", "another developer")
            logger.warning(
                (
                    "⚠️ CONFLICT: %s is locked by @%s — your changes may "
                    "cause a merge conflict."
                ),
                self._normalize_file_path(file_path),
                owner,
            )
            return False, (
                f"⚠ {file_path} is locked by @{owner}. Editing is not recommended."
            )

    # Fallback shape: no usable row, but an HTTP success status.
    if status in (200, 201):
        announce_locked()
        return True, token

    return False, f"Unexpected response: status={status}, data={data}"
|
|
948
|
+
|
|
949
|
+
def release(self, file_path: str) -> Tuple[bool, str]:
    """Drop this developer's lock on *file_path*.

    Ephemeral clients never persisted anything, so for them this is a logged
    no-op. Otherwise the matching ``file_locks`` row (same normalised path,
    same developer) is deleted.

    Returns:
        ``(success, message)``.
    """
    if getattr(self, "_is_ephemeral", False):
        logger.info(
            "🔓 [EPHEMERAL-RELEASE] %s (no-op for %s)", file_path, self.developer_id
        )
        return True, "ephemeral-released"

    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        norm = self._normalize_file_path(file_path)

        def delete_own_lock():
            # Restricted to rows owned by this developer.
            return (
                client.table("file_locks")
                .delete()
                .eq("file_path", norm)
                .eq("developer_id", self.developer_id)
                .execute()
            )

        res = _retry_on_network_error(delete_own_lock)
    except Exception as e:
        return False, f"API Error: {e}"

    status, data, error = self._parse_response(res)
    if error:
        return False, f"API Error: {error}"

    nothing_reported = status not in (200, 204) and data is None
    if nothing_reported:
        return False, "No lock released (not owner or lock does not exist)"

    logger.info(
        "🔓 [RELEASED] %s — lock released", self._normalize_file_path(file_path)
    )
    return True, "released"
|
|
986
|
+
|
|
987
|
+
def active(self) -> List[Dict]:
    """Fetch every row currently present in ``file_locks``.

    Returns an empty list when the query raises or the response carries an
    error (both are logged).
    """
    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        response = _retry_on_network_error(
            lambda: client.table("file_locks").select("*").execute()
        )
    except Exception as e:
        logger.error("Exception in active() Supabase query: %s", e)
        return []

    _, rows, problem = self._parse_response(response)
    if problem:
        logger.error("Supabase error in active(): %s", problem)
        return []
    if rows:
        return rows
    return []
|
|
1003
|
+
|
|
1004
|
+
def get_lock_status(self, file_path: str) -> Dict:
    """Describe the lock state of one file.

    Returns a dict that always has ``is_locked``. On failure it also has
    ``error``; when unlocked it has ``can_edit: True``; when locked it has
    ``locked_by``, ``acquired_at``, ``reason`` and ``can_edit`` (True only if
    this developer holds the lock).
    """
    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        norm = self._normalize_file_path(file_path)

        def fetch_rows():
            return (
                client.table("file_locks")
                .select("*")
                .eq("file_path", norm)
                .execute()
            )

        res = _retry_on_network_error(fetch_rows)
    except Exception as e:
        return {"is_locked": False, "error": str(e)}

    _, data, error = self._parse_response(res)
    if error:
        return {"is_locked": False, "error": str(error)}
    if not data:
        return {"is_locked": False, "can_edit": True}

    # Server-side expiry is disabled, so a present row means an active lock
    # until it is explicitly released. expires_at is intentionally not
    # exposed: it was removed from the schema.
    lock = data[0]
    holder = lock.get("developer_id")
    return {
        "is_locked": True,
        "locked_by": holder,
        "acquired_at": lock.get("acquired_at"),
        "reason": lock.get("reason"),
        "can_edit": holder == self.developer_id,
    }
|
|
1041
|
+
|
|
1042
|
+
def release_all(self) -> int:
    """Release every lock this developer currently holds.

    Returns:
        The number of locks for which ``release`` reported success.
    """
    # Collect the developer's own paths first, then release them one by one.
    own_paths = [
        entry.get("file_path", "")
        for entry in self.active()
        if entry.get("developer_id") == self.developer_id
    ]
    released = 0
    for path in own_paths:
        ok, _message = self.release(path)
        if ok:
            released += 1
    return released
|
|
1055
|
+
|
|
1056
|
+
def force_release(self, file_path: str) -> Tuple[bool, str]:
    """Force-release a lock on file_path.

    Non-admin users can only force-release their own locks. Admin users (with
    SUPABASE_SERVICE_ROLE_KEY) can force-release any lock.

    The delete filters on the normalised path, the same canonical form that
    ``acquire``, ``release`` and ``get_lock_status`` use. Filtering on the raw
    argument would silently match nothing for absolute or backslash paths.

    Args:
        file_path: Project-relative or absolute path of the locked file.

    Returns:
        (success: bool, message: str).
    """
    if not self._is_admin:
        # Non-admin: verify the lock belongs to this developer
        status_info = self.get_lock_status(file_path)
        if (
            status_info.get("is_locked")
            and status_info.get("locked_by") != self.developer_id
        ):
            owner = status_info.get("locked_by", "another developer")
            return False, (
                f"Permission denied: {file_path} is locked by @{owner}. "
                "Only admins can force-release other developers' locks."
            )

    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        norm = self._normalize_file_path(file_path)
        query = client.table("file_locks").delete().eq("file_path", norm)
        if not self._is_admin:
            # Defence in depth: never delete someone else's row as non-admin.
            query = query.eq("developer_id", self.developer_id)
        res = _retry_on_network_error(lambda: query.execute())
    except Exception as e:
        return False, f"API Error: {e}"
    _, data, error = self._parse_response(res)
    if error:
        return False, f"API Error: {error}"
    if data is not None:
        return True, "force-released"
    return False, "No lock removed"
|
|
1092
|
+
|
|
1093
|
+
def force_release_all(self) -> int:
    """Delete every lock in ``file_locks`` (admin only).

    Non-admin callers get a warning in the log and a result of 0.

    Returns:
        The number of locks released. If a batch fails, the count reached so
        far is returned; any other unexpected failure yields 0.
    """
    if not self._is_admin:
        logger.warning(
            "Attempted force_release_all without admin privileges (dev=%s)",
            self.developer_id,
        )
        return 0

    try:
        # Collect the stored path of every existing lock.
        paths: List[str] = []
        for entry in self.active() or []:
            stored = entry.get("file_path")
            if isinstance(stored, str) and stored:
                paths.append(stored)
        if not paths:
            return 0

        client = self._client
        assert client is not None, "Supabase client not initialized"

        # PostgREST forbids DELETE without a WHERE clause, so rows are removed
        # with file_path IN (<paths>), batched to stay clear of URL length
        # limits on very large sets.
        batch_size = 200
        deleted_total = 0
        for start in range(0, len(paths), batch_size):
            batch = paths[start : start + batch_size]
            try:
                res = _retry_on_network_error(
                    lambda: client.table("file_locks")
                    .delete()
                    .in_("file_path", batch)
                    .execute()
                )
            except Exception as e:
                logger.error("force_release_all chunk delete failed: %s", e)
                return deleted_total

            status, data, error = self._parse_response(res)
            if error:
                logger.error("force_release_all API error: %s", error)
                return deleted_total

            # Prefer the deleted rows PostgREST echoes back; otherwise count
            # the paths attempted in this batch.
            if isinstance(data, list):
                deleted_total += len(data)
            else:
                deleted_total += len(batch)

        logger.info(
            "🔓 [FORCE-RELEASE-ALL] %d lock(s) released by admin", deleted_total
        )
        return deleted_total
    except Exception as e:
        logger.error("Failed to force_release_all: %s", e)
        return 0
|
|
1158
|
+
|
|
1159
|
+
def acquire_multiple(
    self,
    file_paths: List[str],
    reason: Optional[str] = None,
    branch_name: Optional[str] = None,
) -> Tuple[bool, List[str], str]:
    """Try to lock each path in *file_paths*, one ``acquire`` call per path.

    Every path is attempted even after a failure; failures are logged.

    Returns:
        ``(all_ok, failed_paths, message)``.
    """
    failed: List[str] = []
    for path in file_paths:
        ok, detail = self.acquire(path, reason=reason, branch_name=branch_name)
        if ok:
            continue
        failed.append(path)
        logger.warning("Lock conflict: %s — %s", path, detail)

    if not failed:
        return True, [], "Success"
    return False, failed, "Conflicts or errors"
|
|
1178
|
+
|
|
1179
|
+
def release_multiple(self, file_paths: List[str]) -> Tuple[bool, int, str]:
    """Call ``release`` for each path and count the successes.

    Returns:
        ``(ok, count, message)``. ``ok`` is always True and ``message`` is
        always ``"Success"``; ``count`` is the number of successful releases.
    """
    released = 0
    for path in file_paths:
        ok, _message = self.release(path)
        if not ok:
            continue
        released += 1
    return True, released, "Success"
|
|
1190
|
+
|
|
1191
|
+
def history(self, file_path: Optional[str] = None, limit: int = 20) -> List[Dict]:
    """Fetch lock history records, newest first.

    When *file_path* is provided, an exact match on its normalised form (the
    canonical project-relative, forward-slash path that locks are stored
    under) is tried first. If that returns nothing, a case-insensitive
    ``ILIKE %<basename>%`` fallback query runs so the user does not have to
    remember the full stored path.

    Args:
        file_path: Optional path to filter on; absolute or backslash-separated
            input is accepted.
        limit: Maximum number of rows to return.

    Returns:
        A list of history rows; empty on any query failure (which is logged).
    """
    client = self._client
    assert client is not None, "Supabase client not initialized"
    try:
        q = client.table("file_locks_history").select("*")
        if file_path:
            # Match the stored canonical path; the raw argument would miss
            # for absolute or backslash paths and force the fuzzy fallback.
            q = q.eq("file_path", self._normalize_file_path(file_path))
        q = q.order("id", desc=True).limit(limit)
        res = q.execute()
    except Exception as exc:
        logger.error("Failed to fetch lock history: %s", exc)
        return []

    _, data, error = self._parse_response(res)
    if error:
        logger.error("History query error: %s", error)
        return []
    rows = data or []

    # Fallback: if exact match returned nothing, try a partial match
    if not rows and file_path:
        try:
            basename = file_path.replace("\\", "/").rsplit("/", 1)[-1]
            q2 = (
                client.table("file_locks_history")
                .select("*")
                .ilike("file_path", f"%{basename}%")
                .order("id", desc=True)
                .limit(limit)
            )
            res2 = q2.execute()
            _, data2, error2 = self._parse_response(res2)
            if not error2 and data2:
                rows = data2
        except Exception:
            pass  # Fallback is best-effort

    return rows
|
|
1235
|
+
|
|
1236
|
+
def prune_history(self, retention_days: int = 30) -> Tuple[bool, int, str]:
    """Remove lock history older than *retention_days* days.

    The ``prune_lock_history`` RPC is tried first. If it raises or reports an
    error, rows with ``released_at`` before the cutoff are deleted directly
    from ``file_locks_history`` instead.

    Returns:
        ``(ok, deleted_count, message)``.
    """
    if retention_days < 1:
        return False, 0, "retention_days must be >= 1"

    client = self._client
    assert client is not None, "Supabase client not initialized"

    # Preferred path: RPC in schema.sql (stable, server-side retention logic).
    try:
        res = _retry_on_network_error(
            lambda: client.rpc(
                "prune_lock_history", {"p_retention_days": retention_days}
            ).execute()
        )
        _, data, error = self._parse_response(res)
        if error:
            raise RuntimeError(str(error))

        # The RPC result may be a bare number, a list holding a number, or a
        # list holding a dict keyed by one of several known names.
        deleted = 0
        if isinstance(data, (int, float)):
            deleted = int(data)
        elif isinstance(data, list) and data:
            first = data[0]
            if isinstance(first, dict):
                for key in ("prune_lock_history", "deleted_count", "count"):
                    if key not in first:
                        continue
                    try:
                        deleted = int(first[key])
                    except Exception:
                        continue
                    break
            elif isinstance(first, (int, float)):
                deleted = int(first)

        return True, deleted, "history-pruned"
    except Exception as exc:
        # Backward-compatible fallback when RPC isn't deployed yet.
        logger.warning(
            "History prune RPC unavailable, falling back to REST delete: %s", exc
        )

    cutoff = _safe_now().astimezone(timezone.utc) - timedelta(days=retention_days)
    cutoff_iso = cutoff.isoformat()
    try:
        res = _retry_on_network_error(
            lambda: (
                client.table("file_locks_history")
                .delete()
                .lt("released_at", cutoff_iso)
                .execute()
            )
        )
        _, data, error = self._parse_response(res)
        if error:
            return False, 0, f"API Error: {error}"
        if isinstance(data, list):
            deleted = len(data)
        else:
            deleted = 0
        return True, deleted, "history-pruned-fallback"
    except Exception as exc:
        return False, 0, f"API Error: {exc}"
|
|
1300
|
+
|
|
1301
|
+
# ------------------------------------------------------------------
|
|
1302
|
+
# Daemon management
|
|
1303
|
+
# ------------------------------------------------------------------
|
|
1304
|
+
def daemon_start(
    self, interval: int = 5, timeout_mins: int = 0, open_dashboard: bool = False
) -> None:
    """Start the watcher as a background daemon process.

    Steps, in order:

    1. If the PID file names a live process, decide whether it is a valid
       watcher (report and return) or an orphan/stale entry (replace it).
    2. Remove any leftover stop-request file so the new watcher does not shut
       down immediately.
    3. Spawn ``python -m src.lock_client watch ...`` with platform-specific
       process flags, tied to the detected parent IDE process when there is one.
    4. Poll the PID file for up to 10 seconds and print the outcome.

    Args:
        interval: Passed to the child as ``--interval``.
        timeout_mins: Passed to the child as ``--timeout``.
        open_dashboard: When True, ``--open-dashboard`` is added to the child
            command line.
    """
    pid = self._read_pid()
    if pid and self._is_process_alive(pid):
        # Check if the watcher is orphaned (parent process dead)
        metadata = self._read_pid_file()
        if metadata:
            parent_pid = metadata.get("parent_pid")
            if parent_pid and not self._is_process_alive(parent_pid):
                # Orphaned watcher - kill it and start fresh
                print(
                    f"Detected orphaned watcher (PID: {pid}, parent "
                    f"{parent_pid} dead). Replacing..."
                )
                self._terminate_process(pid)
                time.sleep(0.5)  # Give it time to terminate
                self._remove_pid()
                # Continue to start a new watcher
            else:
                # Parent is alive (or none was recorded), watcher is valid
                entrypoint = metadata.get("entrypoint", "")
                if entrypoint:
                    print(f"Watcher already running (PID: {pid}) — {entrypoint}")
                else:
                    print(f"Watcher already running (PID: {pid})")
                return
        else:
            # Legacy PID file without metadata - verify cmdline
            cmdline = self._get_cmdline_for_pid(pid)
            if cmdline and self._cmdline_matches_watcher(cmdline):
                print(f"Watcher already running (PID: {pid})")
                return
            if cmdline is None:
                # If process metadata cannot be read (permission/race),
                # prefer assuming the watcher is running instead of
                # spawning a duplicate daemon.
                print(f"Watcher already running (PID: {pid})")
                return
            # A cmdline was read but does not look like a watcher - consider
            # the PID file stale and continue to start a new watcher.

    print("Starting lock watcher in background...")

    # Defensive: remove any stale stop-request file left behind by a previous
    # `daemon-stop` (otherwise the newly-started watcher will immediately
    # detect it and perform a graceful shutdown). This can happen if a
    # stop file was left in the state dir when no watcher was running.
    try:
        stop_file = _state_path(".stop_request")
        if os.path.exists(stop_file):
            logger.debug(
                (
                    "Found stale stop request %s — removing before "
                    "starting new watcher"
                ),
                stop_file,
            )
            try:
                os.remove(stop_file)
            except Exception:
                logger.debug("Failed to remove stale stop request: %s", stop_file)
    except Exception:
        # Best-effort — don't fail startup if we can't inspect/remove the file
        pass
    # Child command line; element 0 (the interpreter) may be swapped for
    # pythonw.exe on Windows further down.
    cmd = [
        sys.executable,
        "-m",
        "src.lock_client",
        "watch",
        "--interval",
        str(interval),
        "--timeout",
        str(timeout_mins),
        "--daemon",
        "--pid-file",
        PID_FILE,
    ]

    # Tie to parent PID for clean termination
    parent_pid, parent_method = self._get_parent_ide_pid()
    if parent_pid:
        cmd.extend(["--parent-pid", str(parent_pid)])
        # Get process name for better logging
        parent_name, _ = self._get_process_info_local(parent_pid)
        parent_name_str = parent_name or "unknown"
        # Pass parent name + detection method to child for better logging
        cmd.extend(["--parent-name", parent_name_str])
        cmd.extend(["--parent-method", parent_method or "unknown"])
        # Demote verbose parent-tying messages to DEBUG so they don't
        # clutter interactive console output when the user runs
        # `collab daemon-start`.
        logger.debug(
            "Tying watcher to parent process: %s (PID: %d) via %s",
            parent_name_str,
            parent_pid,
            parent_method or "unknown",
        )
    else:
        logger.debug("No parent IDE detected - watcher will run independently")

    if open_dashboard:
        cmd.append("--open-dashboard")

    if sys.platform == "win32":
        pythonw = os.path.join(os.path.dirname(sys.executable), "pythonw.exe")
        # CREATE_NO_WINDOW (0x08000000) - hide console window
        # Only use DETACHED_PROCESS if we DON'T have a parent to track
        # DETACHED_PROCESS would orphan the process,
        # preventing IDE shutdown detection
        if parent_pid:
            # Tied to parent - use only CREATE_NO_WINDOW, not DETACHED_PROCESS
            # This ensures the process terminates when the parent IDE closes
            creation_flags = 0x08000000
            logger.debug(
                "Starting watcher tied to parent PID %d (no DETACHED)", parent_pid
            )
        else:
            # No parent to track - can safely detach
            creation_flags = (
                0x00000008 | 0x08000000
            )  # DETACHED_PROCESS + CREATE_NO_WINDOW
            logger.debug("Starting detached watcher (no parent to track)")

        # CRITICAL: Don't pass file handles from parent to child!
        # The child process will open its own log files via logging_config.py.
        # Passing parent file handles causes NUL corruption and file locking issues.
        if os.path.exists(pythonw):
            # Same arguments, but launched through pythonw.exe instead of
            # sys.executable.
            proc = subprocess.Popen(
                [pythonw] + cmd[1:],
                creationflags=creation_flags,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                close_fds=True,
                cwd=_PROJECT_ROOT,
            )
        else:
            proc = subprocess.Popen(
                cmd,
                creationflags=creation_flags,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                close_fds=True,
                cwd=_PROJECT_ROOT,
            )
    else:
        # Unix/Linux/Mac: only use start_new_session if NOT tracking a parent
        # start_new_session creates a new process group, detaching from parent
        if not parent_pid:
            # No parent to track - can safely create new session
            logger.debug("Starting detached watcher (new session)")
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                cwd=_PROJECT_ROOT,
                start_new_session=True,
            )
        else:
            # Tied to parent - stay in same process group
            logger.debug(
                "Starting watcher tied to parent %d (same session)", parent_pid
            )
            proc = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                cwd=_PROJECT_ROOT,
            )
    if sys.platform != "win32":
        # On Linux/Mac, the spawned proc.pid is the real child.
        # We record it immediately for tracking, though the child
        # will soon overwrite it with its own metadata.
        self._write_pid(proc.pid)

    # Wait up to 10 seconds for the child loop to start and write its true PID.
    # On Windows venv, pythonw.exe is a wrapper that exits quickly.
    # On Linux/Mac or non-venv Windows, it stays identical to proc.pid.
    actual_pid = None
    for i in range(100):  # 10 seconds max
        pid = self._read_pid()
        if pid and self._is_process_alive(pid):
            if sys.platform != "win32" or pid != proc.pid:
                # Non-Windows: any live recorded PID is accepted. Windows: a
                # PID different from the launcher's is taken as the real child.
                actual_pid = pid
                break
            # On Windows, if pid == proc.pid, it might be the launcher or a
            # non-wrapped pythonw.exe process.
            # If it is still alive after more than 15 polls (about 1.5s),
            # assume it's the real process.
            if i > 15:
                actual_pid = pid
                break
        time.sleep(0.1)

    if actual_pid:
        print(f"✅ Started (PID: {actual_pid})")
    else:
        print(
            "❌ Watcher process exited or failed to record PID. "
            f"(Launcher PID: {proc.pid})"
        )
        print(" Check logs/collab.log for details.")
        # Drop the PID file only if it still names the launcher written above.
        pid = self._read_pid()
        if pid == proc.pid:
            self._remove_pid()
|
|
1510
|
+
|
|
1511
|
+
def daemon_stop(self) -> None:
    """Stop the running watcher daemon.

    The target is the PID recorded in the PID file when that process is
    alive; otherwise the method falls back to ``_discover_running_watchers()``
    (skipped in test mode when ``PID_FILE`` is still the default
    ``.daemon.pid`` under ``_COLLAB_ROOT``).

    For every target PID the method:

    1. writes a ``.stop_request`` state file containing ``TOKEN:<token>``
       (when the PID metadata carries a token) or ``PID:<pid>``;
    2. polls for up to ~8 seconds (16 x 0.5s) for the process to exit;
    3. if it is still alive, terminates it forcibly: ``taskkill /F /T`` on
       Windows, otherwise ``SIGTERM`` (process group first, then the single
       PID) followed by ``SIGKILL`` if it survives ~5 more seconds.

    Progress is printed to stdout. While the method runs, the ``collab``
    logger's ``propagate`` flag is set to ``False`` and restored in a
    ``finally`` clause, including on the early-return paths.
    """
    # Ensure file-based collab logging is configured for CLI actions,
    # then temporarily prevent collab.* logs from propagating to the root
    # console handler so INFO-level records produced by this command are
    # still written to the collab log file but do not echo to the
    # user's terminal. Restore the original propagation setting at the end.
    try:
        from .logging_config import setup_collab_logging

        setup_collab_logging(collab_dir=_COLLAB_ROOT)
    except Exception:
        # Best-effort: continue even if logging setup fails
        pass

    collab_logger = logging.getLogger("collab")
    # Remember the current propagation flag so the finally block can restore it.
    _old_prop = getattr(collab_logger, "propagate", True)
    collab_logger.propagate = False
    try:

        # Try PID file first, but fall back to discovering running watcher
        # processes for this workspace if the PID file is missing or stale.
        pid = self._read_pid()
        pids_to_stop: List[int] = []

        if pid and self._is_process_alive(pid):
            pids_to_stop = [pid]
        else:
            # Safety rail: during tests, never discover/stop external watcher
            # processes when the module is still using the production PID file.
            default_pid = os.path.join(_COLLAB_ROOT, ".daemon.pid")
            if _is_test_mode() and os.path.abspath(PID_FILE) == os.path.abspath(
                default_pid
            ):
                print("No running watcher found.")
                logger.info(
                    (
                        "Test mode with default PID file detected; "
                        "skipping watcher discovery fallback"
                    )
                )
                self._remove_pid()
                return

            # Attempt to discover live watcher processes related to this repo
            try:
                found = self._discover_running_watchers()
                if found:
                    pids_to_stop = found
                else:
                    print("No running watcher found.")
                    logger.info("No running watcher found for this workspace")
                    self._remove_pid()
                    return
            except Exception as e:
                # Discovery failure is reported to the user the same way as
                # "nothing found"; the cause only goes to the debug log.
                logger.debug("Watcher discovery failed: %s", e)
                print("No running watcher found.")
                self._remove_pid()
                return

        # Stop each discovered watcher PID (soft stop first, then force)
        for target_pid in pids_to_stop:
            try:
                print(f"Stopping lock watcher (PID: {target_pid})...")
            except Exception:
                # Printing is cosmetic; never let a console error abort the stop.
                pass

            stop_file = _state_path(".stop_request")
            # Prefer token-based stop requests when available to avoid
            # accidentally stopping unrelated watcher processes that happen
            # to share PIDs (PID reuse) or when multiple watchers exist.
            # NOTE(review): the token is always read from the canonical PID
            # file, even when target_pid came from discovery - confirm that
            # the token then still identifies the process being stopped.
            try:
                pid_meta = self._read_pid_file()
                token = None
                if pid_meta and isinstance(pid_meta, dict):
                    token = pid_meta.get("token")
                if token:
                    payload = f"TOKEN:{token}"
                else:
                    payload = f"PID:{target_pid}"

                with open(stop_file, "w", encoding="utf-8") as sf:
                    sf.write(payload)
                    sf.flush()
                    try:
                        # Push the request to disk so the watcher's poll sees it.
                        os.fsync(sf.fileno())
                    except Exception:
                        pass
                logger.info(
                    "Wrote stop request file: %s (payload: %s)", stop_file, payload
                )
            except Exception as _e:
                # A failed soft-stop request is not fatal: the forced
                # termination path below still runs after the wait.
                logger.exception("Failed to write stop request file: %s", _e)

            # Wait up to ~8 seconds for watcher to exit gracefully
            for _ in range(16):
                if not self._is_process_alive(target_pid):
                    break
                time.sleep(0.5)

            if not self._is_process_alive(target_pid):
                # Wait briefly for the shutdown marker
                # (up to ~2 seconds; its absence is not treated as an error).
                shutdown_file = _state_path(".shutdown_complete")
                for _ in range(20):
                    if os.path.exists(shutdown_file):
                        break
                    time.sleep(0.1)

                # Do NOT remove the stop request here; the IDE extension
                # needs to see it to avoid triggering an auto-restart.
                # The next watcher startup will clean it up.

                # If the stopped PID matched the canonical PID file, remove it
                try:
                    canonical_pid = self._read_pid()
                    if canonical_pid == target_pid:
                        self._remove_pid()
                except Exception:
                    logger.debug(
                        "Failed to remove canonical PID after stop: %s", target_pid
                    )

                logger.info("Stopped watcher (PID: %d)", target_pid)
                print("✅ Stopped.")
                continue

            # Soft stop did not work — fallback to forced termination
            if sys.platform == "win32":
                # /T also terminates child processes; 0x08000000 is passed as
                # creationflags (presumably CREATE_NO_WINDOW - TODO confirm).
                subprocess.run(
                    ["taskkill", "/F", "/T", "/PID", str(target_pid)],
                    capture_output=True,
                    creationflags=0x08000000,
                )
            else:
                try:
                    # Negative PID: signal the process group led by target_pid.
                    os.kill(-target_pid, signal.SIGTERM)
                except (ProcessLookupError, OSError):
                    # No such group (or not permitted): signal the PID itself.
                    try:
                        os.kill(target_pid, signal.SIGTERM)
                    except ProcessLookupError:
                        pass

            # Wait up to 5 seconds for clean exit
            for _ in range(10):
                if not self._is_process_alive(target_pid):
                    break
                time.sleep(0.5)
            else:
                # Loop finished without a break: the process is still alive.
                # Force kill if still running (Unix only)
                if sys.platform != "win32":
                    try:
                        os.kill(-target_pid, signal.SIGKILL)
                    except (ProcessLookupError, OSError):
                        try:
                            os.kill(target_pid, signal.SIGKILL)
                        except ProcessLookupError:
                            pass

            # Clean up PID file if it referenced the killed process
            try:
                canonical_pid = self._read_pid()
                if canonical_pid == target_pid:
                    self._remove_pid()
            except Exception:
                logger.debug(
                    "Failed to remove canonical PID after forced kill: %s",
                    target_pid,
                )

            # NOTE(review): success is reported without re-checking that the
            # process actually exited after the forced kill.
            logger.info("Stopped watcher (PID: %d) (forced)", target_pid)
            print("✅ Stopped.")

        # Final cleanup: ensure canonical PID file removed
        try:
            self._remove_pid()
        except Exception:
            pass
    finally:
        # Always restore console propagation for collab.* loggers.
        try:
            collab_logger.propagate = _old_prop
        except Exception:
            pass
|
|
1693
|
+
|
|
1694
|
+
def daemon_status(self) -> bool:
    """Report whether the watcher daemon is running.

    Resolution order:

    1. The primary PID file. A live PID counts as running when the PID
       metadata carries an ``entrypoint``, when a legacy plain-integer PID
       equals the current process, when the process command line matches a
       watcher, or when the command line cannot be read at all.
    2. When ``self.local_only`` is truthy, process discovery via
       ``_discover_running_watchers()``.
    3. The legacy ``.pycharm_watcher.pid`` file (only reached when the
       primary PID is missing or dead).

    A human-readable status line is printed on every path, including the
    stale/repurposed-PID path.

    Returns:
        True if a running watcher was found, False otherwise.
    """
    pid = self._read_pid()
    local_only_mode = bool(getattr(self, "local_only", False))

    if pid and self._is_process_alive(pid):
        # Prefer the entrypoint stored in JSON PID metadata for the
        # human-facing output. A file that does not start with "{" is the
        # legacy plain-integer format and carries no metadata.
        entrypoint: Optional[str] = None
        had_metadata = False
        try:
            if os.path.exists(PID_FILE):
                with open(PID_FILE, "r", encoding="utf-8") as fh:
                    raw = fh.read().strip()
                if raw.startswith("{"):
                    had_metadata = True
                    obj = json.loads(raw)
                    entrypoint = obj.get("entrypoint")
        except Exception:
            # Unreadable or malformed metadata: fall back to cmdline checks.
            entrypoint = None

        if entrypoint:
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) — {entrypoint}")
            return True

        # Legacy plain-PID file: if it names this very process we can report
        # running without inspecting a command line. Otherwise fall through
        # to best-effort cmdline verification so an unrelated process that
        # reused the PID is not mistaken for the watcher.
        if not had_metadata and pid == os.getpid():
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) (cmdline unknown)")
            return True

        cmdline = self._get_cmdline_for_pid(pid)
        if not cmdline:
            # Can't verify cmdline — assume running
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) (cmdline unknown)")
            return True
        if self._cmdline_matches_watcher(cmdline):
            print(f"✅ Lock watcher is RUNNING (PID: {pid}) — {cmdline}")
            return True
        logger.debug("PID %d cmdline: %s", pid, cmdline)

        # Stale or repurposed PID in canonical file; in local-only CLI mode,
        # try process discovery before reporting NOT running.
        if local_only_mode and self._report_discovered_watcher():
            return True

        # Previously this path returned silently; tell the user the outcome
        # just like every other negative path does.
        print("❌ Lock watcher is NOT running.")
        return False

    # In local-only CLI mode, if no canonical PID was available/alive,
    # fall back to watcher process discovery.
    if local_only_mode and self._report_discovered_watcher():
        return True

    # Fallback: check legacy PyCharm watcher PID file
    _legacy_pid_file = os.path.join(_COLLAB_ROOT, ".pycharm_watcher.pid")
    if os.path.exists(_legacy_pid_file):
        try:
            with open(_legacy_pid_file, "r") as f:
                legacy_pid = int(f.read().strip())
            if self._is_process_alive(legacy_pid):
                print(f"✅ Lock watcher is RUNNING (PID: {legacy_pid})")
                return True
        except (ValueError, OSError):
            # Corrupt or unreadable legacy file: treat as "not running".
            pass

    print("❌ Lock watcher is NOT running.")
    return False

def _report_discovered_watcher(self) -> bool:
    """Print a RUNNING line for the first live discovered watcher.

    Returns:
        True if a live watcher was found and reported; False when none was
        found or discovery raised (the error is logged at debug level).
    """
    try:
        for found_pid in self._discover_running_watchers():
            if not self._is_process_alive(found_pid):
                continue
            found_cmd = self._get_cmdline_for_pid(found_pid)
            if found_cmd and self._cmdline_matches_watcher(found_cmd):
                print(
                    "✅ Lock watcher is RUNNING "
                    f"(PID: {found_pid}) — {found_cmd}"
                )
            else:
                print(
                    "✅ Lock watcher is RUNNING "
                    f"(PID: {found_pid}) (discovered)"
                )
            return True
    except Exception as e:
        logger.debug("Watcher discovery fallback failed: %s", e)
    return False
|
|
1814
|
+
|
|
1815
|
+
def cleanup_orphaned_processes(self) -> None:
    """Find and terminate orphaned lock_client processes.

    This is useful when log files are locked by zombie processes.
    Locks are PRESERVED - only the watcher processes are terminated.

    On Windows, candidate Python processes are listed with ``tasklist``,
    their command lines are inspected (psutil first, WMIC as a fallback)
    and matches are terminated with ``taskkill /F /T``. Elsewhere the
    output of ``ps aux`` is scanned and matches receive ``SIGTERM``.

    In test mode only test watchers (command line containing a known test
    marker) are targeted; outside test mode only non-test watchers are.
    The current process is never targeted. Results are printed to stdout,
    and the PID file is removed when at least one process was terminated.
    """
    print("Scanning for orphaned lock_client processes...")
    killed = 0
    pids_to_check: set[int] = set()

    is_test = _is_test_mode()

    def _should_kill(cmdline: str) -> bool:
        """Return True if *cmdline* is a lock_client we are allowed to kill."""
        cmd = cmdline.lower()
        if "lock_client" not in cmd:
            return False

        # Safeguard: prevent test runs from killing production daemons.
        is_test_watcher = (
            "pytest-of-" in cmd
            or "collab_test_" in cmd
            or "mockcmms_pytest_collab_" in cmd
        )
        return is_test_watcher if is_test else not is_test_watcher

    def _taskkill(target: int) -> None:
        """Force-terminate *target* and its child processes (Windows)."""
        print(f"Killing orphaned lock_client (PID: {target})")
        subprocess.run(
            ["taskkill", "/F", "/T", "/PID", str(target)],
            capture_output=True,
            creationflags=0x08000000,
        )

    if sys.platform == "win32":
        # Check multiple Python executable names
        python_images = ["python.exe", "pythonw.exe", "python3.exe"]
        for image in python_images:
            try:
                result = subprocess.run(
                    [
                        "tasklist",
                        "/FI",
                        f"IMAGENAME eq {image}",
                        "/FO",
                        "CSV",
                        "/NH",
                    ],
                    capture_output=True,
                    text=True,
                    creationflags=0x08000000,
                )
                for line in result.stdout.strip().split("\n"):
                    if not line.strip():
                        continue
                    # CSV row: "image","pid",... - the PID is the second field.
                    parts = line.strip().strip('"').split('","')
                    if len(parts) >= 2:
                        try:
                            pid = int(parts[1])
                            # Don't kill ourselves
                            if pid != os.getpid():
                                pids_to_check.add(pid)
                        except (ValueError, IndexError):
                            pass
            except Exception as e:
                logger.debug("Error scanning %s processes: %s", image, e)

        # Inspect command-lines (prefer psutil); fall back to WMIC if available.
        for pid in list(pids_to_check):
            try:
                cmd = ""
                inspected = False
                try:
                    import psutil

                    try:
                        # Query the command line once and reuse the result.
                        raw_cmdline = psutil.Process(pid).cmdline()
                        cmd = (
                            " ".join(raw_cmdline)
                            if isinstance(raw_cmdline, (list, tuple))
                            else str(raw_cmdline)
                        )
                        inspected = True
                    except psutil.NoSuchProcess:
                        # Process exited between listing and inspection.
                        continue
                    except Exception:
                        inspected = False
                except Exception:
                    # psutil missing or unusable on this host.
                    inspected = False

                if inspected and cmd and _should_kill(cmd):
                    _taskkill(pid)
                    killed += 1
                    continue

                # psutil not available or didn't identify commandline;
                # try WMIC if present
                if shutil.which("wmic"):
                    try:
                        result = subprocess.run(
                            [
                                "wmic",
                                "process",
                                "where",
                                f"ProcessId={pid}",
                                "get",
                                "CommandLine",
                                "/value",
                            ],
                            capture_output=True,
                            text=True,
                            creationflags=0x08000000,
                            errors="ignore",
                        )
                        out = (result.stdout or "").lower()
                        if _should_kill(out):
                            _taskkill(pid)
                            killed += 1
                    except Exception as e:
                        logger.debug("Error checking PID %d via WMIC: %s", pid, e)
                else:
                    # Cannot reliably inspect command-line on this host
                    logger.debug(
                        (
                            "Skipping command-line inspection for PID %d "
                            "(no psutil or wmic)"
                        ),
                        pid,
                    )
            except Exception as e:
                logger.debug("Error checking PID %d: %s", pid, e)
    else:
        # Unix: use ps and grep
        try:
            result = subprocess.run(
                ["ps", "aux"],
                capture_output=True,
                text=True,
            )
            for line in result.stdout.split("\n"):
                if "python" not in line.lower() or not _should_kill(line):
                    continue
                # `ps aux` columns: USER PID ... - the PID is the second field.
                parts = line.split()
                if len(parts) < 2:
                    continue
                try:
                    pid = int(parts[1])
                except ValueError:
                    continue
                # Don't kill ourselves
                if pid == os.getpid():
                    continue
                print(f" Killing orphaned process (PID: {pid})")
                try:
                    os.kill(pid, signal.SIGTERM)
                    killed += 1
                except ProcessLookupError:
                    # Already gone.
                    pass
                except PermissionError as e:
                    # `ps aux` lists every user's processes; one we may not
                    # signal must not abort the scan of the remaining ones.
                    logger.debug("Not permitted to signal PID %d: %s", pid, e)
        except Exception as e:
            logger.warning("Error scanning for orphaned processes: %s", e)

    if killed > 0:
        print(f"✅ Killed {killed} orphaned process(es).")
        print("Log files should now be unlocked.")
        # Also clean up PID file if present
        self._remove_pid()
    else:
        print("No orphaned lock_client processes found.")
        # Try to identify what's holding the log files
        if sys.platform == "win32":
            print("\nChecking what's holding log files...")
            for log_file in ["application.log", "errors.log"]:
                log_path = os.path.join(_COLLAB_ROOT, "logs", log_file)
                if os.path.exists(log_path):
                    try:
                        # Try to open the file to see if it's locked
                        with open(log_path, "a"):
                            pass  # File is accessible
                    except PermissionError:
                        print(f" {log_file} is LOCKED by another process")
                        print(f" Run: handle.exe {log_path} (from Sysinternals)")
                    except Exception as e:
                        print(f" {log_file}: {e}")
|
|
1995
|
+
|
|
1996
|
+
# ------------------------------------------------------------------
|
|
1997
|
+
# Dashboard
|
|
1998
|
+
# ------------------------------------------------------------------
|
|
1999
|
+
def dashboard(self) -> None:
    """Open the collaborative dashboard in the default browser.

    Starts the local dashboard server via ``_prepare_dashboard_server()``
    and asks the ``webbrowser`` module to open the resulting URL. Does
    nothing when no URL could be prepared. If no browser could be launched,
    the URL is printed so the user can open it manually.
    """
    url, _ = self._prepare_dashboard_server()
    if not url:
        return
    try:
        import webbrowser

        # webbrowser.open() reports failure by returning False rather than
        # raising, so the return value must be checked as well.
        opened = webbrowser.open(url)
    except Exception:
        opened = False
    if not opened:
        print(f"Open in browser manually: {url}")
|
|
2010
|
+
|
|
2011
|
+
def _prepare_dashboard_server(self) -> Tuple[Optional[str], Optional[str]]:
    """Create temp HTML with injected config, start local HTTP server.

    The dashboard template is read from ``_RESOURCE_ROOT/dashboard/index.html``
    and prefixed with a ``<script>`` that sets ``window.__SUPABASE_CONFIG__``.
    The result is written into a private temporary directory that is served
    on ``127.0.0.1`` (OS-assigned port) by a daemon thread. The server is
    shut down and the temporary directory removed at interpreter exit.

    Returns:
        ``(url, tmp_path)`` on success, or ``(None, None)`` on any error
        (errors are logged, never raised).
    """
    html_path = os.path.join(_RESOURCE_ROOT, "dashboard", "index.html")
    if not os.path.exists(html_path):
        logger.error("Dashboard file not found at %s", html_path)
        return None, None

    try:
        with open(html_path, "r", encoding="utf-8") as fh:
            content = fh.read()
    except Exception as e:
        logger.error("Error reading dashboard template: %s", e)
        return None, None

    injected = {
        "url": SUPABASE_URL or "",
        "anonKey": SUPABASE_ANON_KEY or "",
        "serviceKey": SUPABASE_SERVICE_ROLE_KEY or None,
        "user": self.developer_id or "",
    }
    # json.dumps does not escape "<", so a value containing "</script>" would
    # terminate the inline script early; "<\/" is equivalent inside JS strings.
    config_json = json.dumps(injected).replace("</", "<\\/")
    inject_script = (
        f"<script>window.__SUPABASE_CONFIG__ = {config_json};</script>\n"
    )

    # Serve from a private directory: the handler exposes its whole directory,
    # and the shared system temp dir may hold unrelated files.
    serve_dir: Optional[str] = None
    try:
        serve_dir = tempfile.mkdtemp(prefix="collab_dashboard_")
        tmp = tempfile.NamedTemporaryFile(
            mode="w",
            delete=False,
            suffix=".html",
            encoding="utf-8",
            dir=serve_dir,
        )
        try:
            tmp.write(inject_script)
            tmp.write(content)
            tmp.flush()
        finally:
            # Release the handle even when a write fails.
            tmp.close()
    except Exception as e:
        if serve_dir:
            shutil.rmtree(serve_dir, ignore_errors=True)
        logger.error("Error creating temp dashboard file: %s", e)
        return None, None

    try:
        import http.server
        from functools import partial

        tmp_dir = os.path.dirname(tmp.name)
        filename = os.path.basename(tmp.name)

        class _QuietHandler(http.server.SimpleHTTPRequestHandler):
            """Request handler that does not log requests.

            A subclass is used so the stdlib handler class itself is left
            untouched for any other user in this process.
            """

            def log_message(self, *args, **kwargs) -> None:  # noqa: D401
                return None

        Handler = partial(_QuietHandler, directory=tmp_dir)

        # Port 0 lets the OS pick a free port.
        server = http.server.ThreadingHTTPServer(("127.0.0.1", 0), Handler)
        port = server.server_address[1]

        thread = threading.Thread(target=server.serve_forever, daemon=True)
        thread.start()

        def _safe_shutdown() -> None:
            """Best-effort dashboard server shutdown for process exit."""
            try:
                server.shutdown()
            except BaseException:
                pass
            try:
                server.server_close()
            except Exception:
                pass
            # The generated page embeds credentials; do not leave it behind.
            shutil.rmtree(serve_dir, ignore_errors=True)

        atexit.register(_safe_shutdown)

        url = f"http://127.0.0.1:{port}/{filename}"

        # Probe until ready
        import socket as _socket

        for _ in range(20):
            try:
                with _socket.create_connection(("127.0.0.1", port), timeout=0.3):
                    break
            except Exception:
                time.sleep(0.05)

        return url, tmp.name
    except Exception as e:
        # Removes the generated file together with its private directory.
        shutil.rmtree(serve_dir, ignore_errors=True)
        logger.error("Failed to start local dashboard server: %s", e)
        return None, None
|
|
2102
|
+
|
|
2103
|
+
# ------------------------------------------------------------------
|
|
2104
|
+
# Watcher (foreground process)
|
|
2105
|
+
# ------------------------------------------------------------------
|
|
2106
|
+
def watch(
|
|
2107
|
+
self,
|
|
2108
|
+
interval: int = 5,
|
|
2109
|
+
timeout_mins: int = 0,
|
|
2110
|
+
open_dashboard: bool = False,
|
|
2111
|
+
daemon_mode: bool = False,
|
|
2112
|
+
parent_pid: Optional[int] = None,
|
|
2113
|
+
parent_name: Optional[str] = None,
|
|
2114
|
+
parent_method: Optional[str] = None,
|
|
2115
|
+
heartbeat_file: Optional[str] = None,
|
|
2116
|
+
heartbeat_grace_seconds: int = 10,
|
|
2117
|
+
) -> None:
|
|
2118
|
+
"""Run the file-watching loop (foreground).
|
|
2119
|
+
|
|
2120
|
+
Called by daemon_start. When *daemon_mode* is True the parent- PID liveness
|
|
2121
|
+
check is skipped (detached daemons have no meaningful parent).
|
|
2122
|
+
"""
|
|
2123
|
+
# Ensure file-based logging is wired so watch output goes to logs/
|
|
2124
|
+
from .logging_config import setup_collab_logging
|
|
2125
|
+
|
|
2126
|
+
setup_collab_logging(collab_dir=_COLLAB_ROOT)
|
|
2127
|
+
|
|
2128
|
+
if not daemon_mode:
|
|
2129
|
+
self._parent_pid = parent_pid or os.getppid()
|
|
2130
|
+
else:
|
|
2131
|
+
self._parent_pid = parent_pid
|
|
2132
|
+
|
|
2133
|
+
self._heartbeat_file = heartbeat_file
|
|
2134
|
+
self._heartbeat_grace_seconds = heartbeat_grace_seconds
|
|
2135
|
+
# Reset soft-skip on (re)start of the watch loop
|
|
2136
|
+
self._heartbeat_soft_skipped = False
|
|
2137
|
+
|
|
2138
|
+
# Include a short session token in PID metadata so stop requests can
|
|
2139
|
+
# target the intended watcher instance instead of relying solely on PIDs.
|
|
2140
|
+
try:
|
|
2141
|
+
token = self._get_session_token()
|
|
2142
|
+
except Exception:
|
|
2143
|
+
token = None
|
|
2144
|
+
self._write_pid(os.getpid(), parent_pid=self._parent_pid, token=token)
|
|
2145
|
+
logger.info("Wrote PID metadata to %s (PID: %d)", PID_FILE, os.getpid())
|
|
2146
|
+
|
|
2147
|
+
# Defensive: remove any stale stop-request file on startup so we don't
|
|
2148
|
+
# immediately shut down. The IDE extension or CLI may have left this
|
|
2149
|
+
# behind from a previous session.
|
|
2150
|
+
try:
|
|
2151
|
+
stop_file = _state_path(".stop_request")
|
|
2152
|
+
if os.path.exists(stop_file):
|
|
2153
|
+
os.remove(stop_file)
|
|
2154
|
+
logger.info("Removed stale stop request on watch loop entry.")
|
|
2155
|
+
except Exception:
|
|
2156
|
+
pass
|
|
2157
|
+
|
|
2158
|
+
self._register_signal_handlers()
|
|
2159
|
+
# Start a low-latency OS-level parent monitor (Windows) to detect
|
|
2160
|
+
# parent termination without relying on WMIC/tasklist polling.
|
|
2161
|
+
try:
|
|
2162
|
+
self._start_parent_monitor_thread()
|
|
2163
|
+
except Exception:
|
|
2164
|
+
# Best-effort: continue if monitor can't be started
|
|
2165
|
+
logger.debug("Parent monitor thread not started or failed to initialize")
|
|
2166
|
+
|
|
2167
|
+
# NOTE: Job Object is disabled to allow graceful shutdown
|
|
2168
|
+
# The Job Object kills the process immediately when parent dies,
|
|
2169
|
+
# preventing signal handlers and atexit from running.
|
|
2170
|
+
# We rely on parent death detection and signal handlers instead.
|
|
2171
|
+
|
|
2172
|
+
# Startup banner matching pycharm_watcher format exactly
|
|
2173
|
+
timeout_label = f"{timeout_mins}m" if timeout_mins > 0 else "disabled"
|
|
2174
|
+
logger.info("=" * 60)
|
|
2175
|
+
logger.info("Collab Locks -- Lock Client Watcher")
|
|
2176
|
+
logger.info("Developer: %s", self.developer_id)
|
|
2177
|
+
logger.info("Interval: %ds | Timeout: %s", interval, timeout_label)
|
|
2178
|
+
# Dashboard URL or command (like pycharm_watcher)
|
|
2179
|
+
dashboard_url, _ = self._prepare_dashboard_server()
|
|
2180
|
+
if dashboard_url:
|
|
2181
|
+
logger.info("Dashboard: %s", dashboard_url)
|
|
2182
|
+
else:
|
|
2183
|
+
logger.info("Dashboard: collab dashboard")
|
|
2184
|
+
# Optionally open the dashboard in the default browser when requested.
|
|
2185
|
+
if open_dashboard:
|
|
2186
|
+
try:
|
|
2187
|
+
self.dashboard()
|
|
2188
|
+
except Exception:
|
|
2189
|
+
logger.exception("Failed to open dashboard")
|
|
2190
|
+
logger.info("=" * 60)
|
|
2191
|
+
|
|
2192
|
+
# Log session token (truncated) for debugging cross-IDE token divergence
|
|
2193
|
+
session_token = self._get_session_token()
|
|
2194
|
+
logger.debug(
|
|
2195
|
+
"Session token: %s... (dev=%s, host=%s)",
|
|
2196
|
+
session_token[:8],
|
|
2197
|
+
self.developer_id,
|
|
2198
|
+
socket.gethostname(),
|
|
2199
|
+
)
|
|
2200
|
+
|
|
2201
|
+
# Initialize parent PID tracking for adoption detection (debug only)
|
|
2202
|
+
self._initial_ppid = os.getppid()
|
|
2203
|
+
logger.debug(
|
|
2204
|
+
"Initial parent PID recorded for adoption detection: %d", self._initial_ppid
|
|
2205
|
+
)
|
|
2206
|
+
|
|
2207
|
+
last_modified: set = set()
|
|
2208
|
+
last_change_time = _safe_now()
|
|
2209
|
+
last_parent_check = _safe_now()
|
|
2210
|
+
|
|
2211
|
+
# Initialize WMIC resolution failure streak counter for zombie process detection
|
|
2212
|
+
_parent_name_unknown_streak = 0
|
|
2213
|
+
_last_known_parent_name = parent_name
|
|
2214
|
+
|
|
2215
|
+
# Initial remote lock scan (logs [LOCKED] for existing locks)
|
|
2216
|
+
self._scan_remote_locks()
|
|
2217
|
+
|
|
2218
|
+
# Startup reconciliation: sync Supabase lock state with local git
|
|
2219
|
+
last_modified = self._reconcile()
|
|
2220
|
+
|
|
2221
|
+
# Short grace window after startup where a missing heartbeat should
|
|
2222
|
+
# not immediately trigger shutdown. This avoids a race where the
|
|
2223
|
+
# extension spawns the watcher and the heartbeat file is created
|
|
2224
|
+
# a few milliseconds later.
|
|
2225
|
+
startup_time = time.time()
|
|
2226
|
+
|
|
2227
|
+
# Normalize parent detection method if not provided by caller. This
|
|
2228
|
+
# ensures logs can state how the parent PID was inferred.
|
|
2229
|
+
if parent_method is None:
|
|
2230
|
+
try:
|
|
2231
|
+
# If VSCODE_PID matches the provided parent_pid, mark accordingly
|
|
2232
|
+
vspid = os.getenv("VSCODE_PID")
|
|
2233
|
+
if (
|
|
2234
|
+
vspid
|
|
2235
|
+
and vspid.isdigit()
|
|
2236
|
+
and parent_pid
|
|
2237
|
+
and int(vspid) == int(parent_pid)
|
|
2238
|
+
):
|
|
2239
|
+
parent_method = "vscode_pid"
|
|
2240
|
+
elif os.getenv("PYCHARM_HOSTED") == "1":
|
|
2241
|
+
parent_method = "pycharm_hosted"
|
|
2242
|
+
else:
|
|
2243
|
+
detected_pid, detected_method = self._get_parent_ide_pid()
|
|
2244
|
+
if detected_method:
|
|
2245
|
+
parent_method = detected_method
|
|
2246
|
+
else:
|
|
2247
|
+
parent_method = "unknown"
|
|
2248
|
+
except Exception:
|
|
2249
|
+
parent_method = "unknown"
|
|
2250
|
+
|
|
2251
|
+
try:
|
|
2252
|
+
while True:
|
|
2253
|
+
try:
|
|
2254
|
+
# Parent process liveness check every 2 seconds
|
|
2255
|
+
# (faster zombie detection)
|
|
2256
|
+
if (_safe_now() - last_parent_check).total_seconds() > 2:
|
|
2257
|
+
last_parent_check = _safe_now()
|
|
2258
|
+
|
|
2259
|
+
# Soft-stop request support: if a .stop_request file is
|
|
2260
|
+
# present, the watcher should perform a graceful
|
|
2261
|
+
# shutdown instead of being forcibly killed.
|
|
2262
|
+
try:
|
|
2263
|
+
stop_file = _state_path(".stop_request")
|
|
2264
|
+
if os.path.exists(stop_file):
|
|
2265
|
+
try:
|
|
2266
|
+
with open(stop_file, "r", encoding="utf-8") as sf:
|
|
2267
|
+
txt = sf.read().strip()
|
|
2268
|
+
except Exception:
|
|
2269
|
+
txt = ""
|
|
2270
|
+
|
|
2271
|
+
# Determine this watcher's PID (actual running pid)
|
|
2272
|
+
try:
|
|
2273
|
+
actual_pid = self._read_pid() or os.getpid()
|
|
2274
|
+
except Exception:
|
|
2275
|
+
actual_pid = os.getpid()
|
|
2276
|
+
|
|
2277
|
+
matched = False
|
|
2278
|
+
|
|
2279
|
+
# TOKEN:<token> takes precedence
|
|
2280
|
+
if txt.startswith("TOKEN:"):
|
|
2281
|
+
requested_token = txt.split(":", 1)[1]
|
|
2282
|
+
try:
|
|
2283
|
+
my_token = self._get_session_token()
|
|
2284
|
+
except Exception:
|
|
2285
|
+
my_token = None
|
|
2286
|
+
if (
|
|
2287
|
+
requested_token
|
|
2288
|
+
and my_token
|
|
2289
|
+
and requested_token == my_token
|
|
2290
|
+
):
|
|
2291
|
+
matched = True
|
|
2292
|
+
elif txt.startswith("PID:"):
|
|
2293
|
+
try:
|
|
2294
|
+
requested_pid = int(txt.split(":", 1)[1])
|
|
2295
|
+
if requested_pid in (actual_pid, os.getpid()):
|
|
2296
|
+
matched = True
|
|
2297
|
+
except Exception:
|
|
2298
|
+
matched = False
|
|
2299
|
+
else:
|
|
2300
|
+
# Backwards-compatible numeric-only payload
|
|
2301
|
+
try:
|
|
2302
|
+
if txt:
|
|
2303
|
+
requested_pid_opt = int(txt)
|
|
2304
|
+
if requested_pid_opt in (
|
|
2305
|
+
actual_pid,
|
|
2306
|
+
os.getpid(),
|
|
2307
|
+
):
|
|
2308
|
+
matched = True
|
|
2309
|
+
except Exception:
|
|
2310
|
+
matched = False
|
|
2311
|
+
|
|
2312
|
+
if matched:
|
|
2313
|
+
logger.info(
|
|
2314
|
+
(
|
|
2315
|
+
"Stop request detected (%s). "
|
|
2316
|
+
"Initiating graceful shutdown."
|
|
2317
|
+
),
|
|
2318
|
+
stop_file,
|
|
2319
|
+
)
|
|
2320
|
+
# Do NOT remove the stop_file here. The IDE
|
|
2321
|
+
# extension needs to see it after the process
|
|
2322
|
+
# exits to avoid an automatic restart.
|
|
2323
|
+
# The next watcher startup (via daemon_start)
|
|
2324
|
+
# will clean it up.
|
|
2325
|
+
self._graceful_shutdown(reason="stop_requested")
|
|
2326
|
+
return
|
|
2327
|
+
except Exception as exc:
|
|
2328
|
+
# Best-effort - don't crash the watcher over the stop file
|
|
2329
|
+
logger.debug("Stop-request polling failed: %s", exc)
|
|
2330
|
+
|
|
2331
|
+
# VSCode heartbeat support: if the heartbeat stops updating,
|
|
2332
|
+
# treat it as IDE/window termination and shut down.
|
|
2333
|
+
# NOTE: Check heartbeat even when an OS-level parent monitor
|
|
2334
|
+
# exists. Some IDE reloads may not terminate the parent PID
|
|
2335
|
+
# but will stop the extension/heartbeat; checking the
|
|
2336
|
+
# heartbeat makes the watcher more robust to fast reloads.
|
|
2337
|
+
if self._heartbeat_file:
|
|
2338
|
+
try:
|
|
2339
|
+
# DEBUG: Log heartbeat check
|
|
2340
|
+
now_ts = time.time()
|
|
2341
|
+
logger.debug(
|
|
2342
|
+
"Heartbeat check: file=%s exists=%s",
|
|
2343
|
+
self._heartbeat_file,
|
|
2344
|
+
os.path.exists(self._heartbeat_file),
|
|
2345
|
+
)
|
|
2346
|
+
|
|
2347
|
+
# If the heartbeat file is missing, allow a short
|
|
2348
|
+
# startup grace window to avoid races with the
|
|
2349
|
+
# extension creating the heartbeat immediately
|
|
2350
|
+
# after spawning the watcher.
|
|
2351
|
+
if not os.path.exists(self._heartbeat_file):
|
|
2352
|
+
if now_ts - startup_time < 3.0:
|
|
2353
|
+
logger.debug(
|
|
2354
|
+
(
|
|
2355
|
+
"Heartbeat missing but within startup "
|
|
2356
|
+
"grace (%.2fs) — ignoring"
|
|
2357
|
+
),
|
|
2358
|
+
now_ts - startup_time,
|
|
2359
|
+
)
|
|
2360
|
+
else:
|
|
2361
|
+
logger.info(
|
|
2362
|
+
(
|
|
2363
|
+
"Heartbeat file missing (%s). "
|
|
2364
|
+
"Shutting down..."
|
|
2365
|
+
),
|
|
2366
|
+
self._heartbeat_file,
|
|
2367
|
+
)
|
|
2368
|
+
self._graceful_shutdown(
|
|
2369
|
+
reason="heartbeat_missing"
|
|
2370
|
+
)
|
|
2371
|
+
return
|
|
2372
|
+
|
|
2373
|
+
# If the heartbeat file exists, ensure it has been
|
|
2374
|
+
# updated recently according to the configured
|
|
2375
|
+
# grace window.
|
|
2376
|
+
age = now_ts - os.path.getmtime(self._heartbeat_file)
|
|
2377
|
+
logger.debug(
|
|
2378
|
+
"Heartbeat age: %.1fs (threshold: %ss)",
|
|
2379
|
+
age,
|
|
2380
|
+
self._heartbeat_grace_seconds,
|
|
2381
|
+
)
|
|
2382
|
+
# Allow a small one-time soft skip when the parent
|
|
2383
|
+
# IDE process is still alive. This helps tolerate
|
|
2384
|
+
# brief extension-host hiccups (file system delays,
|
|
2385
|
+
# quick reloads) while preserving safety.
|
|
2386
|
+
soft_extra = 5.0
|
|
2387
|
+
if age > float(self._heartbeat_grace_seconds):
|
|
2388
|
+
parent_alive = bool(
|
|
2389
|
+
self._parent_pid
|
|
2390
|
+
and self._is_process_alive(self._parent_pid)
|
|
2391
|
+
)
|
|
2392
|
+
if parent_alive and not getattr(
|
|
2393
|
+
self, "_heartbeat_soft_skipped", False
|
|
2394
|
+
):
|
|
2395
|
+
logger.warning(
|
|
2396
|
+
(
|
|
2397
|
+
"Heartbeat stale (%.1fs > %ss). "
|
|
2398
|
+
"Parent alive; allowing "
|
|
2399
|
+
"one-time extra %.1fs grace."
|
|
2400
|
+
),
|
|
2401
|
+
age,
|
|
2402
|
+
self._heartbeat_grace_seconds,
|
|
2403
|
+
soft_extra,
|
|
2404
|
+
)
|
|
2405
|
+
self._heartbeat_soft_skipped = True
|
|
2406
|
+
elif (
|
|
2407
|
+
age
|
|
2408
|
+
> float(self._heartbeat_grace_seconds)
|
|
2409
|
+
+ soft_extra
|
|
2410
|
+
):
|
|
2411
|
+
# Final failure: log file contents for debugging
|
|
2412
|
+
try:
|
|
2413
|
+
with open(
|
|
2414
|
+
self._heartbeat_file,
|
|
2415
|
+
"r",
|
|
2416
|
+
encoding="utf-8",
|
|
2417
|
+
) as hf:
|
|
2418
|
+
content = hf.read().strip()
|
|
2419
|
+
logger.debug(
|
|
2420
|
+
"Heartbeat file content: %s", content
|
|
2421
|
+
)
|
|
2422
|
+
except Exception:
|
|
2423
|
+
pass
|
|
2424
|
+
logger.info(
|
|
2425
|
+
(
|
|
2426
|
+
"Heartbeat stale (%.1fs > %ss) at %s. "
|
|
2427
|
+
"Shutting down..."
|
|
2428
|
+
),
|
|
2429
|
+
age,
|
|
2430
|
+
self._heartbeat_grace_seconds,
|
|
2431
|
+
self._heartbeat_file,
|
|
2432
|
+
)
|
|
2433
|
+
self._graceful_shutdown(
|
|
2434
|
+
reason="heartbeat_stale"
|
|
2435
|
+
)
|
|
2436
|
+
return
|
|
2437
|
+
except Exception as e:
|
|
2438
|
+
logger.debug("Heartbeat check exception: %s", e)
|
|
2439
|
+
pass
|
|
2440
|
+
|
|
2441
|
+
# Parent diagnostics are useful during debugging but too noisy
|
|
2442
|
+
# for normal collab.log operation, so keep them at DEBUG.
|
|
2443
|
+
parent_alive = (
|
|
2444
|
+
self._is_process_alive(self._parent_pid)
|
|
2445
|
+
if self._parent_pid
|
|
2446
|
+
else False
|
|
2447
|
+
)
|
|
2448
|
+
parent_name = "unknown"
|
|
2449
|
+
if self._parent_pid:
|
|
2450
|
+
try:
|
|
2451
|
+
name, _ = self._get_process_info_local(self._parent_pid)
|
|
2452
|
+
if name:
|
|
2453
|
+
parent_name = name
|
|
2454
|
+
except Exception:
|
|
2455
|
+
pass
|
|
2456
|
+
|
|
2457
|
+
# Track WMIC resolution failures for zombie process detection
|
|
2458
|
+
if parent_name == "unknown":
|
|
2459
|
+
_parent_name_unknown_streak += 1
|
|
2460
|
+
# First transient failure: log at DEBUG
|
|
2461
|
+
# to avoid noisy warnings
|
|
2462
|
+
if (
|
|
2463
|
+
_last_known_parent_name
|
|
2464
|
+
and _parent_name_unknown_streak == 1
|
|
2465
|
+
):
|
|
2466
|
+
logger.debug(
|
|
2467
|
+
(
|
|
2468
|
+
"Parent PID %d name no longer resolvable "
|
|
2469
|
+
"(was '%s'). Streak: %d"
|
|
2470
|
+
),
|
|
2471
|
+
self._parent_pid,
|
|
2472
|
+
_last_known_parent_name,
|
|
2473
|
+
_parent_name_unknown_streak,
|
|
2474
|
+
)
|
|
2475
|
+
# Escalate to WARNING on the second consecutive failure
|
|
2476
|
+
elif (
|
|
2477
|
+
_last_known_parent_name
|
|
2478
|
+
and _parent_name_unknown_streak == 2
|
|
2479
|
+
):
|
|
2480
|
+
logger.warning(
|
|
2481
|
+
(
|
|
2482
|
+
"Parent PID %d name unresolvable for %d "
|
|
2483
|
+
"consecutive checks (was '%s'). May indicate "
|
|
2484
|
+
"IDE is shutting down."
|
|
2485
|
+
),
|
|
2486
|
+
self._parent_pid,
|
|
2487
|
+
_parent_name_unknown_streak,
|
|
2488
|
+
_last_known_parent_name,
|
|
2489
|
+
)
|
|
2490
|
+
else:
|
|
2491
|
+
if _parent_name_unknown_streak > 0:
|
|
2492
|
+
logger.info(
|
|
2493
|
+
(
|
|
2494
|
+
"Parent PID %d name resolved again as '%s'. "
|
|
2495
|
+
"Resetting streak."
|
|
2496
|
+
),
|
|
2497
|
+
self._parent_pid,
|
|
2498
|
+
parent_name,
|
|
2499
|
+
)
|
|
2500
|
+
_parent_name_unknown_streak = 0
|
|
2501
|
+
_last_known_parent_name = parent_name
|
|
2502
|
+
|
|
2503
|
+
# If parent is reported alive but name has been
|
|
2504
|
+
# unresolvable for 2+ checks,
|
|
2505
|
+
# treat it as a zombie process and shut down
|
|
2506
|
+
# (2 checks @ 2s interval = 4s max wait)
|
|
2507
|
+
if parent_alive and _parent_name_unknown_streak >= 2:
|
|
2508
|
+
parent_name_str = _last_known_parent_name or "unknown"
|
|
2509
|
+
logger.info(
|
|
2510
|
+
(
|
|
2511
|
+
"Parent process %s (PID: %d) confirmed "
|
|
2512
|
+
"terminated after %d unresolvable checks. "
|
|
2513
|
+
"Initiating shutdown."
|
|
2514
|
+
),
|
|
2515
|
+
parent_name_str,
|
|
2516
|
+
self._parent_pid,
|
|
2517
|
+
_parent_name_unknown_streak,
|
|
2518
|
+
)
|
|
2519
|
+
logger.info(
|
|
2520
|
+
(
|
|
2521
|
+
"Parent PID %d name unresolvable for %d "
|
|
2522
|
+
"consecutive checks — treating as terminated. "
|
|
2523
|
+
"Shutting down..."
|
|
2524
|
+
),
|
|
2525
|
+
self._parent_pid,
|
|
2526
|
+
_parent_name_unknown_streak,
|
|
2527
|
+
)
|
|
2528
|
+
# Console printing is redundant with logging; keep it in
|
|
2529
|
+
# the logs only to avoid duplicate terminal lines.
|
|
2530
|
+
self._graceful_shutdown()
|
|
2531
|
+
return
|
|
2532
|
+
|
|
2533
|
+
current_ppid = os.getppid()
|
|
2534
|
+
|
|
2535
|
+
# DEBUG: Always log the comparison
|
|
2536
|
+
logger.debug(
|
|
2537
|
+
"adoption check: initial=%d current=%d match=%s",
|
|
2538
|
+
self._initial_ppid,
|
|
2539
|
+
current_ppid,
|
|
2540
|
+
current_ppid == self._initial_ppid,
|
|
2541
|
+
)
|
|
2542
|
+
|
|
2543
|
+
# Check if adopted by a new parent (original parent died)
|
|
2544
|
+
if current_ppid != self._initial_ppid:
|
|
2545
|
+
logger.info(
|
|
2546
|
+
(
|
|
2547
|
+
"Detected adoption by new parent (was %d, now %d). "
|
|
2548
|
+
"Original parent died. Shutting down..."
|
|
2549
|
+
),
|
|
2550
|
+
self._initial_ppid,
|
|
2551
|
+
current_ppid,
|
|
2552
|
+
)
|
|
2553
|
+
# avoid printing duplicate messages to console
|
|
2554
|
+
self._graceful_shutdown()
|
|
2555
|
+
return
|
|
2556
|
+
|
|
2557
|
+
# Resolve immediate parent process name for clearer logs
|
|
2558
|
+
immediate_parent_name = None
|
|
2559
|
+
try:
|
|
2560
|
+
if current_ppid:
|
|
2561
|
+
immediate_parent_name, _ = self._get_process_info_local(
|
|
2562
|
+
current_ppid
|
|
2563
|
+
)
|
|
2564
|
+
except Exception:
|
|
2565
|
+
immediate_parent_name = None
|
|
2566
|
+
|
|
2567
|
+
# Include detection method for clarity
|
|
2568
|
+
if self._parent_pid:
|
|
2569
|
+
logger.debug(
|
|
2570
|
+
(
|
|
2571
|
+
"Parent check — detected IDE: %s (PID: %s) via=%s "
|
|
2572
|
+
"alive=%s; immediate parent: %s (PID: %d)"
|
|
2573
|
+
),
|
|
2574
|
+
parent_name or "unknown",
|
|
2575
|
+
self._parent_pid,
|
|
2576
|
+
parent_method or "unknown",
|
|
2577
|
+
parent_alive,
|
|
2578
|
+
immediate_parent_name or "unknown",
|
|
2579
|
+
current_ppid,
|
|
2580
|
+
)
|
|
2581
|
+
else:
|
|
2582
|
+
logger.debug(
|
|
2583
|
+
(
|
|
2584
|
+
"Parent check — immediate parent: %s (PID: %d) "
|
|
2585
|
+
"via=%s alive=%s"
|
|
2586
|
+
),
|
|
2587
|
+
immediate_parent_name or "unknown",
|
|
2588
|
+
current_ppid,
|
|
2589
|
+
parent_method or "unknown",
|
|
2590
|
+
parent_alive,
|
|
2591
|
+
)
|
|
2592
|
+
|
|
2593
|
+
# Check if we have a parent PID and it's dead
|
|
2594
|
+
if self._parent_pid:
|
|
2595
|
+
if not self._is_process_alive(self._parent_pid):
|
|
2596
|
+
logger.info(
|
|
2597
|
+
"Parent process (PID: %d) terminated. "
|
|
2598
|
+
"Shutting down...",
|
|
2599
|
+
self._parent_pid,
|
|
2600
|
+
)
|
|
2601
|
+
# Avoid duplicate console prints;
|
|
2602
|
+
# logging is authoritative
|
|
2603
|
+
self._graceful_shutdown()
|
|
2604
|
+
return
|
|
2605
|
+
else:
|
|
2606
|
+
# No explicit parent PID - check for orphan status
|
|
2607
|
+
current_ppid = os.getppid()
|
|
2608
|
+
# On Windows, orphaned processes may get
|
|
2609
|
+
# adopted by system processes
|
|
2610
|
+
# On Unix, they get adopted by init (PID 1)
|
|
2611
|
+
if sys.platform == "win32":
|
|
2612
|
+
# Windows: check if adopted by a low-PID system process
|
|
2613
|
+
if (
|
|
2614
|
+
current_ppid <= 4
|
|
2615
|
+
): # System, smss.exe, csrss.exe, etc.
|
|
2616
|
+
logger.info(
|
|
2617
|
+
(
|
|
2618
|
+
"Detected orphaned watcher (adopted "
|
|
2619
|
+
"by system PID: %d). "
|
|
2620
|
+
"Shutting down..."
|
|
2621
|
+
),
|
|
2622
|
+
current_ppid,
|
|
2623
|
+
)
|
|
2624
|
+
# Avoid printing to console redundantly
|
|
2625
|
+
self._graceful_shutdown()
|
|
2626
|
+
return
|
|
2627
|
+
else:
|
|
2628
|
+
# Unix: check if adopted by init
|
|
2629
|
+
if current_ppid == 1:
|
|
2630
|
+
logger.info(
|
|
2631
|
+
(
|
|
2632
|
+
"Detected orphaned watcher (adopted "
|
|
2633
|
+
"by init). Shutting down..."
|
|
2634
|
+
),
|
|
2635
|
+
)
|
|
2636
|
+
# Avoid printing to console redundantly
|
|
2637
|
+
self._graceful_shutdown()
|
|
2638
|
+
return
|
|
2639
|
+
|
|
2640
|
+
out = self._get_modified_and_unpushed_files()
|
|
2641
|
+
current_modified = set(out)
|
|
2642
|
+
|
|
2643
|
+
if current_modified != last_modified:
|
|
2644
|
+
last_change_time = _safe_now()
|
|
2645
|
+
new_files = current_modified - last_modified
|
|
2646
|
+
if new_files:
|
|
2647
|
+
logger.info("Detected local changes: %s", list(new_files))
|
|
2648
|
+
branch = self._get_current_branch()
|
|
2649
|
+
ok, failed, msg = self.acquire_multiple(
|
|
2650
|
+
list(new_files),
|
|
2651
|
+
branch_name=branch,
|
|
2652
|
+
reason="Auto-Watch Sync",
|
|
2653
|
+
)
|
|
2654
|
+
if not ok:
|
|
2655
|
+
logger.warning("⚠️ CONFLICT ALERT: %s", msg)
|
|
2656
|
+
|
|
2657
|
+
released = last_modified - current_modified
|
|
2658
|
+
if released:
|
|
2659
|
+
ok, count, _ = self.release_multiple(list(released))
|
|
2660
|
+
if ok and count > 0:
|
|
2661
|
+
logger.info("🔓 [RELEASED] %d file(s) released", count)
|
|
2662
|
+
|
|
2663
|
+
last_modified = current_modified
|
|
2664
|
+
else:
|
|
2665
|
+
# Idle timeout
|
|
2666
|
+
idle = _safe_now() - last_change_time
|
|
2667
|
+
if timeout_mins > 0 and idle > timedelta(minutes=timeout_mins):
|
|
2668
|
+
logger.info(
|
|
2669
|
+
"Watcher timed out after %dm inactivity.", timeout_mins
|
|
2670
|
+
)
|
|
2671
|
+
break
|
|
2672
|
+
|
|
2673
|
+
time.sleep(interval)
|
|
2674
|
+
except Exception as e:
|
|
2675
|
+
logger.error("Error in watcher loop: %s", e, exc_info=True)
|
|
2676
|
+
time.sleep(interval)
|
|
2677
|
+
except KeyboardInterrupt:
|
|
2678
|
+
logger.info("Watcher stopped by user.")
|
|
2679
|
+
finally:
|
|
2680
|
+
self._graceful_shutdown()
|
|
2681
|
+
|
|
2682
|
+
# ------------------------------------------------------------------
|
|
2683
|
+
# Internal helpers
|
|
2684
|
+
# ------------------------------------------------------------------
|
|
2685
|
+
def _register_signal_handlers(self) -> None:
    """Register cleanup handlers so the watcher shuts down gracefully.

    Installs, in order:

    * an ``atexit`` hook for ``self._graceful_shutdown`` (skipped when the
      ``COLLAB_TEST_MODE`` environment variable is ``"1"``);
    * a ``SIGTERM`` handler on non-Windows platforms;
    * a ``SIGINT`` handler on every platform;
    * on Windows, a ``SIGBREAK`` handler (when the constant exists) and a
      console control handler registered via ``SetConsoleCtrlHandler``.

    Every handler funnels into ``self._graceful_shutdown``. Failures while
    registering the Windows-specific handlers are logged at DEBUG level and
    otherwise ignored.
    """
    logger.debug("_register_signal_handlers called")

    if os.getenv("COLLAB_TEST_MODE") != "1":
        logger.debug("Registering atexit handler")
        atexit.register(self._graceful_shutdown)

    def _handle_signal(signum, frame):
        """Run the graceful shutdown for ``signum`` and exit with status 0."""
        logger.debug("Signal handler called: signum=%d", signum)
        logger.info("Received signal %d, shutting down...", signum)
        try:
            self._graceful_shutdown(reason=f"signal_{signum}")
        except Exception:
            logger.exception("Error during graceful shutdown for signal %s", signum)
        sys.exit(0)

    if sys.platform != "win32":
        logger.debug("Registering SIGTERM handler")
        signal.signal(signal.SIGTERM, _handle_signal)
    logger.debug("Registering SIGINT handler")
    signal.signal(signal.SIGINT, _handle_signal)

    # Windows-specific handlers: SIGBREAK and a console control handler.
    # These improve the chance that we run graceful shutdown when the
    # extension host or window closes (CTRL_CLOSE_EVENT, SHUTDOWN, etc.).
    if sys.platform == "win32":
        if hasattr(signal, "SIGBREAK"):
            try:
                logger.debug("Registering SIGBREAK handler")
                signal.signal(signal.SIGBREAK, _handle_signal)
            except Exception as _e:
                logger.debug("Failed to register SIGBREAK handler: %s", _e)

        try:
            import ctypes
            from ctypes import wintypes

            HandlerRoutine = ctypes.WINFUNCTYPE(wintypes.BOOL, wintypes.DWORD)

            def _console_handler(dwCtrlType):
                """Handle a console control event; always reports it as handled."""
                try:
                    logger.debug("Console control event: %s", dwCtrlType)
                    # Attempt graceful shutdown
                    try:
                        self._graceful_shutdown(reason=f"console_ctrl_{dwCtrlType}")
                    except Exception:
                        logger.exception(
                            "Exception during graceful shutdown in console handler"
                        )
                except Exception:
                    logger.exception("Exception in console handler")
                return True

            # ctypes does not keep callback objects alive on its own. If the
            # wrapper were only a temporary, it could be garbage-collected
            # right after registration and the process would crash when
            # Windows later invokes the freed thunk. Pin it on the instance.
            console_callback = HandlerRoutine(_console_handler)
            self._console_ctrl_handler = console_callback
            ctypes.windll.kernel32.SetConsoleCtrlHandler(console_callback, True)
            logger.debug("Registered Windows console ctrl handler")
        except Exception as _e:
            logger.debug("Failed to register console ctrl handler: %s", _e)

    logger.debug("Signal handlers registered")
|
|
2747
|
+
|
|
2748
|
+
def _start_parent_monitor_thread(self) -> None:
    """Spawn a daemon thread that blocks on the parent process handle (Windows).

    The parent PID is opened with ``OpenProcess`` and a background thread
    waits on the resulting handle with ``WaitForSingleObject``. When the
    wait returns, the thread closes the handle, clears the monitor
    bookkeeping attributes and calls ``self._graceful_shutdown``.

    The method is a no-op on non-Windows platforms and when no
    ``_parent_pid`` is set. Any failure is logged at DEBUG level and leaves
    the monitor attributes reset.
    """
    if sys.platform != "win32":
        return
    watched_pid = getattr(self, "_parent_pid", None)
    if not watched_pid:
        return

    def _clear_monitor_state():
        # Reset the bookkeeping attributes describing the monitor thread.
        self._parent_monitor_started = False
        self._parent_monitor_handle = None
        self._parent_monitor_thread = None

    try:
        import ctypes

        kernel32 = ctypes.windll.kernel32
        pid_value = int(watched_pid)

        # Access mask: SYNCHRONIZE | PROCESS_QUERY_LIMITED_INFORMATION
        access_mask = 0x00100000 | 0x1000

        proc_handle = kernel32.OpenProcess(access_mask, False, pid_value)
        if not proc_handle:
            try:
                last_error = kernel32.GetLastError()
            except Exception:
                last_error = None
            logger.debug(
                "OpenProcess failed for parent PID %s: err=%s", watched_pid, last_error
            )
            return

        def _waiter(hndl, ppid):
            """Block until the handle is signaled, then shut the watcher down."""
            try:
                k32 = ctypes.windll.kernel32
                wait_result = k32.WaitForSingleObject(hndl, 0xFFFFFFFF)  # INFINITE
                logger.info(
                    "Parent PID %s handle signaled "
                    "(WaitForSingleObject returned %s). "
                    "Initiating shutdown.",
                    ppid,
                    wait_result,
                )
                try:
                    k32.CloseHandle(hndl)
                except Exception as exc:
                    logger.debug("CloseHandle failed for parent monitor: %s", exc)
                # mark monitor as stopped to avoid races
                _clear_monitor_state()
                # Trigger graceful shutdown with a reason
                try:
                    self._graceful_shutdown(reason=f"parent_exit_{ppid}")
                except Exception:
                    logger.exception("Error while shutting down after parent exit")
            except Exception as e:
                logger.debug("Parent monitor waiter failed: %s", e)

        monitor = threading.Thread(
            target=_waiter, args=(proc_handle, pid_value), daemon=True
        )
        # Record diagnostics before starting
        self._parent_monitor_handle = proc_handle
        self._parent_monitor_started = True
        self._parent_monitor_thread = monitor
        logger.info("Parent monitor listening for parent PID %s", watched_pid)
        monitor.start()
    except Exception as e:
        logger.debug("Failed to start parent monitor thread: %s", e)
        _clear_monitor_state()
|
|
2824
|
+
|
|
2825
|
+
def _graceful_shutdown(self, reason: Optional[str] = None) -> None:
    """Tear down the local daemon state when the watcher stops.

    IMPORTANT: no Supabase lock is released here. The locks owned by this
    developer are only enumerated and logged as preserved, so that they
    persist across IDE restarts and terminal sessions. They are released
    during 'git push' (via the pre-push hook) or by a manual release-all.

    The first call does the work; later calls return early. With
    ``COLLAB_TEST_MODE=1`` the call only marks shutdown as done.

    Args:
        reason: Optional short tag describing what triggered the shutdown;
            it is only used in log messages.
    """

    def _drain(handlers, sync_to_disk=False):
        # Best-effort flush of each handler; optionally fsync its stream.
        for hdlr in handlers:
            try:
                hdlr.flush()
            except Exception:
                pass
            if not sync_to_disk:
                continue
            try:
                stream = getattr(hdlr, "stream", None)
                if stream and hasattr(stream, "fileno"):
                    os.fsync(stream.fileno())
            except Exception:
                pass

    logger.debug("_graceful_shutdown called (reason=%s)", reason)

    # Flush immediately so we see this even if process dies
    _drain(logging.getLogger().handlers)

    if getattr(self, "_shutdown_done", False):
        logger.debug("shutdown already done, returning (reason=%s)", reason)
        return
    self._shutdown_done = True

    # Never touch real Supabase OR local PID file in test mode
    if os.getenv("COLLAB_TEST_MODE") == "1":
        logger.debug("COLLAB_TEST_MODE=1 - skipping real shutdown actions")
        return

    # Log shutdown start (clear, stepwise messages)
    if not reason:
        logger.info(
            "Shutdown initiated — received shutdown signal. "
            "Beginning graceful shutdown."
        )
    else:
        logger.info(
            "Shutdown initiated — received shutdown signal (%s). "
            "Beginning graceful shutdown.",
            reason,
        )

    # Flush again
    _drain(logging.getLogger().handlers)

    # Log kept locks (matching pycharm_watcher format)
    n_kept = 0
    try:
        all_locks = self.active()
        logger.debug(
            "Graceful shutdown: fetched %d active locks from Supabase. "
            "My dev ID: %s",
            len(all_locks),
            self.developer_id,
        )
        owned = [
            entry for entry in all_locks if entry.get("developer_id") == self.developer_id
        ]

        def _by_path(entry):
            return entry.get("file_path", "")

        for entry in sorted(owned, key=_by_path):
            path = entry.get("file_path", "")
            if not path:
                continue
            n_kept += 1
            logger.info(
                "🔒 [PRESERVED] %s — still has local edits, lock preserved", path
            )
    except Exception as e:
        logger.error(
            "Exception while enumerating active locks during shutdown: %s", e
        )

    logger.info(
        "Shutdown complete. Preserved %d lock(s); released 0 lock(s).", n_kept
    )
    # Emit a concise stdout marker for the extension to detect.
    try:
        print(
            f"Shutdown complete. Preserved {n_kept} lock(s); released 0 lock(s).",
            flush=True,
        )
    except Exception:
        pass

    # Write shutdown marker early into the per-workspace state dir so
    # external tools can detect shutdown without placing transient files
    # inside the repository working tree.
    try:
        marker_path = _state_path(".shutdown_complete")
        with open(marker_path, "w") as marker:
            marker.write(f"{n_kept}\n")
            marker.flush()
            try:
                os.fsync(marker.fileno())
            except Exception:
                pass
        logger.info("Wrote shutdown marker to %s", marker_path)
        # Remove any stray shutdown/startup markers that may exist in the
        # repository runtime root from older runs.
        try:
            stray_markers = (
                os.path.join(_COLLAB_ROOT, ".shutdown_complete"),
                os.path.join(_COLLAB_ROOT, ".startup_summary.json"),
            )
            for stray in stray_markers:
                try:
                    if os.path.exists(stray):
                        os.remove(stray)
                        logger.info("Removed stray runtime marker in repo: %s", stray)
                except Exception as _e:
                    logger.debug(
                        "Failed to remove stray repo marker %s: %s", stray, _e
                    )
        except Exception:
            pass
    except Exception as _e:
        logger.debug("Failed to write shutdown marker early: %s", _e)

    # Remove PID file with logging (matching pycharm_watcher); retry up to
    # three times on OSError with a short pause between attempts.
    for attempt in range(3):
        try:
            if os.path.exists(PID_FILE):
                os.remove(PID_FILE)
                logger.info("Removed PID file: %s", PID_FILE)
        except OSError:
            if attempt < 2:
                time.sleep(0.1)
        else:
            break

    # Flush handlers attached to the 'collab' logger (file handlers) to
    # ensure shutdown logs are written.
    try:
        _drain(getattr(logging.getLogger("collab"), "handlers", []))
    except Exception:
        pass

    # Also flush and fsync file-backed handlers as a best-effort so that
    # logs are persisted to disk even if the parent IDE reloads quickly.
    # First the collab-specific handlers...
    try:
        _drain(getattr(logging.getLogger("collab"), "handlers", []), sync_to_disk=True)
    except Exception:
        pass

    # ...then the root handlers.
    try:
        _drain(logging.getLogger().handlers, sync_to_disk=True)
    except Exception:
        pass

    # Ensure all logging resources are flushed and closed before exit.
    try:
        logging.shutdown()
    except Exception:
        pass

    # Ensure stdout is flushed for console output
    try:
        sys.stdout.flush()
    except Exception:
        pass

    # Small delay to ensure file writes complete before process exit
    time.sleep(0.5)
|
|
3022
|
+
|
|
3023
|
+
def _reconcile(self) -> set:
|
|
3024
|
+
"""Sync Supabase locks with local git status and upstream state."""
|
|
3025
|
+
try:
|
|
3026
|
+
modified_files = self._get_modified_and_unpushed_files()
|
|
3027
|
+
git_modified = set(modified_files)
|
|
3028
|
+
except Exception as e:
|
|
3029
|
+
logger.error("Error identifying modified files (skipping reconcile): %s", e)
|
|
3030
|
+
# DANGEROUS: Returning set() here would cause it to think we should
|
|
3031
|
+
# release EVERYTHING we currently have. Instead, return our currently
|
|
3032
|
+
# known locks so reconciliation essentially becomes a no-op for this cycle.
|
|
3033
|
+
try:
|
|
3034
|
+
active = self.active()
|
|
3035
|
+
return {
|
|
3036
|
+
lk["file_path"]
|
|
3037
|
+
for lk in active
|
|
3038
|
+
if lk.get("developer_id") == self.developer_id
|
|
3039
|
+
}
|
|
3040
|
+
except Exception:
|
|
3041
|
+
return set()
|
|
3042
|
+
|
|
3043
|
+
try:
|
|
3044
|
+
active = self.active()
|
|
3045
|
+
my_locks = {
|
|
3046
|
+
lk["file_path"]
|
|
3047
|
+
for lk in active
|
|
3048
|
+
if lk.get("developer_id") == self.developer_id
|
|
3049
|
+
}
|
|
3050
|
+
# Build lock_map for token checking
|
|
3051
|
+
lock_map: dict[str, dict] = {}
|
|
3052
|
+
for lk in active:
|
|
3053
|
+
if lk.get("developer_id") == self.developer_id:
|
|
3054
|
+
fp = lk.get("file_path", "")
|
|
3055
|
+
if fp:
|
|
3056
|
+
lock_map[fp] = lk
|
|
3057
|
+
except Exception as e:
|
|
3058
|
+
logger.error("Error getting Supabase locks: %s", e)
|
|
3059
|
+
return git_modified
|
|
3060
|
+
|
|
3061
|
+
# Calculate lock categories
|
|
3062
|
+
stale = my_locks - git_modified
|
|
3063
|
+
missing = git_modified - my_locks
|
|
3064
|
+
still_valid = my_locks & git_modified
|
|
3065
|
+
|
|
3066
|
+
# Count categories for summary
|
|
3067
|
+
current_token = self._get_session_token()
|
|
3068
|
+
resumed_locks = []
|
|
3069
|
+
refreshed_locks = []
|
|
3070
|
+
multi_session_locks = []
|
|
3071
|
+
|
|
3072
|
+
for fp in sorted(still_valid):
|
|
3073
|
+
lock = lock_map.get(fp, {})
|
|
3074
|
+
stored_token = lock.get("lock_token", "")
|
|
3075
|
+
|
|
3076
|
+
if stored_token and stored_token != current_token:
|
|
3077
|
+
if self._is_same_machine_token(stored_token):
|
|
3078
|
+
resumed_locks.append(fp)
|
|
3079
|
+
else:
|
|
3080
|
+
multi_session_locks.append(fp)
|
|
3081
|
+
elif stored_token == current_token:
|
|
3082
|
+
resumed_locks.append(fp)
|
|
3083
|
+
else:
|
|
3084
|
+
refreshed_locks.append(fp)
|
|
3085
|
+
|
|
3086
|
+
# Calculate counts for summary
|
|
3087
|
+
n_released = len(stale)
|
|
3088
|
+
n_newly_locked = len(missing)
|
|
3089
|
+
n_readopted = len(resumed_locks)
|
|
3090
|
+
n_refreshed = len(refreshed_locks)
|
|
3091
|
+
n_multi = len(multi_session_locks)
|
|
3092
|
+
|
|
3093
|
+
# Only log start message if there's work to do
|
|
3094
|
+
if any([n_released, n_newly_locked, n_readopted, n_refreshed, n_multi]):
|
|
3095
|
+
logger.debug("Starting lock reconciliation...")
|
|
3096
|
+
|
|
3097
|
+
# Process stale locks
|
|
3098
|
+
if stale:
|
|
3099
|
+
for fp in sorted(stale):
|
|
3100
|
+
logger.info(
|
|
3101
|
+
"🔓 [STALE-RELEASED] %s — locked but file is now clean, releasing",
|
|
3102
|
+
fp,
|
|
3103
|
+
)
|
|
3104
|
+
self.release_multiple(list(stale))
|
|
3105
|
+
|
|
3106
|
+
# Process RESUMED locks: use direct table update (preserves acquired_at)
|
|
3107
|
+
# This prevents the timer from resetting when switching IDEs
|
|
3108
|
+
if resumed_locks:
|
|
3109
|
+
for fp in sorted(resumed_locks):
|
|
3110
|
+
logger.info("🔒 [RESUMED] %s — lock re-adopted from this machine", fp)
|
|
3111
|
+
try:
|
|
3112
|
+
# Use direct update to ONLY change lock_token, NOT acquired_at
|
|
3113
|
+
client = self._client
|
|
3114
|
+
assert client is not None
|
|
3115
|
+
client.table("file_locks").update({"lock_token": current_token}).eq(
|
|
3116
|
+
"file_path", fp
|
|
3117
|
+
).eq("developer_id", self.developer_id).execute()
|
|
3118
|
+
except Exception:
|
|
3119
|
+
logger.debug("Failed to update lock_token for %s (non-fatal)", fp)
|
|
3120
|
+
|
|
3121
|
+
# Process multi-session locks (different machine) - just log, don't touch
|
|
3122
|
+
if multi_session_locks:
|
|
3123
|
+
for fp in sorted(multi_session_locks):
|
|
3124
|
+
lock = lock_map.get(fp, {})
|
|
3125
|
+
stored_token = lock.get("lock_token", "")
|
|
3126
|
+
logger.warning(
|
|
3127
|
+
(
|
|
3128
|
+
"⚠️ [MULTI-SESSION] %s — token mismatch (stored: %s..., "
|
|
3129
|
+
"current: %s...). "
|
|
3130
|
+
"Lock left untouched — use 'collab release-all' "
|
|
3131
|
+
"if stale."
|
|
3132
|
+
),
|
|
3133
|
+
fp,
|
|
3134
|
+
stored_token[:8] if stored_token else "none",
|
|
3135
|
+
current_token[:8],
|
|
3136
|
+
)
|
|
3137
|
+
|
|
3138
|
+
# Process REFRESHED locks (no stored token) - use acquire RPC
|
|
3139
|
+
if refreshed_locks:
|
|
3140
|
+
for fp in sorted(refreshed_locks):
|
|
3141
|
+
logger.info("🔒 [REFRESHED] %s — token refreshed", fp)
|
|
3142
|
+
branch = self._get_current_branch()
|
|
3143
|
+
self.acquire_multiple(
|
|
3144
|
+
list(refreshed_locks), branch_name=branch, reason="Auto-Watch Sync"
|
|
3145
|
+
)
|
|
3146
|
+
|
|
3147
|
+
# Process NEW locks (missing) - use acquire RPC
|
|
3148
|
+
if missing:
|
|
3149
|
+
branch = self._get_current_branch()
|
|
3150
|
+
self.acquire_multiple(
|
|
3151
|
+
list(missing), branch_name=branch, reason="Auto-Watch Sync"
|
|
3152
|
+
)
|
|
3153
|
+
|
|
3154
|
+
# Always log startup reconciliation summary for notification detection
|
|
3155
|
+
# Ensure a clear stdout marker so the VS Code extension (which
|
|
3156
|
+
# monitors the watcher's stdout) reliably detects startup completion.
|
|
3157
|
+
print("Startup reconciliation complete.")
|
|
3158
|
+
logger.info("Startup reconciliation complete.")
|
|
3159
|
+
if n_readopted:
|
|
3160
|
+
logger.info(" Re-adopted: %d lock(s)", n_readopted)
|
|
3161
|
+
if n_released:
|
|
3162
|
+
logger.info(" Stale released: %d lock(s)", n_released)
|
|
3163
|
+
if n_newly_locked:
|
|
3164
|
+
logger.info(" Newly locked: %d file(s)", n_newly_locked)
|
|
3165
|
+
if n_multi:
|
|
3166
|
+
logger.info(" Conflicts: %d file(s)", n_multi)
|
|
3167
|
+
if n_refreshed:
|
|
3168
|
+
logger.info(" Token refresh: %d lock(s)", n_refreshed)
|
|
3169
|
+
|
|
3170
|
+
# Write startup summary to file for VSCode extension notification
|
|
3171
|
+
# Skip if silencing is requested (e.g., during tests)
|
|
3172
|
+
if os.environ.get("COLLAB_SILENT_DAEMON"):
|
|
3173
|
+
logger.debug("Skipping startup summary (COLLAB_SILENT_DAEMON set)")
|
|
3174
|
+
return git_modified
|
|
3175
|
+
|
|
3176
|
+
try:
|
|
3177
|
+
import json
|
|
3178
|
+
|
|
3179
|
+
summary_file = _state_path(".startup_summary.json")
|
|
3180
|
+
summary_data = {
|
|
3181
|
+
"readopted": n_readopted,
|
|
3182
|
+
"stale_released": n_released,
|
|
3183
|
+
"newly_locked": n_newly_locked,
|
|
3184
|
+
"conflicts": n_multi,
|
|
3185
|
+
"refreshed": n_refreshed,
|
|
3186
|
+
"timestamp": time.time(),
|
|
3187
|
+
}
|
|
3188
|
+
with open(summary_file, "w") as f:
|
|
3189
|
+
json.dump(summary_data, f)
|
|
3190
|
+
|
|
3191
|
+
# For backward compatibility with older extension instances that
|
|
3192
|
+
# expect `.startup_summary.json` inside the repository root,
|
|
3193
|
+
# also write a short-lived copy there. Schedule its removal after
|
|
3194
|
+
# a short grace period so the git tree is not polluted long-term.
|
|
3195
|
+
try:
|
|
3196
|
+
repo_summary = os.path.join(_COLLAB_ROOT, ".startup_summary.json")
|
|
3197
|
+
try:
|
|
3198
|
+
with open(repo_summary, "w") as rf:
|
|
3199
|
+
json.dump(summary_data, rf)
|
|
3200
|
+
except Exception as _e:
|
|
3201
|
+
logger.debug("Failed to write repo startup summary: %s", _e)
|
|
3202
|
+
|
|
3203
|
+
def _cleanup_repo_markers(paths, delay=30):
|
|
3204
|
+
def _worker():
|
|
3205
|
+
try:
|
|
3206
|
+
time.sleep(delay)
|
|
3207
|
+
for p in paths:
|
|
3208
|
+
try:
|
|
3209
|
+
if os.path.exists(p):
|
|
3210
|
+
os.remove(p)
|
|
3211
|
+
_emit_log_resilient(
|
|
3212
|
+
logger,
|
|
3213
|
+
logging.INFO,
|
|
3214
|
+
"Removed stray repo marker: %s",
|
|
3215
|
+
p,
|
|
3216
|
+
)
|
|
3217
|
+
except Exception:
|
|
3218
|
+
_emit_log_resilient(
|
|
3219
|
+
logger,
|
|
3220
|
+
logging.DEBUG,
|
|
3221
|
+
"Failed to remove stray repo marker: %s",
|
|
3222
|
+
p,
|
|
3223
|
+
)
|
|
3224
|
+
except Exception:
|
|
3225
|
+
pass
|
|
3226
|
+
|
|
3227
|
+
th = threading.Thread(target=_worker, daemon=True)
|
|
3228
|
+
th.start()
|
|
3229
|
+
|
|
3230
|
+
# Schedule removal of both startup and shutdown markers (if present)
|
|
3231
|
+
repo_shutdown = os.path.join(_COLLAB_ROOT, ".shutdown_complete")
|
|
3232
|
+
_cleanup_repo_markers([repo_summary, repo_shutdown], delay=30)
|
|
3233
|
+
except Exception:
|
|
3234
|
+
pass
|
|
3235
|
+
except Exception:
|
|
3236
|
+
pass
|
|
3237
|
+
|
|
3238
|
+
return git_modified
|
|
3239
|
+
|
|
3240
|
+
@staticmethod
|
|
3241
|
+
def _run_git_status() -> str:
    """Run ``git status --porcelain`` and return its decoded output.

    Only trailing line terminators are removed. Porcelain lines have the
    fixed layout ``XY <path>`` and the ``X`` column is a space for unstaged
    changes, so the leading blank of the first line is part of the format.
    Stripping it shifts the path by one column and corrupts any parser that
    slices the path at column 3.

    Returns:
        The porcelain text, or an empty string when nothing is reported.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
        OSError: If the git executable cannot be started.
    """
    run_kwargs = {"stderr": subprocess.DEVNULL}
    if sys.platform == "win32":
        # 0x08000000 is CREATE_NO_WINDOW: keep the background watcher from
        # flashing a console window for every git invocation.
        run_kwargs["creationflags"] = 0x08000000
    raw = subprocess.check_output(["git", "status", "--porcelain"], **run_kwargs)
    # Undecodable path bytes are replaced instead of aborting the whole scan.
    return raw.decode("utf-8", errors="replace").rstrip("\r\n")
|
|
3258
|
+
|
|
3259
|
+
def _get_modified_and_unpushed_files(self) -> List[str]:
    """Collect paths that are dirty in the work tree or differ from upstream.

    Two sources are merged into one de-duplicated list:

    1. ``git status --porcelain`` (staged, unstaged and untracked entries).
    2. ``git diff --name-status @{u}..HEAD`` (committed but unpushed
       changes), consulted only when an upstream branch is configured.

    Directory entries (paths ending in ``/``) and paths rejected by
    ``_should_ignore_path`` are left out. Entries are not filtered by status
    letter, so deleted files are reported as well. Failures of either git
    query are swallowed; whatever was collected so far is returned.
    """

    def _git_bytes(argv):
        # Same invocation on every platform; Windows additionally passes
        # creationflags=0x08000000.
        if sys.platform == "win32":
            return subprocess.check_output(
                argv, stderr=subprocess.DEVNULL, creationflags=0x08000000
            )
        return subprocess.check_output(argv, stderr=subprocess.DEVNULL)

    collected = set()

    # Source 1: dirty / staged / untracked files.
    try:
        status_text = self._run_git_status()
        if status_text:
            for entry in status_text.splitlines():
                if len(entry) <= 3:
                    continue
                candidate = self._normalize_file_path(
                    self._parse_git_status_path(entry)
                )
                if candidate.endswith("/") or self._should_ignore_path(candidate):
                    continue
                collected.add(candidate)
    except Exception as e:
        logger.debug("Git status failed: %s", e)

    # Source 2: files changed in commits that are not on the upstream yet.
    try:
        # Raises when no upstream is configured, which skips the diff.
        _git_bytes(
            ["git", "rev-parse", "--abbrev-ref", "--symbolic-full-name", "@{u}"]
        )
        diff_text = (
            _git_bytes(["git", "diff", "--name-status", "@{u}..HEAD"])
            .decode()
            .strip()
        )
        for entry in diff_text.splitlines():
            fields = entry.strip().split(None, 1)
            if len(fields) != 2:
                continue
            target = fields[1].strip()
            # Rename/copy rows carry two paths; keep the destination.
            if "\t" in target:
                target = target.split("\t")[-1].strip()
            if " -> " in target:
                target = target.split(" -> ")[-1].strip()
            resolved = self._normalize_file_path(target)
            if resolved.endswith("/"):
                continue
            if resolved and not self._should_ignore_path(resolved):
                collected.add(resolved)
    except Exception:
        # No upstream or command failed - fallback to status-only
        pass

    return list(collected)
|
|
3338
|
+
|
|
3339
|
+
@staticmethod
|
|
3340
|
+
def _parse_git_status_path(line: str) -> str:
|
|
3341
|
+
"""Extract file path from git status --porcelain, handling renames."""
|
|
3342
|
+
p = line[3:].strip()
|
|
3343
|
+
if " -> " in p:
|
|
3344
|
+
p = p.split(" -> ")[-1].strip()
|
|
3345
|
+
if p.startswith('"') and p.endswith('"'):
|
|
3346
|
+
p = p[1:-1]
|
|
3347
|
+
try:
|
|
3348
|
+
p = p.encode("utf-8").decode("unicode_escape")
|
|
3349
|
+
except Exception:
|
|
3350
|
+
pass
|
|
3351
|
+
return p
|
|
3352
|
+
|
|
3353
|
+
@staticmethod
|
|
3354
|
+
def _should_ignore_path(path: str) -> bool:
|
|
3355
|
+
"""Return True for paths the watcher should skip."""
|
|
3356
|
+
norm = path.replace("\\", "/")
|
|
3357
|
+
if "/.git/" in norm or norm.startswith(".git/"):
|
|
3358
|
+
return True
|
|
3359
|
+
# Ignore runtime instance folders: they are environment artifacts and
|
|
3360
|
+
# should not produce collaborative file locks.
|
|
3361
|
+
if (
|
|
3362
|
+
norm == "instance"
|
|
3363
|
+
or norm.startswith("instance/")
|
|
3364
|
+
or norm.endswith("/instance")
|
|
3365
|
+
or "/instance/" in norm
|
|
3366
|
+
):
|
|
3367
|
+
return True
|
|
3368
|
+
# Ignore collab metadata files that the watcher itself creates
|
|
3369
|
+
if ".startup_summary.json" in norm or ".shutdown_complete" in norm:
|
|
3370
|
+
return True
|
|
3371
|
+
# Do not ignore other runtime-relative project paths here.
|
|
3372
|
+
return False
|
|
3373
|
+
|
|
3374
|
+
@staticmethod
|
|
3375
|
+
def _read_pid() -> Optional[int]:
    """Read the daemon PID from the PID file.

    Two on-disk formats are understood:

    - a plain integer (legacy), and
    - a JSON object with a numeric ``"pid"`` field.

    Only strictly positive integers are accepted. ``0`` and negative numbers
    address process groups rather than a process when passed to
    ``os.kill``, and JSON ``true``/``false`` are ``int`` instances in
    Python, so all of those are treated as a malformed file.

    Returns:
        The PID, or ``None`` when the file is missing, empty, unreadable or
        malformed.
    """
    if not os.path.exists(PID_FILE):
        return None
    try:
        with open(PID_FILE, "r", encoding="utf-8") as f:
            raw = f.read().strip()
        if not raw:
            return None
        if raw.startswith("{"):
            # JSON format (richer metadata).
            try:
                pid = json.loads(raw).get("pid")
            except Exception:
                logger.debug("PID file contains invalid JSON: %s", raw)
                return None
            if isinstance(pid, int) and not isinstance(pid, bool) and pid > 0:
                return pid
            return None
        # Legacy format: plain integer.
        pid = int(raw)
    except ValueError:
        logger.debug("PID file does not contain an integer: %s", PID_FILE)
        return None
    except OSError as e:
        logger.debug("Could not read PID file %s: %s", PID_FILE, e)
        return None
    if pid <= 0:
        logger.debug("PID file does not contain an integer: %s", PID_FILE)
        return None
    return pid
|
|
3409
|
+
|
|
3410
|
+
@staticmethod
|
|
3411
|
+
def _get_cmdline_for_pid(pid: int) -> Optional[str]:
|
|
3412
|
+
"""Return the command-line string for a process, or None if unavailable.
|
|
3413
|
+
|
|
3414
|
+
Uses psutil when available. If psutil is not installed or access fails, returns
|
|
3415
|
+
None which indicates we couldn't verify the cmdline.
|
|
3416
|
+
"""
|
|
3417
|
+
# Prefer psutil when available (robust cross-platform). If unavailable,
|
|
3418
|
+
# fall back to lightweight platform-specific methods (procfs on Unix,
|
|
3419
|
+
# WMIC/tasklist on Windows) so we can verify PID command-lines even
|
|
3420
|
+
# in minimal environments.
|
|
3421
|
+
try:
|
|
3422
|
+
import psutil
|
|
3423
|
+
|
|
3424
|
+
try:
|
|
3425
|
+
p = psutil.Process(pid)
|
|
3426
|
+
cmd = p.cmdline()
|
|
3427
|
+
if isinstance(cmd, (list, tuple)):
|
|
3428
|
+
return " ".join(cmd)
|
|
3429
|
+
return str(cmd)
|
|
3430
|
+
except Exception:
|
|
3431
|
+
pass
|
|
3432
|
+
except Exception:
|
|
3433
|
+
# psutil not installed — continue to platform fallbacks
|
|
3434
|
+
pass
|
|
3435
|
+
|
|
3436
|
+
# Platform-specific fallbacks
|
|
3437
|
+
if sys.platform == "win32":
|
|
3438
|
+
# Prefer modern PowerShell CIM query when WMIC is not present.
|
|
3439
|
+
# Only call WMIC if it is actually available on PATH to avoid
|
|
3440
|
+
# repeated FileNotFoundError/WinError logs on newer Windows.
|
|
3441
|
+
try:
|
|
3442
|
+
if shutil.which("wmic"):
|
|
3443
|
+
try:
|
|
3444
|
+
out = subprocess.check_output(
|
|
3445
|
+
[
|
|
3446
|
+
"wmic",
|
|
3447
|
+
"process",
|
|
3448
|
+
"where",
|
|
3449
|
+
f"ProcessId={pid}",
|
|
3450
|
+
"get",
|
|
3451
|
+
"CommandLine",
|
|
3452
|
+
],
|
|
3453
|
+
stderr=subprocess.DEVNULL,
|
|
3454
|
+
text=True,
|
|
3455
|
+
)
|
|
3456
|
+
lines = [
|
|
3457
|
+
line.strip() for line in out.splitlines() if line.strip()
|
|
3458
|
+
]
|
|
3459
|
+
if len(lines) >= 2:
|
|
3460
|
+
return " ".join(lines[1:]).strip()
|
|
3461
|
+
except Exception:
|
|
3462
|
+
# If WMIC fails, continue to PowerShell fallback
|
|
3463
|
+
logger.debug("WMIC command-line query failed for PID %d", pid)
|
|
3464
|
+
# PowerShell CIM fallback (works on recent Windows)
|
|
3465
|
+
try:
|
|
3466
|
+
cmd_str = (
|
|
3467
|
+
"(Get-CimInstance Win32_Process -Filter "
|
|
3468
|
+
'"ProcessId=%d").CommandLine'
|
|
3469
|
+
) % pid
|
|
3470
|
+
ps_cmd = ("-NoProfile", "-Command", cmd_str)
|
|
3471
|
+
out = subprocess.check_output(
|
|
3472
|
+
["powershell", *ps_cmd], stderr=subprocess.DEVNULL, text=True
|
|
3473
|
+
)
|
|
3474
|
+
out = out.strip()
|
|
3475
|
+
if out:
|
|
3476
|
+
return out
|
|
3477
|
+
except Exception:
|
|
3478
|
+
logger.debug("PowerShell command-line query failed for PID %d", pid)
|
|
3479
|
+
except Exception:
|
|
3480
|
+
# Defensive: if shutil or other checks fail, give up gracefully
|
|
3481
|
+
logger.debug("Windows cmdline fallback failed for PID %d", pid)
|
|
3482
|
+
# As a last resort on Windows we cannot reliably get a cmdline
|
|
3483
|
+
return None
|
|
3484
|
+
else:
|
|
3485
|
+
# Unix-like systems: read /proc/<pid>/cmdline if available
|
|
3486
|
+
proc_path = f"/proc/{pid}/cmdline"
|
|
3487
|
+
try:
|
|
3488
|
+
if os.path.exists(proc_path):
|
|
3489
|
+
with open(proc_path, "rb") as fh:
|
|
3490
|
+
data = fh.read()
|
|
3491
|
+
if not data:
|
|
3492
|
+
return None
|
|
3493
|
+
# cmdline entries are null-separated
|
|
3494
|
+
raw_parts = data.split(b"\x00")
|
|
3495
|
+
parts = [
|
|
3496
|
+
part.decode(errors="replace") for part in raw_parts if part
|
|
3497
|
+
]
|
|
3498
|
+
return " ".join(parts)
|
|
3499
|
+
except Exception:
|
|
3500
|
+
pass
|
|
3501
|
+
return None
|
|
3502
|
+
|
|
3503
|
+
@staticmethod
|
|
3504
|
+
def _cmdline_matches_watcher(cmdline: str) -> bool:
|
|
3505
|
+
"""Heuristic: return True if the command-line looks like our watcher.
|
|
3506
|
+
|
|
3507
|
+
Matches supported watcher entrypoints, including legacy path-based invocations
|
|
3508
|
+
and the current module/CLI forms.
|
|
3509
|
+
"""
|
|
3510
|
+
if not cmdline:
|
|
3511
|
+
return False
|
|
3512
|
+
s = cmdline.lower()
|
|
3513
|
+
return (
|
|
3514
|
+
"live_locks_watcher" in s
|
|
3515
|
+
or ("lock_client.py" in s and "watch" in s)
|
|
3516
|
+
or ("collab.core.lock_client" in s and "watch" in s)
|
|
3517
|
+
or ("collab" in s and "watch" in s)
|
|
3518
|
+
)
|
|
3519
|
+
|
|
3520
|
+
@staticmethod
|
|
3521
|
+
def _extract_pid_file_from_cmdline(cmdline: str) -> Optional[str]:
|
|
3522
|
+
"""Extract a --pid-file argument from cmdline when present.
|
|
3523
|
+
|
|
3524
|
+
Returns the parsed value as-is (possibly quoted), or None when missing.
|
|
3525
|
+
"""
|
|
3526
|
+
if not cmdline:
|
|
3527
|
+
return None
|
|
3528
|
+
# Match either:
|
|
3529
|
+
# --pid-file VALUE
|
|
3530
|
+
# --pid-file="VALUE"
|
|
3531
|
+
# --pid-file='VALUE'
|
|
3532
|
+
m = re.search(r"--pid-file(?:=|\s+)(\"[^\"]+\"|'[^']+'|\S+)", cmdline)
|
|
3533
|
+
if not m:
|
|
3534
|
+
return None
|
|
3535
|
+
raw = m.group(1).strip()
|
|
3536
|
+
if (raw.startswith('"') and raw.endswith('"')) or (
|
|
3537
|
+
raw.startswith("'") and raw.endswith("'")
|
|
3538
|
+
):
|
|
3539
|
+
raw = raw[1:-1]
|
|
3540
|
+
return raw
|
|
3541
|
+
|
|
3542
|
+
def _cmdline_matches_current_pid_namespace(self, cmdline: str) -> bool:
    """Decide whether a watcher command line belongs to this PID-file scope.

    - A command line carrying ``--pid-file`` matches only when that path,
      made absolute, equals the absolute path of the current ``PID_FILE``.
    - A command line without ``--pid-file`` (legacy watcher) matches only
      outside test mode and only when the current ``PID_FILE`` is the
      default ``.daemon.pid`` under the collab root.
    """
    declared = self._extract_pid_file_from_cmdline(cmdline)
    ours = os.path.abspath(PID_FILE)
    legacy_default = os.path.abspath(os.path.join(_COLLAB_ROOT, ".daemon.pid"))

    if not declared:
        # Legacy watcher without explicit namespace tag.
        return (not _is_test_mode()) and ours == legacy_default

    try:
        return os.path.abspath(declared) == ours
    except Exception:
        return False
|
|
3562
|
+
|
|
3563
|
+
@staticmethod
|
|
3564
|
+
def _write_pid(
    pid: int, parent_pid: Optional[int] = None, token: Optional[str] = None
) -> None:
    """Write daemon PID metadata to the PID file as a JSON object.

    Historically the file held a plain integer; the reader in this file
    accepts both formats. The JSON object carries ``pid``, ``started_at``
    (UTC, ISO 8601), ``entrypoint``, ``cmdline`` and ``cwd``, plus
    ``parent_pid`` and ``token`` when they are truthy.

    The data is first written to ``<PID_FILE>.tmp`` and moved into place
    with ``os.replace``. If the move fails the PID file is written directly
    and the temporary file is removed so it does not linger next to the PID
    file. ``OSError`` during writing is logged as a warning, not raised.

    Args:
        pid: Process id of the daemon.
        parent_pid: Optional id of the process that launched the daemon.
        token: Optional session token identifying this watcher instance.
    """
    meta = {
        "pid": int(pid),
        # Use _safe_now to accommodate tests that monkeypatch the module
        # level `datetime` symbol. Ensure the stored time is in UTC.
        "started_at": _safe_now().astimezone(timezone.utc).isoformat(),
        # Use a human-friendly entrypoint string so other tools can display
        # a concise description without reconstructing the full cmdline.
        "entrypoint": "python lock_client.py",
        "cmdline": " ".join([sys.executable] + sys.argv),
        "cwd": os.getcwd(),
    }
    if parent_pid:
        meta["parent_pid"] = parent_pid
    if token:
        # Small session token to uniquely identify this watcher instance
        meta["token"] = str(token)

    # Serialize once; both the atomic and the fallback path write the same text.
    payload = json.dumps(meta)

    try:
        tmp = PID_FILE + ".tmp"
        with open(tmp, "w", encoding="utf-8") as f:
            f.write(payload)
            f.flush()
            try:
                os.fsync(f.fileno())
            except Exception:
                # fsync is best-effort.
                pass
        try:
            os.replace(tmp, PID_FILE)
        except Exception:
            # Fallback to non-atomic write
            try:
                with open(PID_FILE, "w", encoding="utf-8") as f2:
                    f2.write(payload)
            finally:
                # The temp file was not consumed by os.replace; drop it.
                try:
                    os.remove(tmp)
                except OSError:
                    pass
    except OSError as e:
        logger.warning("Could not write PID file: %s", e)
|
|
3608
|
+
|
|
3609
|
+
@staticmethod
|
|
3610
|
+
def _remove_pid() -> None:
|
|
3611
|
+
"""Remove the PID file if it exists.
|
|
3612
|
+
|
|
3613
|
+
Suppressed in COLLAB_TEST_MODE to prevent test processes from accidentally
|
|
3614
|
+
deleting the production watcher's PID file.
|
|
3615
|
+
"""
|
|
3616
|
+
if os.getenv("COLLAB_TEST_MODE") == "1":
|
|
3617
|
+
return
|
|
3618
|
+
|
|
3619
|
+
try:
|
|
3620
|
+
if os.path.exists(PID_FILE):
|
|
3621
|
+
os.remove(PID_FILE)
|
|
3622
|
+
except OSError:
|
|
3623
|
+
pass
|
|
3624
|
+
|
|
3625
|
+
@staticmethod
|
|
3626
|
+
def _assign_to_job_object() -> None:
|
|
3627
|
+
"""Assign current process to a Job Object that terminates children when parent
|
|
3628
|
+
dies.
|
|
3629
|
+
|
|
3630
|
+
This is a Windows-specific mechanism to ensure the watcher dies with its parent
|
|
3631
|
+
IDE. If the parent process terminates, all processes in the job are
|
|
3632
|
+
automatically killed.
|
|
3633
|
+
"""
|
|
3634
|
+
if sys.platform != "win32":
|
|
3635
|
+
return
|
|
3636
|
+
|
|
3637
|
+
try:
|
|
3638
|
+
import ctypes
|
|
3639
|
+
from ctypes import wintypes
|
|
3640
|
+
|
|
3641
|
+
# Windows constants
|
|
3642
|
+
JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE = 0x2000
|
|
3643
|
+
JOB_OBJECT_EXTENDED_LIMIT_INFORMATION = 9
|
|
3644
|
+
|
|
3645
|
+
# Create a job object
|
|
3646
|
+
job_handle = ctypes.windll.kernel32.CreateJobObjectW(None, None)
|
|
3647
|
+
if not job_handle:
|
|
3648
|
+
logger.debug("Failed to create Job Object")
|
|
3649
|
+
return
|
|
3650
|
+
|
|
3651
|
+
# Configure the job to kill processes when the job handle is closed
|
|
3652
|
+
class JOBOBJECT_BASIC_LIMIT_INFORMATION(ctypes.Structure):
|
|
3653
|
+
_fields_ = [
|
|
3654
|
+
("PerProcessUserTimeLimit", wintypes.LARGE_INTEGER),
|
|
3655
|
+
("PerJobUserTimeLimit", wintypes.LARGE_INTEGER),
|
|
3656
|
+
("LimitFlags", wintypes.DWORD),
|
|
3657
|
+
("MinimumWorkingSetSize", ctypes.c_size_t),
|
|
3658
|
+
("MaximumWorkingSetSize", ctypes.c_size_t),
|
|
3659
|
+
("ActiveProcessLimit", wintypes.DWORD),
|
|
3660
|
+
("Affinity", ctypes.c_void_p),
|
|
3661
|
+
("PriorityClass", wintypes.DWORD),
|
|
3662
|
+
("SchedulingClass", wintypes.DWORD),
|
|
3663
|
+
]
|
|
3664
|
+
|
|
3665
|
+
class IO_COUNTERS(ctypes.Structure):
|
|
3666
|
+
_fields_ = [
|
|
3667
|
+
("ReadOperationCount", wintypes.ULARGE_INTEGER),
|
|
3668
|
+
("WriteOperationCount", wintypes.ULARGE_INTEGER),
|
|
3669
|
+
("OtherOperationCount", wintypes.ULARGE_INTEGER),
|
|
3670
|
+
("ReadTransferCount", wintypes.ULARGE_INTEGER),
|
|
3671
|
+
("WriteTransferCount", wintypes.ULARGE_INTEGER),
|
|
3672
|
+
("OtherTransferCount", wintypes.ULARGE_INTEGER),
|
|
3673
|
+
]
|
|
3674
|
+
|
|
3675
|
+
class JOBOBJECT_EXTENDED_LIMIT_INFORMATION(ctypes.Structure):
|
|
3676
|
+
_fields_ = [
|
|
3677
|
+
("BasicLimitInformation", JOBOBJECT_BASIC_LIMIT_INFORMATION),
|
|
3678
|
+
("IoInfo", IO_COUNTERS),
|
|
3679
|
+
("ProcessMemoryLimit", ctypes.c_size_t),
|
|
3680
|
+
("JobMemoryLimit", ctypes.c_size_t),
|
|
3681
|
+
("PeakProcessMemoryUsed", ctypes.c_size_t),
|
|
3682
|
+
("PeakJobMemoryUsed", ctypes.c_size_t),
|
|
3683
|
+
]
|
|
3684
|
+
|
|
3685
|
+
info = JOBOBJECT_EXTENDED_LIMIT_INFORMATION()
|
|
3686
|
+
info.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE
|
|
3687
|
+
|
|
3688
|
+
# Set the job information
|
|
3689
|
+
result = ctypes.windll.kernel32.SetInformationJobObject(
|
|
3690
|
+
job_handle,
|
|
3691
|
+
JOB_OBJECT_EXTENDED_LIMIT_INFORMATION,
|
|
3692
|
+
ctypes.byref(info),
|
|
3693
|
+
ctypes.sizeof(info),
|
|
3694
|
+
)
|
|
3695
|
+
|
|
3696
|
+
if not result:
|
|
3697
|
+
logger.debug("Failed to set Job Object information")
|
|
3698
|
+
ctypes.windll.kernel32.CloseHandle(job_handle)
|
|
3699
|
+
return
|
|
3700
|
+
|
|
3701
|
+
# Assign current process to the job
|
|
3702
|
+
current_process = ctypes.windll.kernel32.GetCurrentProcess()
|
|
3703
|
+
result = ctypes.windll.kernel32.AssignProcessToJobObject(
|
|
3704
|
+
job_handle, current_process
|
|
3705
|
+
)
|
|
3706
|
+
|
|
3707
|
+
if result:
|
|
3708
|
+
logger.info(
|
|
3709
|
+
"Assigned watcher to Job Object for automatic cleanup "
|
|
3710
|
+
"on parent exit"
|
|
3711
|
+
)
|
|
3712
|
+
else:
|
|
3713
|
+
logger.debug(
|
|
3714
|
+
"Failed to assign process to Job Object (may already be in a job)"
|
|
3715
|
+
)
|
|
3716
|
+
|
|
3717
|
+
# Keep the job handle open - it will be closed when the process exits,
|
|
3718
|
+
# triggering termination of all processes in the job
|
|
3719
|
+
except Exception as e:
|
|
3720
|
+
logger.debug("Job Object setup failed (non-critical): %s", e)
|
|
3721
|
+
|
|
3722
|
+
@staticmethod
|
|
3723
|
+
def _is_process_alive(pid: int) -> bool:
|
|
3724
|
+
"""Check if a process with the given PID is currently running."""
|
|
3725
|
+
if sys.platform == "win32":
|
|
3726
|
+
# Try psutil first for most accurate status check
|
|
3727
|
+
try:
|
|
3728
|
+
import psutil
|
|
3729
|
+
except ImportError:
|
|
3730
|
+
pass
|
|
3731
|
+
else:
|
|
3732
|
+
try:
|
|
3733
|
+
p = psutil.Process(pid)
|
|
3734
|
+
status = p.status()
|
|
3735
|
+
if status in (psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD):
|
|
3736
|
+
return False
|
|
3737
|
+
return True
|
|
3738
|
+
except psutil.NoSuchProcess:
|
|
3739
|
+
return False
|
|
3740
|
+
except psutil.AccessDenied:
|
|
3741
|
+
return True # exists but we can't query it
|
|
3742
|
+
except Exception as exc:
|
|
3743
|
+
logger.debug("psutil status check failed for PID %s: %s", pid, exc)
|
|
3744
|
+
|
|
3745
|
+
# Win32 API with GetExitCodeProcess to detect zombies
|
|
3746
|
+
try:
|
|
3747
|
+
import ctypes
|
|
3748
|
+
|
|
3749
|
+
# PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
|
|
3750
|
+
process_handle = ctypes.windll.kernel32.OpenProcess(0x1000, False, pid)
|
|
3751
|
+
if process_handle:
|
|
3752
|
+
try:
|
|
3753
|
+
exit_code = ctypes.c_ulong(0)
|
|
3754
|
+
result = ctypes.windll.kernel32.GetExitCodeProcess(
|
|
3755
|
+
process_handle, ctypes.byref(exit_code)
|
|
3756
|
+
)
|
|
3757
|
+
# STILL_ACTIVE = 259
|
|
3758
|
+
if result and exit_code.value != 259:
|
|
3759
|
+
return False # Process has exited
|
|
3760
|
+
return True
|
|
3761
|
+
finally:
|
|
3762
|
+
ctypes.windll.kernel32.CloseHandle(process_handle)
|
|
3763
|
+
else:
|
|
3764
|
+
# Access denied (5) often means the process exists but
|
|
3765
|
+
# is a high-privileged system process.
|
|
3766
|
+
error = ctypes.windll.kernel32.GetLastError()
|
|
3767
|
+
if error == 5:
|
|
3768
|
+
return True
|
|
3769
|
+
return False
|
|
3770
|
+
except Exception as exc:
|
|
3771
|
+
logger.debug("Win32 API process check failed for PID %s: %s", pid, exc)
|
|
3772
|
+
|
|
3773
|
+
# Fallback: psutil pid_exists only (no status check)
|
|
3774
|
+
try:
|
|
3775
|
+
import psutil
|
|
3776
|
+
|
|
3777
|
+
return bool(psutil.pid_exists(pid))
|
|
3778
|
+
except ImportError:
|
|
3779
|
+
pass
|
|
3780
|
+
except Exception as exc:
|
|
3781
|
+
logger.debug("psutil pid_exists failed for PID %s: %s", pid, exc)
|
|
3782
|
+
|
|
3783
|
+
# Final Fallback: tasklist (slow but usually present)
|
|
3784
|
+
try:
|
|
3785
|
+
tasklist_exe = _resolve_executable_path("tasklist")
|
|
3786
|
+
if not tasklist_exe:
|
|
3787
|
+
return False
|
|
3788
|
+
out = subprocess.check_output(
|
|
3789
|
+
[tasklist_exe, "/FI", f"PID eq {pid}", "/NH"],
|
|
3790
|
+
text=True,
|
|
3791
|
+
creationflags=0x08000000,
|
|
3792
|
+
)
|
|
3793
|
+
return str(pid) in out
|
|
3794
|
+
except Exception as exc:
|
|
3795
|
+
logger.debug("tasklist process check failed for PID %s: %s", pid, exc)
|
|
3796
|
+
return False
|
|
3797
|
+
else:
|
|
3798
|
+
try:
|
|
3799
|
+
os.kill(pid, 0)
|
|
3800
|
+
return True
|
|
3801
|
+
except (ProcessLookupError, OSError):
|
|
3802
|
+
return False
|
|
3803
|
+
|
|
3804
|
+
def _discover_running_watchers(self) -> List[int]:
    """Find PIDs of running watcher processes that belong to this workspace.

    ``psutil`` is tried first: every process except the current one is
    checked directly against its command line. If ``psutil`` is missing or
    the enumeration fails, candidate PIDs are gathered with ``tasklist``
    (Windows, Python images only) or ``ps -eo pid,cmd`` (elsewhere), and
    each candidate's command line is then fetched and checked.

    A command line qualifies when it looks like a watcher, belongs to the
    current PID-file namespace, and mentions the project root, the collab
    root, or ``.collab``.

    Returns:
        Matching PIDs in ascending order (possibly empty).
    """

    def _is_ours(command: str) -> bool:
        # Same three-stage filter for both discovery paths.
        if not self._cmdline_matches_watcher(command):
            return False
        if not self._cmdline_matches_current_pid_namespace(command):
            return False
        # Ensure the process references this repo (cwd or path)
        lowered = command.lower()
        return (
            _PROJECT_ROOT.lower() in lowered
            or _COLLAB_ROOT.lower() in lowered
            or ".collab" in lowered
        )

    pending: set[int] = set()

    # Fast path: psutil if available
    try:
        import psutil

        for proc in psutil.process_iter(attrs=("pid", "cmdline")):
            try:
                pid = int(proc.info.get("pid") or 0)
                if pid == os.getpid():
                    continue
                argv = proc.info.get("cmdline")
                if not argv:
                    continue
                if isinstance(argv, (list, tuple)):
                    command = " ".join(argv)
                else:
                    command = str(argv)
                if _is_ours(command):
                    pending.add(pid)
            except Exception:
                continue
        return sorted(pending)
    except Exception as exc:
        # No psutil - fallback to platform enumeration
        logger.debug("psutil process_iter unavailable/failed: %s", exc)

    if sys.platform == "win32":
        tasklist_exe = _resolve_executable_path("tasklist")
        if not tasklist_exe:
            logger.debug("tasklist executable not found; skipping Windows fallback")
            return sorted(pending)
        for image in ["python.exe", "pythonw.exe", "python3.exe"]:
            try:
                listing = subprocess.run(
                    [
                        tasklist_exe,
                        "/FI",
                        f"IMAGENAME eq {image}",
                        "/FO",
                        "CSV",
                        "/NH",
                    ],
                    capture_output=True,
                    text=True,
                    creationflags=0x08000000,
                )
                for row in (listing.stdout or "").splitlines():
                    row = row.strip()
                    if not row:
                        continue
                    # CSV row: "image","pid",... - the PID is the second cell.
                    cells = row.strip('"').split('","')
                    if len(cells) < 2:
                        continue
                    try:
                        pid = int(cells[1])
                        if pid != os.getpid():
                            pending.add(pid)
                    except Exception as exc:
                        logger.debug(
                            "Failed parsing tasklist row for image %s: %s",
                            image,
                            exc,
                        )
            except Exception as exc:
                logger.debug(
                    "tasklist fallback failed for image %s: %s", image, exc
                )
                continue
    else:
        try:
            ps_exe = _resolve_executable_path("ps") or "ps"
            listing = subprocess.run(
                [ps_exe, "-eo", "pid,cmd"], capture_output=True, text=True
            )
            for row in (listing.stdout or "").splitlines():
                row = row.strip()
                if not row:
                    continue
                cells = row.split(None, 1)
                if len(cells) < 2:
                    continue
                try:
                    pid = int(cells[0])
                    if pid != os.getpid():
                        pending.add(pid)
                except Exception as exc:
                    logger.debug("Failed parsing ps output row: %s", exc)
        except Exception as exc:
            logger.debug("ps fallback failed: %s", exc)

    # Candidates from the fallbacks are unverified; check each command line.
    confirmed: List[int] = []
    for pid in sorted(pending):
        try:
            command = self._get_cmdline_for_pid(pid)
            if command and _is_ours(command):
                confirmed.append(pid)
        except Exception:
            continue
    return confirmed
|
|
3932
|
+
|
|
3933
|
+
def _read_pid_file(self) -> Optional[Dict[str, Any]]:
    """Load the JSON metadata stored in the PID file.

    Returns:
        The decoded dictionary when the file exists and holds a JSON
        object; ``None`` when the file is missing, is in the legacy
        plain-integer format, or cannot be read or parsed (the failure is
        logged at debug level).
    """
    if os.path.exists(PID_FILE):
        try:
            with open(PID_FILE, "r", encoding="utf-8") as handle:
                text = handle.read().strip()
            # Only JSON-object content carries metadata.
            parsed = json.loads(text) if text.startswith("{") else None
            if isinstance(parsed, dict):
                return parsed
        except Exception as exc:
            logger.debug("Failed reading PID metadata file %s: %s", PID_FILE, exc)
    return None
|
|
3947
|
+
|
|
3948
|
+
def _terminate_process(self, pid: int) -> None:
|
|
3949
|
+
"""Forcefully terminate a process by PID."""
|
|
3950
|
+
if sys.platform == "win32":
|
|
3951
|
+
taskkill_exe = _resolve_executable_path("taskkill")
|
|
3952
|
+
if not taskkill_exe:
|
|
3953
|
+
logger.debug("taskkill not found while terminating PID %s", pid)
|
|
3954
|
+
return
|
|
3955
|
+
subprocess.run(
|
|
3956
|
+
[taskkill_exe, "/F", "/PID", str(pid)],
|
|
3957
|
+
capture_output=True,
|
|
3958
|
+
creationflags=0x08000000,
|
|
3959
|
+
)
|
|
3960
|
+
else:
|
|
3961
|
+
try:
|
|
3962
|
+
# Use getattr or numeric 9 for SIGKILL fallback on Windows
|
|
3963
|
+
sig = getattr(signal, "SIGKILL", 9)
|
|
3964
|
+
os.kill(pid, sig)
|
|
3965
|
+
except ProcessLookupError:
|
|
3966
|
+
pass
|
|
3967
|
+
|
|
3968
|
+
def _get_process_info_local(self, pid: int) -> Tuple[Optional[str], Optional[int]]:
|
|
3969
|
+
"""Fetch process name and parent PID via various Windows tools."""
|
|
3970
|
+
if sys.platform != "win32":
|
|
3971
|
+
return None, None
|
|
3972
|
+
# Prefer psutil when available - it's the most reliable cross-platform
|
|
3973
|
+
try:
|
|
3974
|
+
import psutil
|
|
3975
|
+
|
|
3976
|
+
try:
|
|
3977
|
+
p = psutil.Process(pid)
|
|
3978
|
+
name = p.name()
|
|
3979
|
+
ppid = p.ppid()
|
|
3980
|
+
if name and not name.lower().endswith(".exe"):
|
|
3981
|
+
name = name + ".exe"
|
|
3982
|
+
return name, ppid
|
|
3983
|
+
except psutil.NoSuchProcess:
|
|
3984
|
+
return None, None
|
|
3985
|
+
except Exception:
|
|
3986
|
+
# psutil present but failed for this PID; fall through to fallbacks
|
|
3987
|
+
pass
|
|
3988
|
+
except Exception:
|
|
3989
|
+
# psutil not available - continue to platform fallbacks
|
|
3990
|
+
pass
|
|
3991
|
+
|
|
3992
|
+
# If WMIC is available, prefer it for name+PPID. Otherwise fall back
|
|
3993
|
+
# to tasklist for a name-only result.
|
|
3994
|
+
try:
|
|
3995
|
+
wmic_exe = _resolve_executable_path("wmic")
|
|
3996
|
+
if wmic_exe:
|
|
3997
|
+
result = subprocess.run(
|
|
3998
|
+
[
|
|
3999
|
+
wmic_exe,
|
|
4000
|
+
"process",
|
|
4001
|
+
"where",
|
|
4002
|
+
f"ProcessId={pid}",
|
|
4003
|
+
"get",
|
|
4004
|
+
"Name,ParentProcessId",
|
|
4005
|
+
"/value",
|
|
4006
|
+
],
|
|
4007
|
+
capture_output=True,
|
|
4008
|
+
text=True,
|
|
4009
|
+
creationflags=0x08000000,
|
|
4010
|
+
timeout=5,
|
|
4011
|
+
errors="ignore",
|
|
4012
|
+
)
|
|
4013
|
+
logger.debug(
|
|
4014
|
+
"WMIC result for PID %d: rc=%d stdout=%r stderr=%r",
|
|
4015
|
+
pid,
|
|
4016
|
+
result.returncode,
|
|
4017
|
+
result.stdout[:200] if result.stdout else None,
|
|
4018
|
+
result.stderr[:200] if result.stderr else None,
|
|
4019
|
+
)
|
|
4020
|
+
if result.returncode == 0 and result.stdout:
|
|
4021
|
+
name_match = re.search(r"Name=(\S+)", result.stdout)
|
|
4022
|
+
parent_match = re.search(r"ParentProcessId=(\d+)", result.stdout)
|
|
4023
|
+
logger.debug(
|
|
4024
|
+
"WMIC parse for PID %d: name_match=%s parent_match=%s",
|
|
4025
|
+
pid,
|
|
4026
|
+
name_match.group(0) if name_match else None,
|
|
4027
|
+
parent_match.group(0) if parent_match else None,
|
|
4028
|
+
)
|
|
4029
|
+
if name_match:
|
|
4030
|
+
name = name_match.group(1)
|
|
4031
|
+
parent_id = int(parent_match.group(1)) if parent_match else None
|
|
4032
|
+
if not name.lower().endswith(".exe"):
|
|
4033
|
+
name += ".exe"
|
|
4034
|
+
logger.info(
|
|
4035
|
+
"WMIC success: PID %d = %s, parent = %s",
|
|
4036
|
+
pid,
|
|
4037
|
+
name,
|
|
4038
|
+
parent_id,
|
|
4039
|
+
)
|
|
4040
|
+
return name, parent_id
|
|
4041
|
+
except Exception as e:
|
|
4042
|
+
logger.debug("WMIC query failed for PID %d: %s", pid, e)
|
|
4043
|
+
|
|
4044
|
+
# Fallback: tasklist for name only
|
|
4045
|
+
try:
|
|
4046
|
+
tasklist_exe = _resolve_executable_path("tasklist")
|
|
4047
|
+
if not tasklist_exe:
|
|
4048
|
+
return None, None
|
|
4049
|
+
args = [tasklist_exe, "/FI", f"PID eq {pid}", "/NH", "/FO", "CSV"]
|
|
4050
|
+
out = (
|
|
4051
|
+
subprocess.check_output(
|
|
4052
|
+
args, stderr=subprocess.DEVNULL, creationflags=0x08000000, timeout=5
|
|
4053
|
+
)
|
|
4054
|
+
.decode("utf-8", errors="ignore")
|
|
4055
|
+
.strip()
|
|
4056
|
+
)
|
|
4057
|
+
# Format: "Image Name","PID","Session Name","Session#","Mem Usage"
|
|
4058
|
+
if out.startswith('"'):
|
|
4059
|
+
parts = [p.strip('"') for p in out.split(",")]
|
|
4060
|
+
if len(parts) >= 2:
|
|
4061
|
+
name = parts[0]
|
|
4062
|
+
return name, None
|
|
4063
|
+
except Exception as e:
|
|
4064
|
+
logger.debug("tasklist query failed for PID %d: %s", pid, e)
|
|
4065
|
+
|
|
4066
|
+
return None, None
|
|
4067
|
+
|
|
4068
|
+
def _get_parent_ide_pid(self) -> Tuple[Optional[int], Optional[str]]:
    """Identify the IDE or terminal process that owns this session.

    Returns a tuple: (pid, detection_method).

    Detection order (first hit wins):
    - VSCODE_PID env var names a live process        -> "vscode_pid"
    - PYCHARM_HOSTED == "1" and the parent is alive  -> "pycharm_hosted"
    - Ancestor walk using ``_get_process_info_local``:
        - node.exe whose parent's name contains a
          VS Code-like token                         -> "node_parent"
        - PyCharm / IDEA executable                  -> "pycharm_process"
        - otherwise the first VS Code-like executable
          met during the walk                        -> "process_tree"
    - Immediate parent's tasklist name is a known IDE -> "simple_walk"
    - Immediate parent is alive                       -> "immediate_parent"
    - Nothing matched                                 -> (None, "unknown")
    """
    vscode_like_exes = ("code.exe", "antigravity.exe", "cursor.exe", "vscodium.exe")
    vscode_like_tokens = ("code", "antigravity", "cursor", "vscodium")

    # Priority 1: VSCODE_PID environment variable (most reliable)
    vspid = os.getenv("VSCODE_PID")
    logger.debug("VSCODE_PID env var: %s", vspid)
    if vspid and vspid.isdigit():
        vspid_int = int(vspid)
        if self._is_process_alive(vspid_int):
            logger.info("Detected VSCode via VSCODE_PID: %d", vspid_int)
            return vspid_int, "vscode_pid"
        logger.debug("VSCODE_PID %d is not alive", vspid_int)

    if os.getenv("PYCHARM_HOSTED") == "1":
        hosted_ppid = os.getppid()
        if self._is_process_alive(hosted_ppid):
            logger.debug("Tying to PyCharm hosted session (PID: %d)", hosted_ppid)
            return hosted_ppid, "pycharm_hosted"

    # Priority 2: walk up the ancestor chain looking for an IDE process.
    try:
        current_pid: Optional[int] = os.getpid()
        visited: set[int] = set()  # guards against cycles in reported parents
        code_exe_pid: Optional[int] = None
        process_chain = []  # For debugging

        logger.debug("Walking process tree starting from PID: %d", current_pid)
        while current_pid and current_pid not in visited:
            visited.add(current_pid)
            name, ppid = self._get_process_info_local(current_pid)

            if not name:
                logger.debug("PID %d: no name found, stopping walk", current_pid)
                break

            name_lower = name.lower()
            process_chain.append(f"{name}({current_pid})")
            logger.debug("PID %d: %s (parent: %s)", current_pid, name, ppid)

            # Remember the FIRST VS Code-like executable met while climbing
            # (the ancestor nearest to this process). The walk continues so
            # that a node.exe or PyCharm match further up can still return.
            if name_lower in vscode_like_exes and code_exe_pid is None:
                code_exe_pid = current_pid
                logger.debug(
                    "Found outermost Code.exe in process tree (PID: %d)",
                    current_pid,
                )

            # node.exe extension host: accept its parent if that looks like
            # a VS Code-family process (substring match on the name).
            if name_lower == "node.exe" and ppid:
                next_name, _ = self._get_process_info_local(ppid)
                if next_name and any(
                    token in next_name.lower() for token in vscode_like_tokens
                ):
                    logger.debug(
                        "Detected VSCode-like IDE via node.exe parent (PID: %d)",
                        ppid,
                    )
                    return ppid, "node_parent"

            # Found PyCharm / IDEA
            if name_lower in (
                "pycharm64.exe",
                "pycharm.exe",
                "idea64.exe",
                "idea.exe",
            ):
                logger.debug("Detected %s (PID: %d)", name, current_pid)
                return current_pid, "pycharm_process"

            if not ppid or ppid == current_pid:
                break
            current_pid = ppid

        logger.debug("Process chain: %s", " -> ".join(process_chain))

        # No stronger match was found: use the remembered VS Code-like PID.
        if code_exe_pid:
            logger.debug("Tying to VSCode Code.exe (PID: %d)", code_exe_pid)
            return code_exe_pid, "process_tree"

    except Exception as e:
        logger.debug("Process tree walk failed: %s", e)

    # Fallback: identify the immediate parent by its tasklist name.
    # os.getppid() only ever reports THIS process's parent, so there is no
    # way to climb further from here; exactly one ancestor is inspected.
    logger.debug("Using simple parent chain fallback")
    current = os.getpid()
    try:
        parent = os.getppid()
        if parent > 0 and parent != current:
            name = self._get_process_name_via_tasklist(parent)
            logger.info(
                "Simple walk: PID %d -> parent %d (%s)",
                current,
                parent,
                name or "unknown",
            )
            if name:
                name_lower = name.lower()
                if name_lower in vscode_like_exes:
                    logger.info(
                        "Found VSCode-like IDE %s via simple walk (PID: %d)",
                        name,
                        parent,
                    )
                    return parent, "simple_walk"
                # NOTE(review): unlike the tree walk above, the IDEA
                # executables are not recognised here - confirm whether
                # that difference is intentional.
                if name_lower in ("pycharm64.exe", "pycharm.exe"):
                    logger.info("Found PyCharm via simple walk (PID: %d)", parent)
                    return parent, "simple_walk"
    except Exception as e:
        logger.debug("Simple walk error at PID %d: %s", current, e)

    # Fallback 2: Return immediate parent if alive (last resort)
    try:
        ppid = os.getppid()
        if ppid > 0 and self._is_process_alive(ppid):
            logger.info("Falling back to immediate parent (PID: %d)", ppid)
            return ppid, "immediate_parent"
    except Exception as e:
        logger.debug("Immediate parent fallback failed: %s", e)

    logger.warning("Could not determine parent IDE/terminal PID")
    return None, "unknown"
|
|
4239
|
+
|
|
4240
|
+
def _get_process_name_via_tasklist(self, pid: int) -> Optional[str]:
    """Return the image name that ``tasklist`` reports for *pid*.

    Runs ``tasklist /FI "PID eq <pid>" /NH /FO CSV`` and returns the first
    field of the first quoted CSV row that has at least two fields.

    Returns:
        The image name, or ``None`` when tasklist cannot be resolved, exits
        non-zero, prints nothing usable, or the lookup raises (the error is
        logged at debug level).
    """
    try:
        exe = _resolve_executable_path("tasklist")
        if not exe:
            return None

        command = [exe, "/FI", f"PID eq {pid}", "/NH", "/FO", "CSV"]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            creationflags=0x08000000,
            timeout=3,
            errors="ignore",
        )
        if completed.returncode != 0 or not completed.stdout:
            return None

        # Format: "Image Name","PID","Session Name","Session#","Mem Usage"
        for row in completed.stdout.strip().split("\n"):
            if not row.startswith('"'):
                continue
            fields = [field.strip('"') for field in row.split(",")]
            if len(fields) >= 2:
                return fields[0]
    except Exception as exc:
        logger.debug("tasklist name lookup failed for PID %s: %s", pid, exc)
    return None
|
|
4265
|
+
|
|
4266
|
+
|
|
4267
|
+
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|