comfy-env 0.1.20__py3-none-any.whl → 0.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- comfy_env/__init__.py +0 -2
- comfy_env/cli.py +15 -0
- comfy_env/install.py +4 -1
- comfy_env/isolation/__init__.py +0 -2
- comfy_env/isolation/workers/__init__.py +1 -3
- comfy_env/isolation/workers/base.py +1 -1
- comfy_env/isolation/workers/subprocess.py +213 -17
- comfy_env/isolation/wrap.py +6 -14
- {comfy_env-0.1.20.dist-info → comfy_env-0.1.22.dist-info}/METADATA +2 -1
- {comfy_env-0.1.20.dist-info → comfy_env-0.1.22.dist-info}/RECORD +13 -14
- comfy_env/isolation/workers/mp.py +0 -875
- {comfy_env-0.1.20.dist-info → comfy_env-0.1.22.dist-info}/WHEEL +0 -0
- {comfy_env-0.1.20.dist-info → comfy_env-0.1.22.dist-info}/entry_points.txt +0 -0
- {comfy_env-0.1.20.dist-info → comfy_env-0.1.22.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,875 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
MPWorker - Same-venv isolation using multiprocessing.
|
|
3
|
-
|
|
4
|
-
This is the simplest and fastest worker type:
|
|
5
|
-
- Uses multiprocessing.Queue for IPC
|
|
6
|
-
- Zero-copy tensor transfer via shared memory (automatic)
|
|
7
|
-
- Fresh CUDA context in subprocess
|
|
8
|
-
- ~30ms overhead per call
|
|
9
|
-
|
|
10
|
-
Use this when you need:
|
|
11
|
-
- Memory isolation between nodes
|
|
12
|
-
- Fresh CUDA context (automatic VRAM cleanup on worker death)
|
|
13
|
-
- Same Python environment as host
|
|
14
|
-
|
|
15
|
-
Example:
|
|
16
|
-
worker = MPWorker()
|
|
17
|
-
|
|
18
|
-
def gpu_work(image):
|
|
19
|
-
import torch
|
|
20
|
-
return image * 2
|
|
21
|
-
|
|
22
|
-
result = worker.call(gpu_work, image=my_tensor)
|
|
23
|
-
worker.shutdown()
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
import logging
|
|
27
|
-
import traceback
|
|
28
|
-
from queue import Empty as QueueEmpty
|
|
29
|
-
from typing import Any, Callable, Optional
|
|
30
|
-
|
|
31
|
-
from .base import Worker, WorkerError
|
|
32
|
-
from ..tensor_utils import prepare_for_ipc_recursive, keep_tensors_recursive
|
|
33
|
-
|
|
34
|
-
logger = logging.getLogger("comfy_env")
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class _ShutdownSentinel:
    """Marker object that tells the worker loop to exit.

    The worker compares incoming queue items against ``_SHUTDOWN`` by
    identity.  A plain ``object()`` does not survive that comparison once it
    has been pickled through a multiprocessing queue: the receiving process
    gets a brand-new ``object`` instance.  This class pickles itself as a
    reference to the module-level ``_SHUTDOWN`` global instead, so unpickling
    always yields the receiving process's own singleton.
    """

    __slots__ = ()

    def __reduce__(self):
        # Returning a string makes pickle store "the global named _SHUTDOWN
        # in this object's module" rather than the instance state.
        return "_SHUTDOWN"

    def __repr__(self):
        return "<_SHUTDOWN>"


# Sentinel value for shutdown
_SHUTDOWN = _ShutdownSentinel()

# Message type for method calls (avoids pickling issues with functions)
_CALL_METHOD = "call_method"
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def _can_use_cuda_ipc():
    """
    Report whether tensors may be shared between processes via CUDA IPC.

    The decision is driven purely by the ``PYTORCH_CUDA_ALLOC_CONF``
    environment variable: if it mentions ``cudaMallocAsync`` the answer is
    ``False`` (CUDA IPC breaks with that allocator); otherwise, including
    when the variable is unset, the answer is ``True``.
    """
    import os

    allocator_conf = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
    if 'cudaMallocAsync' in allocator_conf:
        return False
    return True
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
# ---------------------------------------------------------------------------
|
|
57
|
-
# Tensor file transfer - fallback for cudaMallocAsync (CUDA IPC doesn't work)
|
|
58
|
-
# ---------------------------------------------------------------------------
|
|
59
|
-
|
|
60
|
-
def _save_tensors_to_files(obj, file_registry=None):
    """Recursively replace torch tensors with references to temp files.

    Args:
        obj: Arbitrary value.  Tensors are written to ``.pt`` temp files;
            dicts, lists and tuples are rebuilt with their members
            converted; anything else is returned unchanged.
        file_registry: Optional list that receives the path of every temp
            file created, in creation order.  A fresh list is used when
            omitted.

    Returns:
        A structure of the same shape as ``obj`` in which every tensor is
        replaced by ``{"__tensor_file__": path, "dtype": ..., "device": ...}``.
        The tensor is always stored as a CPU tensor; ``dtype`` and ``device``
        record the original tensor's values as strings.

    Raises:
        Whatever ``torch.save`` raises.  The partially written temp file is
        removed before the exception propagates.
    """
    if file_registry is None:
        file_registry = []

    # torch is optional: without it nothing can be a tensor, so only the
    # container recursion below applies.
    try:
        import torch
    except ImportError:
        torch = None

    if torch is not None and isinstance(obj, torch.Tensor):
        import os
        import tempfile

        # Reserve a unique name and close the handle straight away so that no
        # descriptor stays open while torch writes to the path.
        with tempfile.NamedTemporaryFile(suffix='.pt', delete=False) as handle:
            path = handle.name
        try:
            torch.save(obj.cpu(), path)  # Always save as CPU tensor
        except BaseException:
            # Do not leave an orphaned temp file behind on failure.
            try:
                os.unlink(path)
            except OSError:
                pass
            raise
        file_registry.append(path)
        return {"__tensor_file__": path, "dtype": str(obj.dtype), "device": str(obj.device)}

    if isinstance(obj, dict):
        return {k: _save_tensors_to_files(v, file_registry) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_save_tensors_to_files(v, file_registry) for v in obj]
    if isinstance(obj, tuple):
        return tuple(_save_tensors_to_files(v, file_registry) for v in obj)
    return obj
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def _load_tensors_from_files(obj):
    """Recursively replace temp-file references with the tensors they hold.

    A dict containing the key ``"__tensor_file__"`` is treated as a tensor
    reference: the file is loaded with ``torch.load(..., weights_only=True)``
    and then deleted.  Other dicts, lists and tuples are rebuilt with their
    members converted; any other value is returned unchanged.

    The temp file is removed even when loading fails, so a corrupt or
    unreadable file is not left behind.

    Raises:
        Whatever ``torch.load`` (or importing torch) raises for a tensor
        reference.
    """
    if isinstance(obj, dict):
        if "__tensor_file__" in obj:
            import os

            path = obj["__tensor_file__"]
            try:
                import torch
                return torch.load(path, weights_only=True)
            finally:
                # Cleanup temp file; best effort so a failed unlink cannot
                # mask the real result or the real error.
                try:
                    os.unlink(path)
                except OSError:
                    pass
        return {k: _load_tensors_from_files(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [_load_tensors_from_files(v) for v in obj]
    if isinstance(obj, tuple):
        return tuple(_load_tensors_from_files(v) for v in obj)
    return obj
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def _dump_worker_env(worker_name: str = "unknown", print_to_terminal: bool = False):
    """Write a JSON snapshot of this process's environment; optionally print a summary.

    The snapshot goes to ``<cwd>/.comfy-env/logs/worker_<name>_<pid>.json``
    (the directory is created if needed) and contains the timestamp, worker
    name, pid, cwd, interpreter details, platform details, the complete
    ``os.environ``, ``sys.path`` and the sorted names of loaded modules.

    NOTE(review): the full environment is written to disk, including any
    secrets held in environment variables.

    Args:
        worker_name: Label used in the file name and in the snapshot.
        print_to_terminal: When true, also print a short summary to stdout.
    """
    import json
    import os
    import platform
    import sys
    from datetime import datetime
    from pathlib import Path

    log_dir = Path.cwd() / ".comfy-env" / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)

    # Built incrementally; insertion order fixes the key order in the JSON.
    debug_info = {
        "timestamp": datetime.now().isoformat(),
        "worker_name": worker_name,
        "pid": os.getpid(),
        "cwd": os.getcwd(),
    }
    debug_info["python"] = {
        "executable": sys.executable,
        "version": sys.version,
        "prefix": sys.prefix,
    }
    debug_info["platform"] = {
        "system": platform.system(),
        "machine": platform.machine(),
        "release": platform.release(),
    }
    debug_info["env_vars"] = dict(os.environ)
    debug_info["sys_path"] = sys.path
    debug_info["modules_loaded"] = sorted(sys.modules.keys())

    log_file = log_dir / f"worker_{worker_name}_{os.getpid()}.json"
    serialized = json.dumps(debug_info, indent=2, default=str)
    log_file.write_text(serialized)

    if not print_to_terminal:
        return

    print(f"[comfy-env] === WORKER ENV DEBUG: {worker_name} ===")
    print(f"[comfy-env] Python: {sys.executable}")
    print(f"[comfy-env] Version: {sys.version.split()[0]}")
    print(f"[comfy-env] PID: {os.getpid()}, CWD: {os.getcwd()}")
    interesting = ['PATH', 'LD_LIBRARY_PATH', 'DYLD_LIBRARY_PATH', 'PYTHONPATH', 'OMP_NUM_THREADS', 'KMP_DUPLICATE_LIB_OK']
    for var in interesting:
        val = os.environ.get(var, '<unset>')
        # Keep terminal output readable: long values are cut to 100 chars.
        if len(val) > 100:
            val = val[:100] + '...'
        print(f"[comfy-env] {var}={val}")
    print(f"[comfy-env] Env dumped to: {log_file}")
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def _worker_loop(queue_in, queue_out, sys_path_additions=None, lib_path=None, env_vars=None, worker_name=None):
    """
    Worker process main loop.

    Prepares the process environment, then receives work items from
    ``queue_in`` and answers each one on ``queue_out``:

    - ("call_method", module_name, class_name, method_name, self_state, kwargs):
      call a method on a class via ``_execute_method_call``
    - (func, args, kwargs): execute a function directly (legacy)
    - _SHUTDOWN: acknowledge with ``("shutdown", None)`` and exit

    Replies are ``("ok", result)``, ``("error", (message, traceback_text))``
    when the work item raised, or ``("fatal", message)`` when the queue
    itself failed, after which the loop exits.

    Args:
        queue_in: Input queue for receiving work items
        queue_out: Output queue for sending results
        sys_path_additions: Paths to add to sys.path
        lib_path: Path to add to the platform library search path
            (PATH / DYLD_LIBRARY_PATH / LD_LIBRARY_PATH)
        env_vars: Environment variables to set (from comfy-env.toml)
        worker_name: Name of the worker (for logging)
    """
    import os
    import sys
    from pathlib import Path

    # Apply env_vars FIRST (before any library imports that might check them)
    if env_vars:
        os.environ.update(env_vars)

    # Set worker mode env var
    os.environ["COMFYUI_ISOLATION_WORKER"] = "1"

    # Always dump env to file, print to terminal if debug enabled
    print_debug = os.environ.get("COMFY_ENV_DEBUG", "").lower() in ("1", "true", "yes")
    _dump_worker_env(worker_name or "unknown", print_to_terminal=print_debug)

    # DLL/library isolation: drop search-path entries that belong to other
    # environment managers and put lib_path first.
    path_sep = ";" if sys.platform == "win32" else ":"
    env_manager_markers = (".ct-envs", "conda", "mamba", "miniforge", "miniconda", "anaconda")

    if sys.platform == "win32":
        # Use os.add_dll_directory() for explicit DLL registration (Python 3.8+)
        if lib_path and hasattr(os, "add_dll_directory"):
            try:
                os.add_dll_directory(lib_path)
            except Exception:
                # Deliberately best effort: PATH below still carries lib_path.
                pass

        # Windows additionally filters MinGW directories out of PATH.
        windows_markers = env_manager_markers + ("mingw",)
        current_path = os.environ.get("PATH", "")
        clean_parts = [
            p for p in current_path.split(path_sep)
            if not any(x in p.lower() for x in windows_markers)
        ]
        if lib_path:
            clean_parts.insert(0, lib_path)
        os.environ["PATH"] = path_sep.join(clean_parts)
    elif sys.platform == "darwin":
        # macOS: ONLY use the isolated lib_path, don't inherit
        if lib_path:
            os.environ["DYLD_LIBRARY_PATH"] = lib_path
        else:
            os.environ.pop("DYLD_LIBRARY_PATH", None)
    else:
        # Linux: Use LD_LIBRARY_PATH (empty entries are dropped here)
        current = os.environ.get("LD_LIBRARY_PATH", "")
        clean_parts = [
            p for p in current.split(path_sep)
            if p and not any(x in p.lower() for x in env_manager_markers)
        ]
        if lib_path:
            clean_parts.insert(0, lib_path)
        os.environ["LD_LIBRARY_PATH"] = path_sep.join(clean_parts)

    def _find_comfyui_base():
        """Locate a directory that contains both ``main.py`` and ``comfy``."""
        cwd = Path.cwd().resolve()
        # Check common child directories (for test environments)
        for base in [cwd, cwd.parent]:
            for child in [".comfy-test-env/ComfyUI", "ComfyUI"]:
                candidate = base / child
                if (candidate / "main.py").exists() and (candidate / "comfy").exists():
                    return candidate
        # Walk up from cwd (at most 10 levels) looking for ComfyUI
        current = cwd
        for _ in range(10):
            if (current / "main.py").exists() and (current / "comfy").exists():
                return current
            current = current.parent
        # Check COMFYUI_BASE env var as fallback
        if os.environ.get("COMFYUI_BASE"):
            return Path(os.environ["COMFYUI_BASE"])
        return None

    # Put the ComfyUI base on sys.path so folder_paths/comfy can be imported.
    comfyui_base = _find_comfyui_base()
    if comfyui_base and str(comfyui_base) not in sys.path:
        sys.path.insert(0, str(comfyui_base))

    # Add custom paths to sys.path for module discovery
    if sys_path_additions:
        for path in sys_path_additions:
            if path not in sys.path:
                sys.path.insert(0, path)

    while True:
        try:
            item = queue_in.get()

            # Check for shutdown signal.  A bare ``object()`` sentinel does
            # not keep its identity after being pickled through the queue,
            # so any plain ``object`` instance is also treated as shutdown;
            # no valid work item is ever a bare ``object``.
            if item is _SHUTDOWN or type(item) is object:
                queue_out.put(("shutdown", None))
                break

            try:
                if isinstance(item, tuple) and len(item) == 6 and item[0] == _CALL_METHOD:
                    # Method call protocol
                    _, module_name, class_name, method_name, self_state, kwargs = item
                    # Load tensors from files if using file-based transfer
                    if not _can_use_cuda_ipc():
                        kwargs = _load_tensors_from_files(kwargs)
                    result = _execute_method_call(
                        module_name, class_name, method_name, self_state, kwargs
                    )
                else:
                    # Direct function call (legacy)
                    func, args, kwargs = item
                    # Load tensors from files if using file-based transfer
                    if not _can_use_cuda_ipc():
                        args = tuple(_load_tensors_from_files(a) for a in args)
                        kwargs = _load_tensors_from_files(kwargs)
                    result = func(*args, **kwargs)

                # Encode the result the same way for both protocols.
                if _can_use_cuda_ipc():
                    keep_tensors_recursive(result)
                else:
                    result = _save_tensors_to_files(result)
                queue_out.put(("ok", result))

            except Exception as e:
                tb = traceback.format_exc()
                queue_out.put(("error", (str(e), tb)))

        except Exception as e:
            # Queue error - try to report, then exit
            try:
                queue_out.put(("fatal", str(e)))
            except Exception:
                # The output queue is unusable too; nothing more can be done.
                pass
            break
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
class PathBasedModuleFinder:
    """
    Meta path finder that handles ComfyUI's path-based module names.

    ComfyUI uses full filesystem paths as module names for custom nodes.
    This finder intercepts imports of such modules and loads them from disk.
    """

    def find_spec(self, fullname, path, target=None):
        """Return a module spec for a path-based module name, else ``None``.

        ``fullname`` has the form ``<filesystem path>[.<submodule>...]``.
        The filesystem part is taken to be the longest dot-delimited prefix
        that exists on disk, so paths that themselves contain dots (for
        example ``/nodes/my.pack``) are resolved correctly.  Remaining parts
        are first consumed as sub-directories while they exist; whatever is
        left is looked up as ``<name>.py`` or ``<name>/__init__.py``.

        Args:
            fullname: Module name being imported.
            path: Unused; part of the finder protocol.
            target: Unused; part of the finder protocol.

        Returns:
            A spec created with ``importlib.util.spec_from_file_location``,
            or ``None`` when the name is not an absolute path or nothing
            loadable is found.
        """
        import importlib.util
        import os

        # Only handle path-based module names: POSIX-style "/..." names and
        # whatever the current platform regards as an absolute path.
        if not (fullname.startswith('/') or os.path.isabs(fullname)):
            return None

        parts = fullname.split('.')

        # Find the longest prefix (rejoined with '.') that exists on disk.
        # Falls back to the first part when nothing exists.
        root_len = 1
        for end in range(len(parts), 0, -1):
            if os.path.exists('.'.join(parts[:end])):
                root_len = end
                break
        base_path = '.'.join(parts[:root_len])
        submodule_parts = parts[root_len:]

        # Walk through parts to find where path ends and module begins
        for i, part in enumerate(submodule_parts):
            test_path = os.path.join(base_path, part)
            if os.path.exists(test_path):
                base_path = test_path
            else:
                # Remaining parts are module names
                submodule_parts = submodule_parts[i:]
                break
        else:
            # All parts were path components
            submodule_parts = []

        if submodule_parts:
            # Importing a submodule: intermediate names are directories.
            current_path = base_path
            for part in submodule_parts[:-1]:
                current_path = os.path.join(current_path, part)

            submod = submodule_parts[-1]
            submod_file = os.path.join(current_path, submod + '.py')
            submod_pkg = os.path.join(current_path, submod, '__init__.py')

            if os.path.exists(submod_file):
                return importlib.util.spec_from_file_location(fullname, submod_file)
            if os.path.exists(submod_pkg):
                return importlib.util.spec_from_file_location(
                    fullname, submod_pkg,
                    submodule_search_locations=[os.path.join(current_path, submod)]
                )
        elif os.path.isdir(base_path):
            # Top-level path-based package
            init_path = os.path.join(base_path, "__init__.py")
            if os.path.exists(init_path):
                return importlib.util.spec_from_file_location(
                    fullname, init_path,
                    submodule_search_locations=[base_path]
                )
        elif os.path.isfile(base_path):
            # Top-level path-based single-file module
            return importlib.util.spec_from_file_location(fullname, base_path)

        return None
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
# Set to True once PathBasedModuleFinder has been put on sys.meta_path
_path_finder_installed = False


def _ensure_path_finder_installed():
    """Put a PathBasedModuleFinder at the front of ``sys.meta_path``, once.

    Repeat calls are no-ops: the module-level ``_path_finder_installed``
    flag records that the finder has already been inserted.
    """
    import sys
    global _path_finder_installed

    if _path_finder_installed:
        return

    finder = PathBasedModuleFinder()
    sys.meta_path.insert(0, finder)
    _path_finder_installed = True
    logger.debug("[comfy_env] Installed PathBasedModuleFinder for path-based module names")
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
def _load_path_based_module(module_name: str):
    """
    Load a module that has a filesystem path as its name.

    ComfyUI uses full filesystem paths as module names for custom nodes.
    This function handles that case by using file-based imports.

    ``module_name`` is either a path to a package directory or source file,
    or such a path followed by dotted submodule names
    (``/path/to/pkg.submod.subsubmod``).  Every module loaded is registered
    in ``sys.modules``; a module whose execution fails is removed again so a
    later call does not return a half-initialised module.

    Args:
        module_name: Path-based module name.

    Returns:
        The loaded (or already cached) module.

    Raises:
        ModuleNotFoundError: If the path or a submodule cannot be located.
    """
    import importlib.util
    import os
    import sys

    # Check if it's already in sys.modules
    if module_name in sys.modules:
        return sys.modules[module_name]

    def _register_and_exec(name, spec, module):
        """Register ``module`` under ``name`` and run it; unregister on failure."""
        sys.modules[name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            sys.modules.pop(name, None)
            raise
        return module

    # A name that exists on disk as-is is a plain path even when it contains
    # dots (e.g. "/nodes/my.pack"); only otherwise are the dots separators
    # between the path and submodule names.
    if '.' in module_name and not os.path.exists(module_name):
        parts = module_name.split('.')

        # The filesystem part is the longest dot-delimited prefix that
        # exists; fall back to the first part when none does.
        root_len = 1
        for end in range(len(parts) - 1, 0, -1):
            if os.path.exists('.'.join(parts[:end])):
                root_len = end
                break
        base_path = '.'.join(parts[:root_len])
        remaining = parts[root_len:]

        # Consume leading parts that are sub-directories of the base path.
        submodule_parts = []
        for i, part in enumerate(remaining):
            test_path = os.path.join(base_path, part)
            if os.path.exists(test_path):
                base_path = test_path
            else:
                # This and remaining parts are module names, not path components
                submodule_parts = remaining[i:]
                break

        if not submodule_parts:
            # Every dotted part turned out to be a directory or file below
            # the base path: the target is that path itself.
            if base_path != module_name:
                return _load_path_based_module(base_path)
        else:
            # Load parent package first
            current_module = _load_path_based_module(base_path)
            current_name = base_path

            for submod in submodule_parts:
                parent_name = current_name
                current_name = f"{parent_name}.{submod}"

                if current_name in sys.modules:
                    current_module = sys.modules[current_name]
                    continue
                if hasattr(current_module, submod):
                    # Already reachable as an attribute of the parent.
                    current_module = getattr(current_module, submod)
                    continue

                search_paths = getattr(current_module, '__path__', None)
                if search_paths is None:
                    # Parent is not a package, so it cannot contain submod.
                    raise ModuleNotFoundError(f"Cannot find submodule {submod} in {current_name}")

                for parent_path in search_paths:
                    submod_file = os.path.join(parent_path, submod + '.py')
                    submod_pkg = os.path.join(parent_path, submod, '__init__.py')
                    if os.path.exists(submod_file):
                        spec = importlib.util.spec_from_file_location(current_name, submod_file)
                        loaded = importlib.util.module_from_spec(spec)
                        # Relative imports resolve against the parent package.
                        loaded.__package__ = parent_name
                        current_module = _register_and_exec(current_name, spec, loaded)
                        break
                    if os.path.exists(submod_pkg):
                        spec = importlib.util.spec_from_file_location(
                            current_name, submod_pkg,
                            submodule_search_locations=[os.path.dirname(submod_pkg)])
                        loaded = importlib.util.module_from_spec(spec)
                        current_module = _register_and_exec(current_name, spec, loaded)
                        break
                else:
                    raise ModuleNotFoundError(f"Cannot find submodule {submod} in {current_name}")
            return current_module

    # Simple path-based module (no submodule parts)
    is_package_dir = os.path.isdir(module_name)
    if is_package_dir:
        init_path = os.path.join(module_name, "__init__.py")
        submodule_search_locations = [module_name]
    else:
        init_path = module_name
        submodule_search_locations = None

    if not os.path.exists(init_path):
        raise ModuleNotFoundError(f"Cannot find module at path: {module_name}")

    spec = importlib.util.spec_from_file_location(
        module_name,
        init_path,
        submodule_search_locations=submodule_search_locations
    )
    if spec is None or spec.loader is None:
        # spec_from_file_location could not pick a loader for this file.
        raise ModuleNotFoundError(f"Cannot find module at path: {module_name}")
    module = importlib.util.module_from_spec(spec)

    # Set up package attributes for relative imports
    if is_package_dir:
        module.__path__ = [module_name]
        module.__package__ = module_name
    else:
        module.__package__ = module_name.rsplit('.', 1)[0] if '.' in module_name else ''

    return _register_and_exec(module_name, spec, module)
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
def _execute_method_call(module_name: str, class_name: str, method_name: str,
                         self_state: dict, kwargs: dict) -> Any:
    """
    Execute a method call in the worker process.

    Imports ``module_name``, builds an instance of ``class_name`` without
    running ``__init__``, restores ``self_state`` onto it and calls the
    original (un-decorated) method with ``kwargs``.

    Args:
        module_name: Importable module name, or a path-based module name as
            used by ComfyUI for custom nodes.
        class_name: Name of the class inside that module.
        method_name: Method to call when the class carries no
            ``_isolated_original_method`` attribute.
        self_state: Attribute values to restore on the new instance.
        kwargs: Keyword arguments for the method.

    Returns:
        Whatever the called method returns.
    """
    import importlib
    import os

    logger.debug(f"Attempting to import module_name={module_name}")

    # ComfyUI's load_custom_node uses the full path as the module name, so a
    # name that looks like a path and whose base exists is loaded from file.
    looks_like_path = module_name.startswith('/') or (
        os.sep in module_name and not module_name.startswith('.')
    )
    if looks_like_path:
        base_path = module_name.split('.')[0] if '.' in module_name else module_name
        if os.path.exists(base_path):
            logger.debug(f"Detected path-based module name, using file-based import")
            # Install the meta path finder to handle relative imports within the package
            _ensure_path_finder_installed()
            module = _load_path_based_module(module_name)
        else:
            # Doesn't look like a valid path, try standard import
            module = importlib.import_module(module_name)
    else:
        # Standard module name - use importlib.import_module
        module = importlib.import_module(module_name)
    cls = getattr(module, class_name)

    # Create the instance without calling __init__; state is restored below.
    instance = object.__new__(cls)

    if hasattr(cls, '__slots__'):
        # Collect slots declared anywhere in the class hierarchy: reading
        # cls.__slots__ alone would miss slots defined on base classes.  A
        # bare string declares a single slot and must not be iterated
        # character by character.
        slot_names = {}
        for klass in cls.__mro__:
            declared = vars(klass).get('__slots__', ())
            if isinstance(declared, str):
                declared = (declared,)
            for slot in declared:
                slot_names[slot] = None

        for slot in slot_names:
            if slot in self_state:
                setattr(instance, slot, self_state[slot])

        # Hybrid classes also have a __dict__ for non-slot attributes.
        if hasattr(instance, '__dict__'):
            for key, value in self_state.items():
                if key not in slot_names:
                    setattr(instance, key, value)
    else:
        # Standard class with __dict__
        instance.__dict__.update(self_state)

    # Prefer the ORIGINAL method stored by the decorator over the proxy to
    # avoid the infinite recursion of proxy -> worker -> proxy.  Fall back
    # to the named method when the class wasn't decorated.
    original_method = getattr(cls, '_isolated_original_method', None)
    if original_method is None:
        original_method = getattr(cls, method_name)

    # It's an unbound function, so pass the instance explicitly.
    return original_method(instance, **kwargs)
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
class MPWorker(Worker):
|
|
555
|
-
"""
|
|
556
|
-
Worker using torch.multiprocessing for same-venv isolation.
|
|
557
|
-
|
|
558
|
-
Features:
|
|
559
|
-
- Zero-copy CUDA tensor transfer (via CUDA IPC handles)
|
|
560
|
-
- Zero-copy CPU tensor transfer (via shared memory)
|
|
561
|
-
- Fresh CUDA context (subprocess has independent GPU state)
|
|
562
|
-
- Automatic cleanup on worker death
|
|
563
|
-
|
|
564
|
-
The subprocess uses 'spawn' start method, ensuring a clean Python
|
|
565
|
-
interpreter without inherited state from the parent.
|
|
566
|
-
"""
|
|
567
|
-
|
|
568
|
-
def __init__(self, name: Optional[str] = None, sys_path: Optional[list] = None,
             lib_path: Optional[str] = None, env_vars: Optional[dict] = None,
             python: Optional[str] = None):
    """
    Record the worker configuration; no process is started here.

    Args:
        name: Label used in error messages; "MPWorker" when not given.
        sys_path: Paths handed to the worker process for its sys.path.
        lib_path: Library directory handed to the worker process.
        env_vars: Environment variables handed to the worker process.
        python: Path to a venv Python executable.  When provided it is
            used as the spawn executable instead of sys.executable,
            avoiding Windows issues where spawn re-imports main.py.
    """
    # Process handles; populated when the worker is first started.
    self._process = None
    self._queue_in = None
    self._queue_out = None

    # Lifecycle flags.
    self._started = False
    self._shutdown = False

    # Configuration, with empty defaults substituted for falsy values.
    self.name = name if name else "MPWorker"
    self._sys_path = sys_path if sys_path else []
    self._lib_path = lib_path
    self._env_vars = env_vars if env_vars else {}
    self._python = python  # Venv Python for true isolation (like pyisolate)
|
|
593
|
-
|
|
594
|
-
def _ensure_started(self):
    """Lazily start the worker process on first call.

    While the child is being spawned, conda/pixi-related environment
    variables are removed from ``os.environ`` and the multiprocessing spawn
    executable may be overridden.  Both are restored before returning, also
    when importing torch or starting the process fails.

    NOTE(review): ``os.environ`` and the spawn executable are process-wide,
    so concurrent starts from several threads can interfere with each other.

    Raises:
        RuntimeError: If the worker was shut down, or was started earlier
            and its process is no longer alive.
    """
    if self._shutdown:
        raise RuntimeError(f"{self.name}: Worker has been shut down")

    if self._started:
        if not self._process.is_alive():
            raise RuntimeError(f"{self.name}: Worker process died unexpectedly")
        return

    import os
    import sys

    # Clear conda/pixi environment variables FIRST, before importing multiprocessing
    # These can cause the child process to pick up the wrong Python interpreter
    # or stdlib, leading to sys.version mismatch errors in platform module
    conda_env_vars = [
        'CONDA_PREFIX',
        'CONDA_DEFAULT_ENV',
        'CONDA_PYTHON_EXE',
        'CONDA_EXE',
        'CONDA_SHLVL',
        'PYTHONHOME',
        'PYTHONPATH',  # Also clear PYTHONPATH to prevent pixi paths
        '_CE_CONDA',
        '_CE_M',
    ]
    saved_env = {}
    for var in conda_env_vars:
        if var in os.environ:
            saved_env[var] = os.environ.pop(var)

    # Also remove pixi paths from LD_LIBRARY_PATH
    ld_lib = os.environ.get('LD_LIBRARY_PATH', '')
    if '.pixi' in ld_lib:
        saved_env['LD_LIBRARY_PATH'] = ld_lib
        new_ld_lib = ':'.join(p for p in ld_lib.split(':') if '.pixi' not in p)
        if new_ld_lib:
            os.environ['LD_LIBRARY_PATH'] = new_ld_lib
        else:
            os.environ.pop('LD_LIBRARY_PATH', None)

    mp_spawn = None
    original_exe = None
    exe_captured = False
    try:
        # Imported inside the try block so that a failing import still
        # restores the environment in the finally clause below.
        import torch.multiprocessing as mp
        import multiprocessing.spawn as mp_spawn

        # Use spawn to get clean subprocess (no inherited CUDA context)
        ctx = mp.get_context('spawn')

        # Set the spawn executable for true process isolation (like pyisolate)
        original_exe = mp_spawn.get_executable()
        exe_captured = True
        if self._python:
            # True isolation: use venv Python (fixes Windows spawn __main__ issue)
            mp_spawn.set_executable(self._python)
        else:
            # Fallback: use current Python (may fail on Windows with ComfyUI)
            if original_exe != sys.executable.encode() and original_exe != sys.executable:
                print(f"[comfy-env] Warning: spawn executable was {original_exe}, forcing to {sys.executable}")
                mp_spawn.set_executable(sys.executable)

        self._queue_in = ctx.Queue()
        self._queue_out = ctx.Queue()
        self._process = ctx.Process(
            target=_worker_loop,
            args=(self._queue_in, self._queue_out, self._sys_path, self._lib_path, self._env_vars, self.name),
            daemon=True,
        )
        self._process.start()
        self._started = True
    finally:
        # Restore original executable setting, even if start() raised.
        if exe_captured:
            mp_spawn.set_executable(original_exe)
        # Restore env vars in parent process
        os.environ.update(saved_env)
|
|
672
|
-
|
|
673
|
-
def call(
    self,
    func: Callable,
    *args,
    timeout: Optional[float] = None,
    **kwargs
) -> Any:
    """
    Run ``func(*args, **kwargs)`` in the worker process and return its result.

    The worker is started on first use.  Arguments are encoded for transfer
    before being queued: via ``prepare_for_ipc_recursive`` when CUDA IPC is
    usable, otherwise via temp files (``_save_tensors_to_files``).

    Args:
        func: Function to execute. Must be picklable (module-level or staticmethod).
        *args: Positional arguments.
        timeout: Timeout in seconds, passed to ``_get_result``
            (None = no timeout, default).
        **kwargs: Keyword arguments.

    Returns:
        Whatever ``_get_result`` returns for this request.

    Raises:
        RuntimeError: If the worker has been shut down or its process died
            (raised by ``_ensure_started``).
    """
    self._ensure_started()

    # Pick the tensor encoder once: CUDA IPC (zero copy, native allocator)
    # or file-based transfer (fallback for cudaMallocAsync).
    encode = prepare_for_ipc_recursive if _can_use_cuda_ipc() else _save_tensors_to_files
    kwargs = {key: encode(value) for key, value in kwargs.items()}
    args = tuple(encode(arg) for arg in args)

    # Send work item
    work_item = (func, args, kwargs)
    self._queue_in.put(work_item)

    return self._get_result(timeout)
|
|
713
|
-
|
|
714
|
-
def call_method(
    self,
    module_name: str,
    class_name: str,
    method_name: str,
    self_state: dict,
    kwargs: dict,
    timeout: Optional[float] = None,
) -> Any:
    """
    Ask the worker process to run a method identified by name.

    Only strings and plain data are queued (module path, class name, method
    name, instance state, keyword arguments), which avoids pickling the
    method object itself -- useful when the method is decorated. Resolving
    those names to a callable happens on the worker side of the protocol,
    which is not part of this method.

    Args:
        module_name: Full module path (e.g., 'my_package.nodes.my_node').
        class_name: Class name (e.g., 'MyNode').
        method_name: Method name (e.g., 'process').
        self_state: Instance ``__dict__`` to restore in the worker.
        kwargs: Keyword arguments for the method.
        timeout: Seconds to wait for the result; ``None`` (default) means
            no timeout.

    Returns:
        The value returned by the method, as delivered by ``_get_result``.

    Raises:
        WorkerError: If the method raised inside the worker.
        TimeoutError: If no result arrived within ``timeout``.
        RuntimeError: On a fatal or malformed worker response.
    """
    self._ensure_started()

    # Pick the tensor transfer strategy: CUDA IPC when available, otherwise
    # the file-based fallback. Only kwargs carry payload here.
    transfer = prepare_for_ipc_recursive if _can_use_cuda_ipc() else _save_tensors_to_files
    kwargs = transfer(kwargs)

    # The leading _CALL_METHOD tag marks this as a string-based method
    # request rather than a pickled callable.
    request = (
        _CALL_METHOD,
        module_name,
        class_name,
        method_name,
        self_state,
        kwargs,
    )
    self._queue_in.put(request)

    return self._get_result(timeout)
|
|
766
|
-
|
|
767
|
-
def _get_result(self, timeout: Optional[float]) -> Any:
    """
    Wait for the worker's reply to the most recent request and decode it.

    The output queue is polled in short intervals instead of one unbounded
    ``get`` so that a worker that died without replying is detected. With
    ``timeout=None`` a single blocking ``get`` would otherwise hang forever.

    Args:
        timeout: Seconds to wait for the reply; ``None`` waits indefinitely
            (while the worker process is still alive).

    Returns:
        The payload of an ``"ok"`` reply. When CUDA IPC is not in use the
        payload is first passed through ``_load_tensors_from_files``.

    Raises:
        WorkerError: The reply status was ``"error"``.
        TimeoutError: No reply arrived within ``timeout`` (raised after
            ``_handle_timeout`` has stopped the worker).
        RuntimeError: The worker died before replying, the reply could not
            be read, or its status was ``"fatal"`` or unrecognized.
    """
    import time  # local import so this method does not rely on file-level imports

    poll_interval = 0.5  # seconds between liveness checks
    deadline = None if timeout is None else time.monotonic() + timeout
    worker_dead = False

    while True:
        if deadline is None:
            wait = poll_interval
        else:
            wait = min(poll_interval, max(0.0, deadline - time.monotonic()))

        try:
            status, result = self._queue_out.get(timeout=wait)
            break
        except QueueEmpty:
            if worker_dead:
                # The process was already gone on the previous pass and this
                # final drain found nothing: no reply is ever going to arrive.
                self._shutdown = True
                raise RuntimeError(
                    f"{self.name}: Worker process died before returning a result "
                    f"(exit code: {self._process.exitcode})"
                ) from None
        except Exception as e:
            # Keep the original exception attached as the cause for debugging.
            raise RuntimeError(f"{self.name}: Failed to get result: {e}") from e

        if deadline is not None and time.monotonic() >= deadline:
            # Timeout - use graceful escalation
            self._handle_timeout(timeout)
            # _handle_timeout always raises, but just in case:
            raise TimeoutError(f"{self.name}: Call timed out after {timeout}s")

        # Re-check liveness; if the worker is gone, loop once more to drain a
        # reply that may have been written just before it exited.
        worker_dead = not self._process.is_alive()

    # Handle response
    if status == "ok":
        # Load tensors from temp files if using file-based transfer
        if not _can_use_cuda_ipc():
            result = _load_tensors_from_files(result)
        return result
    elif status == "error":
        msg, tb = result
        raise WorkerError(msg, traceback=tb)
    elif status == "fatal":
        # A fatal reply means the worker is unusable from now on.
        self._shutdown = True
        raise RuntimeError(f"{self.name}: Fatal worker error: {result}")
    else:
        raise RuntimeError(f"{self.name}: Unknown response status: {status}")
|
|
793
|
-
|
|
794
|
-
def shutdown(self) -> None:
    """
    Shut down the worker process.

    Does nothing if the worker was never started or is already shut down.
    Otherwise it sends the shutdown sentinel, waits up to 5 seconds for an
    acknowledgment and up to 5 more for the process to exit, and kills the
    process if it is still alive. Ordinary errors along the way are handled
    by force-killing the process; ``KeyboardInterrupt`` and ``SystemExit``
    are allowed to propagate.
    """
    if self._shutdown or not self._started:
        return

    # Mark as shut down first so repeated calls are no-ops.
    self._shutdown = True

    try:
        # Send shutdown signal
        self._queue_in.put(_SHUTDOWN)

        # Wait for acknowledgment. This is best effort: a missing or
        # unreadable acknowledgment must not prevent the join/kill below.
        # Catch Exception (not a bare except) so Ctrl-C and interpreter
        # exit are not swallowed here.
        try:
            self._queue_out.get(timeout=5.0)
        except Exception as exc:
            logger.debug("%s: no shutdown acknowledgment from worker: %r", self.name, exc)

        # Wait for process to exit
        self._process.join(timeout=5.0)

        if self._process.is_alive():
            self._process.kill()
            self._process.join(timeout=1.0)

    except Exception:
        # Force kill if anything goes wrong
        if self._process and self._process.is_alive():
            self._process.kill()
|
|
822
|
-
|
|
823
|
-
def _handle_timeout(self, timeout: float) -> None:
    """
    Stop the worker after a timed-out call, escalating step by step.

    Stages:
        1. Send the shutdown sentinel, wait up to 3s for a reply and up to
           2s for the process to exit.
        2. If still alive, ``terminate()`` and wait up to 5s.
        3. If still alive, ``kill()`` and wait up to 1s.

    Killing is the last resort because, per the original notes, killing the
    worker outright can leak GPU memory.

    Args:
        timeout: The timeout (in seconds) that expired; used only in log
            and error messages.

    Raises:
        TimeoutError: Always. The message differs depending on whether the
            worker exited during stage 1 or had to be terminated/killed.
    """
    logger.warning(f"{self.name}: Call timed out after {timeout}s, attempting graceful shutdown")

    # Stage 1: Send shutdown signal, wait 3s for graceful exit
    exited_gracefully = False
    try:
        self._queue_in.put(_SHUTDOWN)
        self._queue_out.get(timeout=3.0)
        self._process.join(timeout=2.0)
        exited_gracefully = not self._process.is_alive()
    except QueueEmpty:
        # No reply within 3s: fall through to escalation.
        pass
    except TimeoutError:
        raise
    except Exception:
        # Any other failure during the polite attempt: escalate as well.
        pass

    if exited_gracefully:
        self._shutdown = True
        raise TimeoutError(f"{self.name}: Graceful shutdown after timeout ({timeout}s)")

    # Stage 2: SIGTERM, wait 5s
    if self._process.is_alive():
        logger.warning(f"{self.name}: Graceful shutdown failed, sending SIGTERM")
        self._process.terminate()
        self._process.join(timeout=5.0)

    # Stage 3: SIGKILL as last resort
    if self._process.is_alive():
        logger.error(f"{self.name}: SIGTERM failed, force killing worker (may leak GPU memory)")
        self._process.kill()
        self._process.join(timeout=1.0)

    # Whatever stage ended the process, the worker is unusable now.
    self._shutdown = True
    raise TimeoutError(f"{self.name}: Call timed out after {timeout}s")
|
|
863
|
-
|
|
864
|
-
def is_alive(self) -> bool:
    """
    Report whether the worker is usable.

    Returns:
        ``False`` once the worker has been shut down; ``True`` if it has not
        been started yet (it can still be started on demand); otherwise the
        liveness of the underlying process.
    """
    if self._shutdown:
        return False
    # A worker that was never started counts as alive because it can still
    # be started lazily.
    return True if not self._started else self._process.is_alive()
|
|
872
|
-
|
|
873
|
-
def __repr__(self):
    """Return a short debug string with the worker's name and alive/stopped state."""
    if self.is_alive():
        status = "alive"
    else:
        status = "stopped"
    return f"<MPWorker name={self.name!r} status={status}>"
|