cuda_morph-0.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. ascend_compat/__init__.py +207 -0
  2. ascend_compat/__main__.py +34 -0
  3. ascend_compat/_backend.py +334 -0
  4. ascend_compat/_exceptions.py +69 -0
  5. ascend_compat/_logging.py +124 -0
  6. ascend_compat/backends/__init__.py +49 -0
  7. ascend_compat/backends/ascend.py +61 -0
  8. ascend_compat/backends/cambricon.py +82 -0
  9. ascend_compat/backends/intel.py +74 -0
  10. ascend_compat/backends/registry.py +138 -0
  11. ascend_compat/backends/rocm.py +106 -0
  12. ascend_compat/bench.py +650 -0
  13. ascend_compat/cli/__init__.py +66 -0
  14. ascend_compat/cli/_info.py +49 -0
  15. ascend_compat/cli/_porter.py +71 -0
  16. ascend_compat/cli/_scanner.py +368 -0
  17. ascend_compat/cli/bench.py +33 -0
  18. ascend_compat/cli/check.py +35 -0
  19. ascend_compat/cli/compile.py +27 -0
  20. ascend_compat/cli/doctor.py +19 -0
  21. ascend_compat/cli/error.py +13 -0
  22. ascend_compat/cli/info.py +12 -0
  23. ascend_compat/cli/port.py +18 -0
  24. ascend_compat/cli/quant.py +14 -0
  25. ascend_compat/cli/run.py +40 -0
  26. ascend_compat/cli/scaffold.py +31 -0
  27. ascend_compat/cli/security.py +29 -0
  28. ascend_compat/cli/verify.py +23 -0
  29. ascend_compat/cli/vllm.py +19 -0
  30. ascend_compat/cli.py +879 -0
  31. ascend_compat/cuda_shim/__init__.py +40 -0
  32. ascend_compat/cuda_shim/_import_hook.py +123 -0
  33. ascend_compat/cuda_shim/_monkey_patch.py +569 -0
  34. ascend_compat/cuda_shim/_patch_manager.py +313 -0
  35. ascend_compat/cuda_shim/_registry.py +195 -0
  36. ascend_compat/cuda_shim/compile_helpers.py +603 -0
  37. ascend_compat/cuda_shim/dtype_manager.py +307 -0
  38. ascend_compat/cuda_shim/quantization.py +279 -0
  39. ascend_compat/device.py +127 -0
  40. ascend_compat/doctor/__init__.py +40 -0
  41. ascend_compat/doctor/env_setup.py +507 -0
  42. ascend_compat/doctor/error_codes.py +408 -0
  43. ascend_compat/doctor/fallback_monitor.py +274 -0
  44. ascend_compat/doctor/op_auditor.py +287 -0
  45. ascend_compat/doctor/security_check.py +274 -0
  46. ascend_compat/doctor/version_check.py +254 -0
  47. ascend_compat/ecosystem/__init__.py +23 -0
  48. ascend_compat/ecosystem/_flash_attn_hook.py +134 -0
  49. ascend_compat/ecosystem/deepspeed_patch.py +180 -0
  50. ascend_compat/ecosystem/flash_attn.py +334 -0
  51. ascend_compat/ecosystem/transformers_patch.py +223 -0
  52. ascend_compat/ecosystem/triton_bridge.py +188 -0
  53. ascend_compat/ecosystem/vllm_patch.py +289 -0
  54. ascend_compat/exceptions.py +41 -0
  55. ascend_compat/kernel_helper/__init__.py +31 -0
  56. ascend_compat/kernel_helper/scaffold.py +681 -0
  57. ascend_compat/kernel_helper/spec.py +128 -0
  58. ascend_compat/memory.py +83 -0
  59. ascend_compat/ops.py +147 -0
  60. ascend_compat/py.typed +0 -0
  61. ascend_compat/streams.py +62 -0
  62. ascend_compat/validation/__init__.py +21 -0
  63. ascend_compat/validation/op_verifier.py +365 -0
  64. cuda_morph-0.9.1.dist-info/METADATA +167 -0
  65. cuda_morph-0.9.1.dist-info/RECORD +69 -0
  66. cuda_morph-0.9.1.dist-info/WHEEL +5 -0
  67. cuda_morph-0.9.1.dist-info/entry_points.txt +3 -0
  68. cuda_morph-0.9.1.dist-info/licenses/LICENSE +189 -0
  69. cuda_morph-0.9.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,207 @@
+ """cuda-morph: CUDA → Ascend NPU compatibility shim for PyTorch.
+ 
+ This is **not** a replacement for torch_npu. torch_npu already handles the
+ hard C++/CANN integration via PyTorch's PrivateUse1 dispatch mechanism.
+ cuda-morph is a *thin, high-value ecosystem compatibility bridge* that
+ fixes the last mile: existing CUDA-assuming Python code that hard-codes
+ ``torch.cuda`` calls.
+ 
+ Architecture (four-layer stack)::
+ 
+     ┌─────────────────────────────────────────────────────┐
+     │ Layer 4: ascend_compat.doctor                       │
+     │   Environment validation, error translation,        │
+     │   diagnostics CLI                                   │
+     ├─────────────────────────────────────────────────────┤
+     │ Layer 3: ascend_compat.ecosystem                    │
+     │   HuggingFace, DeepSpeed, flash-attn, vLLM shims    │
+     ├─────────────────────────────────────────────────────┤
+     │ Layer 2: ascend_compat.cuda_shim                    │
+     │   torch.cuda API interception + intelligent routing │
+     ├─────────────────────────────────────────────────────┤
+     │ Layer 1: torch_npu (Huawei — already exists)        │
+     │   PrivateUse1 backend, C++ dispatch, CANN/ACL       │
+     └─────────────────────────────────────────────────────┘
+ 
+ Activation Modes
+ ----------------
+ ``import ascend_compat`` does **not** automatically patch ``torch.cuda``
+ by default. This is a deliberate design choice: imports should not have
+ global side effects, especially when cuda-morph might be imported
+ transitively by a library.
+ 
+ There are three ways to activate the shim:
+ 
+ 1. **Explicit activation** (recommended for applications)::
+ 
+        import ascend_compat
+        ascend_compat.activate()
+ 
+ 2. **CLI launcher** (recommended for running existing scripts unchanged)::
+ 
+        cuda-morph run script.py
+ 
+ 3. **Environment variable** (opt-in to auto-activate on import)::
+ 
+        export ASCEND_COMPAT_AUTO_ACTIVATE=1
+        python script.py  # import ascend_compat now auto-activates
+ 
+ To prevent activation entirely (e.g. in testing)::
+ 
+     export ASCEND_COMPAT_NO_PATCH=1
+ 
+ After activation the shim:
+ 
+ - Detects your hardware (NPU > CUDA > CPU)
+ - Routes ``torch.cuda.*`` calls to ``torch.npu.*`` equivalents
+ - Makes ``torch.cuda.is_available()`` return ``False`` to prevent the
+   NCCL-vs-HCCL misdetection bug in accelerate/DeepSpeed
+ - Patches ``torch.device("cuda")`` → ``torch.device("npu")``
+ - Patches ``Tensor.cuda()`` → ``Tensor.npu()``
+ 
+ For ecosystem-specific fixes::
+ 
+     from ascend_compat.ecosystem import transformers_patch
+     transformers_patch.apply()  # Fixes device_map="auto" on NPU
+ 
+     from ascend_compat.ecosystem import flash_attn  # Drop-in flash_attn replacement
+ 
+ Observability
+ -------------
+ After activation, you can inspect which patches are being hit::
+ 
+     stats = ascend_compat.get_patch_stats()
+     # => {"cuda.is_available": 42, "torch.device": 137, ...}
+ 
+ Migration from v0.2.x
+ ---------------------
+ In v0.2.x, ``import ascend_compat`` auto-activated the shim. As of v0.3.0+,
+ you must explicitly call ``ascend_compat.activate()`` or use the CLI launcher.
+ See MIGRATION.md for details.
+ 
+ Environment Variables
+ ---------------------
+ ``ASCEND_COMPAT_AUTO_ACTIVATE``
+     Set to ``1`` to auto-activate on ``import ascend_compat``.
+ ``ASCEND_COMPAT_LOG_LEVEL``
+     Set to ``DEBUG`` to see every API translation. Default: ``WARNING``.
+ ``ASCEND_COMPAT_NO_PATCH``
+     Set to ``1`` to prevent activation entirely (even explicit calls).
+ """
+ 
+ from __future__ import annotations
+ 
+ import os
+ import warnings
+ 
+ __version__ = "0.9.1"
+ 
+ # Core infrastructure (always available — no side effects)
+ from ascend_compat._backend import (
+     Backend,
+     detect_backends,
+     has_cuda,
+     has_mlu,
+     has_npu,
+     has_rocm,
+     has_xpu,
+     preferred_backend,
+ )
+ from ascend_compat._logging import set_log_level
+ 
+ # Layer 2: CUDA shim (activation is explicit, not on import)
+ from ascend_compat.cuda_shim import (
+     activate,
+     deactivate,
+     get_all_patch_stats,
+     get_patch_stats,
+     is_activated,
+     reset_patch_stats,
+ )
+ 
+ # ---------------------------------------------------------------------------
+ # Backward compatibility: deprecation warning for v0.2.x users
+ # ---------------------------------------------------------------------------
+ # In v0.2.x, `import ascend_compat` auto-activated the shim. We removed that
+ # in v0.3.0+ because library imports shouldn't have global side effects.
+ # Emit a one-time deprecation warning if the user appears to be relying on
+ # the old behavior (i.e. they imported us but haven't called activate()).
+ 
+ import atexit as _atexit
+ import sys as _sys
+ 
+ 
+ def _check_activation_at_exit() -> None:
+     """Emit a deprecation warning at exit if shim was never activated.
+ 
+     This catches the v0.2.x pattern where users relied on import-time
+     activation. We only warn if:
+     1. The shim was imported but never activated
+     2. We're not in a test runner (pytest sets 'pytest' in sys.modules)
+     3. We haven't already warned
+     """
+     if is_activated():
+         return
+     if "pytest" in _sys.modules:
+         return  # Don't warn during testing
+     if os.environ.get("ASCEND_COMPAT_NO_PATCH", "").strip() == "1":
+         return  # User explicitly disabled patches
+ 
+     # Check if ascend_compat was imported in user code (not just transitively)
+     import_in_main = False
+     main_mod = _sys.modules.get("__main__")
+     if main_mod is not None:
+         src = getattr(main_mod, "__file__", "") or ""
+         if src:
+             try:
+                 with open(src) as f:
+                     content = f.read()
+                 import_in_main = "ascend_compat" in content
+             except OSError:
+                 pass
+ 
+     if import_in_main:
+         warnings.warn(
+             "cuda-morph was imported but activate() was never called. "
+             "Since v0.3.0, auto-activation on import is removed. "
+             "Add `ascend_compat.activate()` after import, use "
+             "`cuda-morph run script.py`, or set "
+             "ASCEND_COMPAT_AUTO_ACTIVATE=1. "
+             "See MIGRATION.md for details.",
+             DeprecationWarning,
+             stacklevel=1,
+         )
+ 
+ 
+ _atexit.register(_check_activation_at_exit)
+ 
+ # ---------------------------------------------------------------------------
+ # Conditional auto-activation
+ # ---------------------------------------------------------------------------
+ # We only auto-activate when the user has explicitly opted in via env var.
+ # This prevents the "library import has global side effects" problem.
+ # The CLI launcher (cuda-morph run) sets this var automatically.
+ if os.environ.get("ASCEND_COMPAT_AUTO_ACTIVATE", "").strip() == "1":
+     activate()
+ 
+ __all__ = [
+     # Version
+     "__version__",
+     # Backend introspection
+     "Backend",
+     "detect_backends",
+     "preferred_backend",
+     "has_npu",
+     "has_mlu",
+     "has_rocm",
+     "has_xpu",
+     "has_cuda",
+     # Shim control
+     "activate",
+     "deactivate",
+     "is_activated",
+     "set_log_level",
+     # Telemetry
+     "get_patch_stats",
+     "get_all_patch_stats",
+     "reset_patch_stats",
+ ]
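
Taken together, the `__init__.py` above gives the following end-to-end flow. This is a minimal usage sketch assuming an Ascend host with torch_npu installed; the tensor and the printed stats are illustrative, not output captured from the package::

    import torch
    import ascend_compat

    ascend_compat.activate()  # patches torch.cuda, torch.device, Tensor.cuda

    # Unmodified CUDA-assuming code now lands on the NPU:
    x = torch.randn(4, 4, device="cuda")     # actually allocated on "npu"
    print(ascend_compat.get_patch_stats())   # e.g. {"torch.device": 1, ...}

    ascend_compat.deactivate()  # restores the original torch.cuda API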
@@ -0,0 +1,34 @@
+ """Support ``python -m ascend_compat`` as a launcher.
+ 
+ Usage::
+ 
+     # Run a script with all shims active:
+     python -m ascend_compat run train.py --batch-size 32
+ 
+     # Equivalent to adding ``import ascend_compat`` at the top of train.py,
+     # plus auto-applying ecosystem patches.
+ 
+     # Or use any CLI command:
+     python -m ascend_compat doctor
+     python -m ascend_compat check model.py
+     python -m ascend_compat error 507035
+ 
+ The ``run`` subcommand is the primary addition here. It:
+ 1. Activates the cuda_shim (torch.cuda → torch.npu)
+ 2. Installs the flash_attn import hook
+ 3. Applies ecosystem patches (transformers, deepspeed)
+ 4. Executes the user's script with full compatibility
+ """
+ 
+ from __future__ import annotations
+ 
+ 
+ def main() -> None:
+     from ascend_compat.cli import main as cli_main
+     cli_main()
+ 
+ 
+ if __name__ == "__main__":
+     main()
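
The four numbered steps in the docstring suggest roughly the shape below. This is a hedged sketch of what the launcher does, not the actual cli.py implementation: ``_flash_attn_hook.install()`` is an assumed entry-point name, while ``ascend_compat.activate()`` and ``transformers_patch.apply()`` are documented in the package's own docstrings::

    import runpy
    import sys

    import ascend_compat


    def run(script: str, argv: list[str]) -> None:
        ascend_compat.activate()            # 1. torch.cuda → torch.npu shim
        from ascend_compat.ecosystem import _flash_attn_hook, transformers_patch
        _flash_attn_hook.install()          # 2. flash_attn import hook (name assumed)
        transformers_patch.apply()          # 3. ecosystem patches
        sys.argv = [script, *argv]          # 4. run the script as __main__
        runpy.run_path(script, run_name="__main__")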
@@ -0,0 +1,334 @@
+ """Backend detection and capability probing.
+ 
+ This module is the single source of truth for "what hardware is available right
+ now?" Every other module in cuda-morph imports from here rather than
+ re-running its own detection logic.
+ 
+ Architecture note
+ -----------------
+ We intentionally *lazy-import* ``torch``, ``torch_npu``, etc. so that
+ cuda-morph can be imported even when PyTorch isn't installed (useful
+ for the CLI static-analysis tool ``cuda-morph check``).
+ 
+ Multi-backend support
+ ---------------------
+ cuda-morph supports several accelerator backends:
+ 
+ 1. **Ascend NPU** via ``torch_npu`` (Huawei)
+ 2. **Cambricon MLU** via ``torch_mlu`` (Cambricon)
+ 3. **AMD ROCm** via HIP (presents as the ``cuda`` device type)
+ 4. **Intel XPU** via IPEX/Level Zero
+ 5. **NVIDIA CUDA** via ``torch.cuda`` (reference/fallback)
+ 6. **CPU** — always available, used for development & CI
+ 
+ Backend detection uses the pluggable registry in ``backends/``. Each
+ backend module implements a common protocol (``BackendInfo``). The detection
+ loop probes each registered backend in priority order and selects the first
+ one that reports hardware available.
+ 
+ Why a dedicated module?
+ -----------------------
+ Centralising detection avoids import-order bugs. For example, if ``device.py``
+ and ``memory.py`` both independently tried ``import torch_npu``, a race or
+ circular-import could surface. By funnelling everything through ``_backend``
+ we guarantee a single, well-ordered detection pass.
+ """
+ 
+ from __future__ import annotations
+ 
+ import enum
+ import functools
+ from typing import Any, Dict, Optional
+ 
+ from ascend_compat._logging import get_logger
+ 
+ logger = get_logger(__name__)
+ 
+ 
+ # ---------------------------------------------------------------------------
+ # Backend enumeration
+ # ---------------------------------------------------------------------------
+ 
+ 
+ class Backend(enum.Enum):
+     """Available compute backends, ordered by preference."""
+ 
+     NPU = "npu"    # Huawei Ascend via torch_npu
+     MLU = "mlu"    # Cambricon via torch_mlu
+     ROCM = "rocm"  # AMD via ROCm/HIP (presents as "cuda" device)
+     XPU = "xpu"    # Intel via IPEX/Level Zero
+     CUDA = "cuda"  # NVIDIA via torch.cuda
+     CPU = "cpu"    # Always available
+ 
+ 
+ # Map backend device type strings to enum values
+ _BACKEND_DEVICE_TYPES: Dict[str, Backend] = {
+     "npu": Backend.NPU,
+     "mlu": Backend.MLU,
+     "rocm": Backend.ROCM,
+     "xpu": Backend.XPU,
+     "cuda": Backend.CUDA,
+     "cpu": Backend.CPU,
+ }
+ 
+ 
+ # ---------------------------------------------------------------------------
+ # Lazy module references (populated on first access)
+ # ---------------------------------------------------------------------------
+ 
+ _torch: Optional[Any] = None
+ _torch_npu: Optional[Any] = None
+ 
+ 
+ def _import_torch() -> Any:
+     """Lazily import torch, caching the result."""
+     global _torch  # noqa: PLW0603
+     if _torch is None:
+         try:
+             import torch  # type: ignore[import-untyped]
+             _torch = torch
+         except ImportError:
+             raise ImportError(
+                 "PyTorch is required but not installed. "
+                 "Install it with: pip install torch>=2.0"
+             ) from None
+     return _torch
+ 
+ 
+ def _import_torch_npu() -> Optional[Any]:
+     """Lazily import torch_npu, returning None if unavailable.
+ 
+     torch_npu is Huawei's official PyTorch adapter for Ascend NPUs.
+     It monkey-patches torch to add NPU device support. If it isn't
+     installed, we gracefully fall back to CUDA or CPU.
+ 
+     See: https://gitee.com/ascend/pytorch
+     """
+     global _torch_npu  # noqa: PLW0603
+     if _torch_npu is None:
+         try:
+             import torch_npu  # type: ignore[import-untyped]
+             _torch_npu = torch_npu
+             logger.debug("torch_npu imported successfully — Ascend backend available")
+         except ImportError:
+             logger.debug("torch_npu not found — Ascend backend unavailable")
+             _torch_npu = False  # sentinel: tried and failed
+     return _torch_npu if _torch_npu is not False else None
+ 
+ 
+ # ---------------------------------------------------------------------------
+ # Active backend tracking
+ # ---------------------------------------------------------------------------
+ 
+ _active_backend_info: Optional[Any] = None  # BackendInfo subclass, set by activate()
+ 
+ 
+ def get_active_backend_info() -> Optional[Any]:
+     """Return the active backend's BackendInfo, or None if not set."""
+     return _active_backend_info
+ 
+ 
+ def set_active_backend_info(info: Optional[Any]) -> None:
+     """Set the active backend info (called by activate())."""
+     global _active_backend_info  # noqa: PLW0603
+     _active_backend_info = info
+ 
+ 
+ # ---------------------------------------------------------------------------
+ # Detection logic
+ # ---------------------------------------------------------------------------
+ 
+ 
+ @functools.lru_cache(maxsize=1)
+ def detect_backends() -> tuple[Backend, ...]:
+     """Probe the system and return all available backends, best-first.
+ 
+     The result is cached for the lifetime of the process because hardware
+     doesn't change at runtime.
+ 
+     Detection order:
+     1. Check each registered backend in the pluggable registry
+     2. Check NVIDIA CUDA
+     3. CPU (always available)
+ 
+     Returns:
+         Tuple of :class:`Backend` values, ordered from most-preferred to
+         least-preferred.
+     """
+     available: list[Backend] = []
+ 
+     # 1. Check pluggable backends from the registry
+     try:
+         from ascend_compat.backends import BACKEND_REGISTRY
+         for name, backend_cls in BACKEND_REGISTRY.items():
+             try:
+                 if backend_cls.is_available():
+                     device_type = backend_cls.device_type
+                     backend_enum = _BACKEND_DEVICE_TYPES.get(device_type)
+                     if backend_enum and backend_enum not in available:
+                         available.append(backend_enum)
+                         logger.info(
+                             "%s detected (%d device(s))",
+                             backend_cls.display_name,
+                             backend_cls.device_count(),
+                         )
+             except Exception as exc:  # noqa: BLE001
+                 logger.warning(
+                     "Backend '%s' detection failed: %s", name, exc
+                 )
+     except ImportError:
+         # Fallback: probe directly if backends package fails to import
+         logger.debug("backends package not available, using legacy detection")
+         _detect_legacy(available)
+ 
+     # 2. Check for NVIDIA CUDA (if not already found via registry)
+     if Backend.CUDA not in available:
+         torch = _import_torch()
+         if torch.cuda.is_available():
+             available.append(Backend.CUDA)
+             logger.info(
+                 "NVIDIA CUDA detected (%d device(s))",
+                 torch.cuda.device_count(),
+             )
+ 
+     # 3. CPU is always available
+     if Backend.CPU not in available:
+         available.append(Backend.CPU)
+ 
+     logger.debug("Detected backends (preference order): %s", available)
+     return tuple(available)
+ 
+ 
+ def _detect_legacy(available: list[Backend]) -> None:
+     """Legacy detection path (before pluggable backends existed).
+ 
+     This is the fallback for when the ``backends`` subpackage can't be
+     imported (e.g. during early development or if the package structure
+     changes).
+     """
+     # Check for Ascend NPU
+     npu_mod = _import_torch_npu()
+     if npu_mod is not None:
+         torch = _import_torch()
+         try:
+             if hasattr(torch, "npu") and torch.npu.is_available():
+                 available.append(Backend.NPU)
+                 logger.info(
+                     "Ascend NPU detected (%d device(s))",
+                     torch.npu.device_count(),
+                 )
+         except Exception as exc:  # noqa: BLE001
+             logger.warning("torch_npu installed but NPU detection failed: %s", exc)
+ 
+ 
+ @functools.lru_cache(maxsize=1)
+ def preferred_backend() -> Backend:
+     """Return the single best backend for this system.
+ 
+     This drives the default behaviour of ``cuda-morph`` — all CUDA
+     calls are routed to whichever backend this function returns.
+     """
+     return detect_backends()[0]
+ 
+ 
+ # ---------------------------------------------------------------------------
+ # Convenience predicates
+ # ---------------------------------------------------------------------------
+ 
+ 
+ def has_npu() -> bool:
+     """Return True if at least one Ascend NPU is usable."""
+     return Backend.NPU in detect_backends()
+ 
+ 
+ def has_mlu() -> bool:
+     """Return True if at least one Cambricon MLU is usable."""
+     return Backend.MLU in detect_backends()
+ 
+ 
+ def has_rocm() -> bool:
+     """Return True if an AMD ROCm GPU is detected."""
+     return Backend.ROCM in detect_backends()
+ 
+ 
+ def has_xpu() -> bool:
+     """Return True if at least one Intel XPU is usable."""
+     return Backend.XPU in detect_backends()
+ 
+ 
+ def has_cuda() -> bool:
+     """Return True if at least one NVIDIA GPU is usable."""
+     return Backend.CUDA in detect_backends()
+ 
+ 
+ def get_torch() -> Any:
+     """Return the ``torch`` module (importing it if necessary).
+ 
+     This is the canonical way for other cuda-morph modules to get a
+     reference to torch without redundant try/except blocks.
+     """
+     return _import_torch()
+ 
+ 
+ def get_torch_npu() -> Optional[Any]:
+     """Return the ``torch_npu`` module, or None if not installed."""
+     return _import_torch_npu()
+ 
+ 
+ # ---------------------------------------------------------------------------
+ # Device-string translation
+ # ---------------------------------------------------------------------------
+ 
+ 
+ def translate_device_string(device: str) -> str:
+     """Translate a CUDA device string to the appropriate backend string.
+ 
+     Mapping rules:
+     - If an NPU, MLU, or XPU backend is preferred, ``"cuda"`` → that
+       backend's device type.
+     - If CUDA is preferred (including ROCm, which presents as ``"cuda"``),
+       return the string unchanged.
+     - If only the CPU is available, ``"cuda"`` → ``"cpu"`` (with a warning).
+ 
+     Args:
+         device: A PyTorch device string, e.g. ``"cuda"``, ``"cuda:0"``,
+             ``"cpu"``, ``"npu:1"``.
+ 
+     Returns:
+         The translated device string.
+ 
+     Examples::
+ 
+         # On an Ascend system:
+         translate_device_string("cuda")    # → "npu"
+         translate_device_string("cuda:2")  # → "npu:2"
+ 
+         # On a Cambricon system:
+         translate_device_string("cuda")    # → "mlu"
+         translate_device_string("cuda:0")  # → "mlu:0"
+ 
+         # On CPU:
+         translate_device_string("cuda")    # → "cpu"
+     """
+     backend = preferred_backend()
+ 
+     # Backends that need "cuda" → their device type translation
+     _TRANSLATE_BACKENDS = {
+         Backend.NPU: "npu",
+         Backend.MLU: "mlu",
+         Backend.XPU: "xpu",
+         # ROCm does NOT need translation — it presents as "cuda" via HIP
+     }
+ 
+     if backend in _TRANSLATE_BACKENDS and device.startswith("cuda"):
+         target_type = _TRANSLATE_BACKENDS[backend]
+         translated = device.replace("cuda", target_type, 1)
+         logger.debug("Device string translated: %r → %r", device, translated)
+         return translated
+ 
+     if backend == Backend.CPU and device.startswith("cuda"):
+         # No accelerator at all — fall back to CPU so the code doesn't crash.
+         translated = "cpu"
+         logger.warning(
+             "No GPU/NPU/MLU available — translating device %r → 'cpu'. "
+             "Performance will be significantly lower.",
+             device,
+         )
+         return translated
+ 
+     return device
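
A quick sketch of how the detection and translation functions in `_backend.py` compose, on a hypothetical Ascend host (the printed outputs are illustrative)::

    from ascend_compat._backend import (
        Backend,
        detect_backends,
        preferred_backend,
        translate_device_string,
    )

    print(detect_backends())                  # e.g. (Backend.NPU, Backend.CPU)
    print(preferred_backend())                # Backend.NPU
    print(translate_device_string("cuda:2"))  # "npu:2"
    print(translate_device_string("cpu"))     # "cpu" (non-CUDA strings pass through)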
@@ -0,0 +1,69 @@
+ """Custom exception hierarchy for cuda-morph.
+ 
+ Provides distinct exception types so consumers can distinguish
+ cuda-morph errors from unrelated failures in their except clauses.
+ 
+ Usage::
+ 
+     from ascend_compat._exceptions import ActivationError
+ 
+     try:
+         ascend_compat.activate()
+     except ActivationError as e:
+         print(f"Shim activation failed: {e}")
+ """
+ 
+ from __future__ import annotations
+ 
+ 
+ class AscendCompatError(Exception):
+     """Base exception for all cuda-morph errors.
+ 
+     Catch this to handle any error raised by the library without
+     catching unrelated exceptions.
+     """
+ 
+ 
+ class ActivationError(AscendCompatError):
+     """Raised when shim activation fails (e.g. patch application error).
+ 
+     The shim guarantees atomic rollback — if this is raised, no patches
+     were left in a half-applied state.
+     """
+ 
+ 
+ class BackendNotFoundError(AscendCompatError):
+     """Raised when a required backend or adapter is not available.
+ 
+     Examples:
+     - ``torch_npu`` is not installed but NPU operations are requested
+     - ``npu_fusion_attention`` is missing from torch_npu
+     """
+ 
+ 
+ class PatchError(AscendCompatError):
+     """Raised when an individual patch cannot be applied or reverted."""
+ 
+ 
+ class CompatibilityError(AscendCompatError):
+     """Raised when a version or compatibility check fails hard.
+ 
+     Soft failures emit warnings; this is for fatal incompatibilities
+     (e.g. known-bad torch_npu + PyTorch combinations).
+     """
+ 
+ 
+ class PortError(AscendCompatError):
+     """Raised when code porting/rewriting fails."""
+ 
+ 
+ class ValidationError(AscendCompatError):
+     """Raised when operator verification fails."""
+ 
+ 
+ class SecurityError(AscendCompatError):
+     """Raised when a security or integrity check fails."""
+ 
+ 
+ # Keep the public alias for backward compat
+ CudaMorphError = AscendCompatError
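
Since every exception derives from ``AscendCompatError``, callers can layer handlers from specific to general. A minimal sketch (whether ``activate()`` raises ``BackendNotFoundError`` on a CPU-only host is an assumption here)::

    import ascend_compat
    from ascend_compat._exceptions import (
        ActivationError,
        AscendCompatError,
        BackendNotFoundError,
    )

    try:
        ascend_compat.activate()
    except BackendNotFoundError:
        print("No NPU/MLU adapter installed; staying on stock PyTorch")
    except ActivationError as exc:
        print(f"Activation failed; patches were rolled back: {exc}")
    except AscendCompatError as exc:
        print(f"Other cuda-morph failure: {exc}")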