kernels 0.4.3.tar.gz → 0.5.0.dev0.tar.gz

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: kernels
- Version: 0.4.3
+ Version: 0.5.0.dev0
  Summary: Download compute kernels
  Author-email: OlivierDehaene <olivier@huggingface.co>, Daniel de Kok <daniel@huggingface.co>, David Holtz <david@huggingface.co>, Nicolas Patry <nicolas@huggingface.co>
  License: Apache-2.0
@@ -12,9 +12,21 @@ Requires-Dist: packaging>=20.0
  Requires-Dist: tomli>=2.0; python_version < "3.11"
  Provides-Extra: torch
  Requires-Dist: torch; extra == "torch"
+ Dynamic: license-file
 
  # kernels
 
+ <div align="center">
+ <img src="https://github.com/user-attachments/assets/64a652f3-0cd3-4829-b3c1-df13f7933569" width="450" height="450" alt="kernel-builder logo">
+ <p align="center">
+ <a href="https://pypi.org/project/kernels"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/kernels"></a>
+ <a href="https://github.com/huggingface/kernels/tags"><img alt="GitHub tag" src="https://img.shields.io/github/v/tag/huggingface/kernels"></a>
+ <a href="https://github.com/huggingface/kernels/actions/workflows/docker-build-push.yaml"><img alt="Test kernels" src="https://img.shields.io/github/actions/workflow/status/huggingface/kernels/test.yml?label=test"></a>
+
+ </p>
+ </div>
+ <hr/>
+
  The Kernel Hub allows Python libraries and applications to load compute
  kernels directly from the [Hub](https://hf.co/). To support this kind
  of dynamic loading, Hub kernels differ from traditional Python kernel
@@ -1,5 +1,16 @@
  # kernels
 
+ <div align="center">
+ <img src="https://github.com/user-attachments/assets/64a652f3-0cd3-4829-b3c1-df13f7933569" width="450" height="450" alt="kernel-builder logo">
+ <p align="center">
+ <a href="https://pypi.org/project/kernels"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/kernels"></a>
+ <a href="https://github.com/huggingface/kernels/tags"><img alt="GitHub tag" src="https://img.shields.io/github/v/tag/huggingface/kernels"></a>
+ <a href="https://github.com/huggingface/kernels/actions/workflows/docker-build-push.yaml"><img alt="Test kernels" src="https://img.shields.io/github/actions/workflow/status/huggingface/kernels/test.yml?label=test"></a>
+
+ </p>
+ </div>
+ <hr/>
+
  The Kernel Hub allows Python libraries and applications to load compute
  kernels directly from the [Hub](https://hf.co/). To support this kind
  of dynamic loading, Hub kernels differ from traditional Python kernel
@@ -1,6 +1,6 @@
  [project]
  name = "kernels"
- version = "0.4.3"
+ version = "0.5.0.dev0"
  description = "Download compute kernels"
  authors = [
      { name = "OlivierDehaene", email = "olivier@huggingface.co" },
@@ -9,6 +9,7 @@ from kernels.layer import (
  from kernels.utils import (
      get_kernel,
      get_locked_kernel,
+     has_kernel,
      install_kernel,
      load_kernel,
  )
@@ -16,6 +17,7 @@ from kernels.utils import (
  __all__ = [
      "get_kernel",
      "get_locked_kernel",
+     "has_kernel",
      "load_kernel",
      "install_kernel",
      "use_kernel_forward_from_hub",
@@ -4,7 +4,7 @@ import warnings
  from contextvars import ContextVar
  from copy import deepcopy
  from dataclasses import dataclass, field
- from typing import TYPE_CHECKING, Callable, Dict, Union
+ from typing import TYPE_CHECKING, Dict, Union
 
  from .utils import get_kernel
 
@@ -131,12 +131,15 @@ def replace_kernel_forward_from_hub(cls, layer_name: str, *, use_fallback: bool
 
      fallback_forward = cls.forward
 
-     cached_forward: Dict[LayerRepository, Callable] = {}
+     cached_layer: Dict[LayerRepository, nn.Module] = {}
 
      def forward(self, x, *args, **kwargs):
          if _DISABLE_KERNEL_MAPPING:
              return fallback_forward(self, x, *args, **kwargs)
 
+         needs_backward = self.training
+         is_compiling = _is_torchdynamo_compiling()
+
          kernel = _KERNEL_MAPPING.get().get(layer_name)
          if kernel is None:
              warnings.warn(
@@ -162,9 +165,18 @@ def replace_kernel_forward_from_hub(cls, layer_name: str, *, use_fallback: bool
              return fallback_forward(self, x, *args, **kwargs)
 
          # Short-circuit if we already loaded the layer.
-         layer_forward = cached_forward.get(repo, None)
-         if layer_forward is not None:
-             return layer_forward(self, x, *args, **kwargs)
+         layer = cached_layer.get(repo, None)
+         if layer is not None:
+             # Switch to fallback when the layer does not support:
+             # compilation/compile when needed.
+             # backward when needed
+             needs_fallback = needs_backward and not getattr(layer, "has_backward", True)
+             needs_fallback |= is_compiling and not getattr(
+                 layer, "can_torch_compile", False
+             )
+             if needs_fallback:
+                 return fallback_forward(self, x, *args, **kwargs)
+             return layer.forward(self, x, *args, **kwargs)
 
          layer = _get_kernel_layer(
              repo_id=repo.repo_id,
@@ -180,10 +192,18 @@ def replace_kernel_forward_from_hub(cls, layer_name: str, *, use_fallback: bool
          finally:
              cls.forward = orig_forward
 
-         layer_forward = layer.forward
-         cached_forward[repo] = layer_forward
+         cached_layer[repo] = layer
+
+         # Switch to fallback when the layer does not support
+         # compilation/compile when needed.
+         needs_fallback = needs_backward and not getattr(layer, "has_backward", True)
+         needs_fallback |= is_compiling and not getattr(
+             layer, "can_torch_compile", False
+         )
+         if needs_fallback:
+             return fallback_forward(self, x, *args, **kwargs)
 
-         return layer_forward(self, x, *args, **kwargs)
+         return layer.forward(self, x, *args, **kwargs)
 
      cls.forward = forward
 
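The two hunks above change the dispatcher to cache the whole layer rather than just its `forward`, so each call can consult per-layer capability flags: training requires `has_backward` (assumed `True` when absent) and `torch.compile` requires `can_torch_compile` (assumed `False` when absent). Below is a minimal sketch of a kernel layer that sets these flags; the class name and body are illustrative, and only the two attribute names come from this diff:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class SiluAndMulInferenceOnly(nn.Module):
    # Read via getattr(layer, "has_backward", True): no backward kernel,
    # so the dispatcher falls back to the original forward in training mode.
    has_backward = False
    # Read via getattr(layer, "can_torch_compile", False): safe to trace
    # under torch.compile, so no fallback is needed while compiling.
    can_torch_compile = True

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        d = input.shape[-1] // 2
        return F.silu(input[..., :d]) * input[..., d:]
```

The `_validate_layer` change in the next hunk allows exactly these two extra class members, so flagged layers still pass validation.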
@@ -240,7 +260,9 @@ def _validate_layer(*, check_cls, cls):
      # ... or predefined member variables.
      torch_module_members = {name for name, _ in inspect.getmembers(nn.Module)}
      cls_members = {name for name, _ in inspect.getmembers(cls)}
-     if cls_members - torch_module_members != set():
+     difference = cls_members - torch_module_members
+     # verify if : difference ⊄ {"can_torch_compile", "has_backward"}
+     if not difference <= {"can_torch_compile", "has_backward"}:
          raise TypeError("Layer must not contain additional members.")
 
      # Check whether the forward signatures are similar.
@@ -257,3 +279,19 @@ def _validate_layer(*, check_cls, cls):
          raise TypeError(
              f"Forward signature does not match: different kind of arguments ({param} ({param.kind}) and {ref_param} ({ref_param.kind})"
          )
+
+
+ def _is_torchdynamo_compiling():
+     # Importing torch._dynamo causes issues with PyTorch profiler (https://github.com/pytorch/pytorch/issues/130622)
+     # hence rather relying on `torch.compiler.is_compiling()` when possible (torch>=2.3)
+     try:
+         import torch
+
+         return torch.compiler.is_compiling()
+     except Exception:
+         try:
+             import torch._dynamo as dynamo  # noqa: F401
+
+             return dynamo.is_compiling()
+         except Exception:
+             return False
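For illustration, the fast path of this helper can be exercised directly on PyTorch >= 2.3, where `torch.compiler.is_compiling()` exists; this snippet is a demo under that assumption, not part of the package:

```python
import torch


def in_dynamo() -> bool:
    # Mirrors the primary branch of _is_torchdynamo_compiling().
    return torch.compiler.is_compiling()


print(in_dynamo())                 # False: called eagerly
print(torch.compile(in_dynamo)())  # True: evaluated while TorchDynamo traces
```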
@@ -13,7 +13,7 @@ from pathlib import Path
  from types import ModuleType
  from typing import Dict, List, Optional, Tuple
 
- from huggingface_hub import snapshot_download
+ from huggingface_hub import file_exists, snapshot_download
  from packaging.version import parse
 
  from kernels.lockfile import KernelLock, VariantLock
@@ -161,6 +161,29 @@ def get_kernel(repo_id: str, revision: str = "main") -> ModuleType:
      return import_from_path(package_name, package_path / package_name / "__init__.py")
 
 
+ def has_kernel(repo_id: str, revision: str = "main") -> bool:
+     """
+     Check whether a kernel build exists for the current environment
+     (Torch version and compute framework).
+     """
+     package_name = package_name_from_repo_id(repo_id)
+     variant = build_variant()
+     universal_variant = universal_build_variant()
+
+     if file_exists(
+         repo_id,
+         revision=revision,
+         filename=f"build/{universal_variant}/{package_name}/__init__.py",
+     ):
+         return True
+
+     return file_exists(
+         repo_id,
+         revision=revision,
+         filename=f"build/{variant}/{package_name}/__init__.py",
+     )
+
+
  def load_kernel(repo_id: str, *, lockfile: Optional[Path] = None) -> ModuleType:
      """
      Get a pre-downloaded, locked kernel.
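The new `has_kernel` probes the repo with `file_exists` (first for a universal build variant, then for the variant matching the local Torch and compute framework) without downloading anything. A usage sketch under the assumption that probing precedes loading; the repo id is one exercised by the tests below, and the fallback branch is illustrative:

```python
from kernels import get_kernel, has_kernel

repo_id = "kernels-community/activation"

if has_kernel(repo_id):
    # A compatible build exists on the Hub; fetch and import it.
    activation = get_kernel(repo_id)
else:
    # No matching build variant: keep a pure-PyTorch code path.
    activation = None
```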
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: kernels
- Version: 0.4.3
+ Version: 0.5.0.dev0
  Summary: Download compute kernels
  Author-email: OlivierDehaene <olivier@huggingface.co>, Daniel de Kok <daniel@huggingface.co>, David Holtz <david@huggingface.co>, Nicolas Patry <nicolas@huggingface.co>
  License: Apache-2.0
@@ -12,9 +12,21 @@ Requires-Dist: packaging>=20.0
  Requires-Dist: tomli>=2.0; python_version < "3.11"
  Provides-Extra: torch
  Requires-Dist: torch; extra == "torch"
+ Dynamic: license-file
 
  # kernels
 
+ <div align="center">
+ <img src="https://github.com/user-attachments/assets/64a652f3-0cd3-4829-b3c1-df13f7933569" width="450" height="450" alt="kernel-builder logo">
+ <p align="center">
+ <a href="https://pypi.org/project/kernels"><img alt="PyPI - Version" src="https://img.shields.io/pypi/v/kernels"></a>
+ <a href="https://github.com/huggingface/kernels/tags"><img alt="GitHub tag" src="https://img.shields.io/github/v/tag/huggingface/kernels"></a>
+ <a href="https://github.com/huggingface/kernels/actions/workflows/docker-build-push.yaml"><img alt="Test kernels" src="https://img.shields.io/github/actions/workflow/status/huggingface/kernels/test.yml?label=test"></a>
+
+ </p>
+ </div>
+ <hr/>
+
  The Kernel Hub allows Python libraries and applications to load compute
  kernels directly from the [Hub](https://hf.co/). To support this kind
  of dynamic loading, Hub kernels differ from traditional Python kernel
@@ -1,7 +1,7 @@
  import pytest
  import torch
 
- from kernels import get_kernel
+ from kernels import get_kernel, has_kernel
 
 
  @pytest.fixture
@@ -36,6 +36,22 @@ def test_gelu_fast(kernel, device):
      assert torch.allclose(y, expected)
 
 
+ @pytest.mark.parametrize(
+     "kernel_exists",
+     [
+         ("kernels-community/activation", "main", True),
+         ("kernels-community/triton-layer-norm", "main", True),
+         # Repo only contains Torch 2.4 kernels (and we don't
+         # support/test against this version).
+         ("kernels-test/only-torch-2.4", "main", False),
+         ("google-bert/bert-base-uncased", "87565a309", False),
+     ],
+ )
+ def test_has_kernel(kernel_exists):
+     repo_id, revision, kernel = kernel_exists
+     assert has_kernel(repo_id, revision=revision) == kernel
+
+
  def test_universal_kernel(universal_kernel):
      torch.manual_seed(0)
      A = torch.randint(-10, 10, (64, 128), dtype=torch.int8, device="cuda")
@@ -19,6 +19,12 @@ kernel_layer_mapping = {
              revision="layers",
          )
      },
+     "SiluAndMulNoCompile": {
+         "cuda": LayerRepository(
+             repo_id="kernels-test/op-without-fake-test",
+             layer_name="SiluAndMul",
+         )
+     },
      "SiluAndMulStringDevice": {
          "cuda": LayerRepository(
              repo_id="kernels-community/activation",
@@ -43,6 +49,11 @@ class SiluAndMul(nn.Module):
          return F.silu(input[..., :d]) * input[..., d:]
 
 
+ @use_kernel_forward_from_hub("SiluAndMulNoCompile")
+ class SiluAndMulNoCompileKernel(SiluAndMul):
+     pass
+
+
  @use_kernel_forward_from_hub("SiluAndMul")
  class SiluAndMulWithKernel(SiluAndMul):
      pass
@@ -101,8 +112,29 @@ def test_layer_fallback_works():
      SiluAndMulWithKernelFallback()
 
 
+ @pytest.mark.parametrize("cls", [SiluAndMulWithKernel, SiluAndMulNoCompileKernel])
+ @pytest.mark.parametrize("device", ["cuda", "cpu"])
+ def test_torch_compile_layer(cls, device):
+     silu_and_mul = SiluAndMul()
+
+     X = torch.randn((32, 64), dtype=torch.float32, device=device)
+     Y = silu_and_mul(X)
+
+     silu_and_mul_with_kernel = cls()
+     silu_and_mul_with_kernel.eval()
+     silu_and_mul_compiled = torch.compile(silu_and_mul_with_kernel)
+
+     Y_compiled = silu_and_mul_compiled(X)
+
+     torch.testing.assert_close(Y_compiled, Y)
+
+
  def test_mapping_contexts():
-     assert set(_KERNEL_MAPPING.get().keys()) == {"SiluAndMul", "SiluAndMulStringDevice"}
+     assert set(_KERNEL_MAPPING.get().keys()) == {
+         "SiluAndMul",
+         "SiluAndMulStringDevice",
+         "SiluAndMulNoCompile",
+     }
 
      extra_mapping1 = {
          "TestKernel": {
@@ -118,6 +150,7 @@ def test_mapping_contexts():
      assert set(_KERNEL_MAPPING.get().keys()) == {
          "SiluAndMul",
          "SiluAndMulStringDevice",
+         "SiluAndMulNoCompile",
          "TestKernel",
      }
 
@@ -135,6 +168,7 @@ def test_mapping_contexts():
      assert set(_KERNEL_MAPPING.get().keys()) == {
          "SiluAndMul",
          "SiluAndMulStringDevice",
+         "SiluAndMulNoCompile",
          "TestKernel",
      }
      assert (
@@ -145,6 +179,7 @@ def test_mapping_contexts():
      assert set(_KERNEL_MAPPING.get().keys()) == {
          "SiluAndMul",
          "SiluAndMulStringDevice",
+         "SiluAndMulNoCompile",
          "TestKernel",
      }
      assert (
@@ -164,6 +199,7 @@ def test_mapping_contexts():
      assert set(_KERNEL_MAPPING.get().keys()) == {
          "SiluAndMul",
          "SiluAndMulStringDevice",
+         "SiluAndMulNoCompile",
          "TestKernel",
      }
      assert (
@@ -174,6 +210,7 @@ def test_mapping_contexts():
      assert set(_KERNEL_MAPPING.get().keys()) == {
          "SiluAndMul",
          "SiluAndMulStringDevice",
+         "SiluAndMulNoCompile",
      }
 
 
@@ -203,3 +240,75 @@ def test_validate_kernel_layer():
 
      with pytest.raises(TypeError, match="different kind of arguments"):
          _validate_layer(cls=BadLayer4, check_cls=SiluAndMul)
+
+
+ def test_fallback_used_when_training():
+     @use_kernel_forward_from_hub("Linear")
+     class TorchLinear(nn.Linear):
+         def __init__(self, *args, **kwargs):
+             super().__init__(*args, **kwargs)
+             # Used to check that we called hub kernel.
+             self.n_calls = 0
+
+         def forward(self, input: torch.Tensor) -> torch.Tensor:
+             self.n_calls += 1
+             return super().forward(input)
+
+     linear = TorchLinear(32, 32).to("cuda")
+
+     with use_kernel_mapping(
+         {
+             "Linear": {
+                 Device(type="cuda"): LayerRepository(
+                     repo_id="kernels-test/backward-marker-test",
+                     layer_name="LinearImplicitBackward",
+                 )
+             }
+         }
+     ):
+         linear.train()
+         X = torch.randn(10, 32, device="cuda")
+         linear(X)
+         assert linear.n_calls == 0
+
+         linear.eval()
+         linear(X)
+         assert linear.n_calls == 0
+
+     with use_kernel_mapping(
+         {
+             "Linear": {
+                 Device(type="cuda"): LayerRepository(
+                     repo_id="kernels-test/backward-marker-test",
+                     layer_name="LinearBackward",
+                 )
+             }
+         }
+     ):
+         linear.train()
+         X = torch.randn(10, 32, device="cuda")
+         linear(X)
+         assert linear.n_calls == 0
+
+         linear.eval()
+         linear(X)
+         assert linear.n_calls == 0
+
+     with use_kernel_mapping(
+         {
+             "Linear": {
+                 Device(type="cuda"): LayerRepository(
+                     repo_id="kernels-test/backward-marker-test",
+                     layer_name="LinearNoBackward",
+                 )
+             }
+         }
+     ):
+         linear.train()
+         X = torch.randn(10, 32, device="cuda")
+         linear(X)
+         assert linear.n_calls == 1
+
+         linear.eval()
+         linear(X)
+         assert linear.n_calls == 1