ninetoothed-0.15.0.tar.gz → ninetoothed-0.15.1.tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/PKG-INFO +1 -1
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/pyproject.toml +1 -1
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/aot.py +2 -2
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/generation.py +1 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/tensor.py +1 -1
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/visualization.py +10 -4
- ninetoothed-0.15.1/tests/test_aot.py +153 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_conv2d.py +16 -2
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_matmul.py +13 -6
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.gitattributes +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.github/pull_request_template.md +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.github/workflows/publish-to-pypi.yml +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.github/workflows/pytest.yml +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.github/workflows/ruff.yml +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.github/workflows/sphinx.yml +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/.gitignore +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/LICENSE +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/README.md +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/Makefile +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/README.zh.md +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/make.bat +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/requirements.txt +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/_static/matmul-tiling.png +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/_static/ninetoothed-logo.png +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/_static/vecadd-tiling.png +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/code_generation.rst +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/conf.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/index.rst +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/installation.rst +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/python_api.rst +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/symbol.rst +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/tensor.rst +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/docs/source/visualization.rst +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/requirements.txt +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/__init__.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/cudaifier.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/dtype.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/jit.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/language.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/make.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/naming.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/symbol.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/src/ninetoothed/torchifier.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/__init__.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/skippers.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_add.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_addmm.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_attention.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_max_pool2d.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_naming.py +0 -0
- {ninetoothed-0.15.0 → ninetoothed-0.15.1}/tests/test_softmax.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ninetoothed
-Version: 0.15.0
+Version: 0.15.1
 Summary: A domain-specific language based on Triton but providing higher-level abstraction.
 Project-URL: Homepage, https://github.com/InfiniTensor/ninetoothed
 Project-URL: Issues, https://github.com/InfiniTensor/ninetoothed/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "ninetoothed"
-version = "0.15.0"
+version = "0.15.1"
 authors = [{ name = "Jiacheng Huang", email = "huangjiacheng0709@outlook.com" }]
 description = "A domain-specific language based on Triton but providing higher-level abstraction."
 readme = "README.md"
@@ -4,7 +4,7 @@ import subprocess
 import tempfile
 import uuid
 
-from ninetoothed.dtype import int64
+from ninetoothed.dtype import int64
 from ninetoothed.generation import CACHE_DIR, CodeGenerator
 from ninetoothed.tensor import Tensor
 
@@ -55,7 +55,7 @@ def _aot(func, caller, kernel_name, num_warps, num_stages):
 
             param_types.append(f"*{dtype}")
         elif Tensor.size_pattern().fullmatch(param):
-            param_types.append(
+            param_types.append(int64)
         elif Tensor.stride_pattern().fullmatch(param):
             param_types.append(int64)
 
@@ -82,6 +82,7 @@ class CodeGenerator(ast.NodeTransformer):
         dependencies = _find_dependencies(func)
         source = "\n\n".join((unparsed, dependencies)).strip()
         source = source.replace(func.__name__, kernel_name)
+        source += "\n"
 
         if prettify:
             for original, simplified in name_collector.simplified_names.items():
@@ -118,10 +118,16 @@ def _visualize_unit_square(ax, x, y, color):
 
 
 def _visualize_rect(ax, width, height, x, y, color):
-
-
-
-
+    ax.add_patch(
+        plt.Rectangle(
+            (x, y),
+            width,
+            height,
+            edgecolor="k",
+            facecolor=color,
+            linewidth=plt.rcParams["lines.linewidth"],
+        )
+    )
 
 
 def _verts_of_rect(width, height, x, y):
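For reference, the new `_visualize_rect` body replaces the removed implementation (elided in this view) with a single `plt.Rectangle` patch. Below is a minimal, self-contained sketch of that matplotlib pattern; the figure setup and the sample position, size, and color are illustrative assumptions, not ninetoothed code.

import matplotlib.pyplot as plt

fig, ax = plt.subplots()

# Draw a 4x2 rectangle anchored at (1, 1), outlined in black, mirroring
# the keyword arguments used by _visualize_rect above.
ax.add_patch(
    plt.Rectangle(
        (1, 1),
        4,
        2,
        edgecolor="k",
        facecolor="tab:blue",
        linewidth=plt.rcParams["lines.linewidth"],
    )
)

ax.set_xlim(0, 6)
ax.set_ylim(0, 4)
plt.show()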
@@ -0,0 +1,153 @@
+import ctypes
+import functools
+import subprocess
+
+import torch
+import torch.nn.functional as F
+
+import ninetoothed
+import ninetoothed.generation
+import tests.test_conv2d as conv2d
+import tests.test_matmul as matmul
+from ninetoothed import Tensor
+from tests.skippers import skip_if_cuda_not_available
+
+
+@skip_if_cuda_not_available
+class TestCUDA:
+    @classmethod
+    def setup_class(cls):
+        torch.manual_seed(0)
+
+    def test_matmul(self):
+        arrangement = functools.partial(
+            matmul.arrangement, BLOCK_SIZE_M=64, BLOCK_SIZE_N=64, BLOCK_SIZE_K=64
+        )
+        application = matmul.application
+        tensors = tuple(Tensor(2, dtype=ninetoothed.float16) for _ in range(3))
+        caller = "cuda"
+        kernel_name = "matmul"
+        output_dir = ninetoothed.generation.CACHE_DIR
+
+        launch_func = _generate_launch_func(
+            arrangement,
+            application,
+            tensors,
+            caller=caller,
+            kernel_name=kernel_name,
+            output_dir=output_dir,
+        )
+
+        shape = (512, 512)
+        dtype = torch.float16
+        device = caller
+
+        lhs = torch.randn(shape, dtype=dtype, device=device)
+        rhs = torch.randn(shape, dtype=dtype, device=device)
+        output = torch.empty((lhs.shape[0], rhs.shape[1]), dtype=dtype, device=device)
+
+        _run_launch_func(launch_func, lhs, rhs, output)
+
+        assert torch.allclose(output, torch.matmul(lhs, rhs))
+
+    def test_conv2d(self):
+        arrangement = functools.partial(
+            conv2d.arrangement, BLOCK_SIZE_M=64, BLOCK_SIZE_N=64, BLOCK_SIZE_K=64
+        )
+        application = matmul.application
+        tensors = tuple(Tensor(4, dtype=ninetoothed.float16) for _ in range(3))
+        caller = "cuda"
+        kernel_name = "conv2d"
+        output_dir = ninetoothed.generation.CACHE_DIR
+
+        launch_func = _generate_launch_func(
+            arrangement,
+            application,
+            tensors,
+            caller=caller,
+            kernel_name=kernel_name,
+            output_dir=output_dir,
+        )
+
+        n, c, h, w = 4, 64, 16, 16
+        k, _, r, s = 512, c, 3, 3
+        p = h - r + 1
+        q = w - s + 1
+        dtype = torch.float16
+        device = caller
+
+        input = torch.randn(n, c, h, w, dtype=dtype, device=device)
+        filter = torch.randn(k, c, r, s, dtype=dtype, device=device)
+        output = torch.empty(n, k, p, q, dtype=dtype, device=device)
+
+        _run_launch_func(launch_func, input, filter, output)
+
+        assert torch.allclose(output, F.conv2d(input, filter), atol=0.001, rtol=0.001)
+
+
+class _ArgumentTensor(ctypes.Structure):
+    _fields_ = [
+        ("data", ctypes.c_void_p),
+        ("shape", ctypes.POINTER(ctypes.c_uint64)),
+        ("strides", ctypes.POINTER(ctypes.c_int64)),
+    ]
+
+    @staticmethod
+    def from_torch_tensor(tensor):
+        data = ctypes.c_void_p(tensor.data_ptr())
+        shape = (ctypes.c_uint64 * len(tensor.shape))(*tensor.shape)
+        strides = (ctypes.c_int64 * len(tensor.stride()))(*tensor.stride())
+
+        return _ArgumentTensor(data, shape, strides)
+
+
+def _run_launch_func(launch_func, *tensors):
+    stream = torch.cuda.Stream()
+
+    arg_tensors = tuple(_ArgumentTensor.from_torch_tensor(tensor) for tensor in tensors)
+
+    with torch.cuda.stream(stream):
+        launch_func(ctypes.c_void_p(stream.cuda_stream), *arg_tensors)
+
+    stream.synchronize()
+
+
+def _generate_launch_func(
+    arrangement, application, tensors, caller, kernel_name, output_dir
+):
+    ninetoothed.make(
+        arrangement,
+        application,
+        tensors,
+        caller=caller,
+        kernel_name=kernel_name,
+        output_dir=output_dir,
+    )
+
+    _compile_library(kernel_name, output_dir)
+    library = _load_library(kernel_name, output_dir)
+    launch_func_name = f"launch_{kernel_name}"
+    launch_func = getattr(library, launch_func_name)
+    launch_func.argtypes = (ctypes.c_void_p,) + tuple(_ArgumentTensor for _ in tensors)
+    launch_func.restype = ctypes.c_int
+
+    return launch_func
+
+
+def _compile_library(kernel_name, output_dir):
+    command = [
+        "nvcc",
+        "-shared",
+        "-Xcompiler",
+        "-fPIC",
+        "-lcuda",
+        "-o",
+        output_dir / f"{kernel_name}.so",
+        output_dir / f"{kernel_name}.c",
+    ]
+
+    subprocess.run(command, check=True)
+
+
+def _load_library(kernel_name, kernel_dir):
+    return ctypes.CDLL(kernel_dir / f"{kernel_name}.so")
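The `_ArgumentTensor` structure above defines the ABI through which the generated launch function receives each tensor: a raw data pointer plus pointers to its shape and strides. Here is a small sketch of the same ctypes packing, runnable on CPU without the compiled kernel; the example tensor and the read-back loop are illustrative assumptions.

import ctypes

import torch

class ArgumentTensor(ctypes.Structure):
    # Same layout as _ArgumentTensor in the test above.
    _fields_ = [
        ("data", ctypes.c_void_p),
        ("shape", ctypes.POINTER(ctypes.c_uint64)),
        ("strides", ctypes.POINTER(ctypes.c_int64)),
    ]

t = torch.empty(2, 3)

# The arrays must stay referenced while the struct is in use, since the
# struct only stores pointers to them.
shape = (ctypes.c_uint64 * t.dim())(*t.shape)
strides = (ctypes.c_int64 * t.dim())(*t.stride())
arg = ArgumentTensor(ctypes.c_void_p(t.data_ptr()), shape, strides)

# For a contiguous 2x3 tensor: shape (2, 3), strides (3, 1).
print(tuple(arg.shape[i] for i in range(t.dim())))
print(tuple(arg.strides[i] for i in range(t.dim())))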
@@ -1,3 +1,5 @@
+import functools
+
 import torch
 import torch.nn.functional as F
 
@@ -7,7 +9,14 @@ from ninetoothed import Tensor
 from tests.skippers import skip_if_cuda_not_available
 
 
-def arrangement(input, filter, output):
+def arrangement(
+    input,
+    filter,
+    output,
+    BLOCK_SIZE_M=matmul.BLOCK_SIZE_M,
+    BLOCK_SIZE_N=matmul.BLOCK_SIZE_N,
+    BLOCK_SIZE_K=matmul.BLOCK_SIZE_K,
+):
     input_tiled = input.tile((1, *filter.shape[1:]), strides=(-1, -1, 1, 1))
     input_squeezed = input_tiled.squeeze(1)
     input_squeezed.dtype = input_squeezed.dtype.squeeze(0)
@@ -19,7 +28,12 @@ def arrangement(input, filter, output):
 
     output_flattened = output.permute((0, 2, 3, 1)).flatten(end_dim=3)
 
-    return
+    return functools.partial(
+        matmul.arrangement,
+        BLOCK_SIZE_M=BLOCK_SIZE_M,
+        BLOCK_SIZE_N=BLOCK_SIZE_N,
+        BLOCK_SIZE_K=BLOCK_SIZE_K,
+    )(input_flattened, filter_permuted, output_flattened)
 
 
 def conv2d(input, filter):
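This change makes the conv2d arrangement an explicit delegation to the matmul arrangement (an implicit-GEMM formulation): the flattened input patches, permuted filter, and flattened output are handed to `matmul.arrangement` with the chosen block sizes. As a cross-check of the underlying algebra, here is a pure-PyTorch sketch of the same im2col idea, independent of ninetoothed; the sizes are illustrative.

import torch
import torch.nn.functional as F

n, c, h, w = 2, 4, 8, 8
k, r, s = 16, 3, 3
p, q = h - r + 1, w - s + 1

input = torch.randn(n, c, h, w)
filter = torch.randn(k, c, r, s)

# im2col: every (c, r, s) input patch becomes a row, and the filter bank
# becomes a (c*r*s, k) matrix, so the convolution reduces to one matmul.
patches = F.unfold(input, (r, s)).transpose(1, 2)  # (n, p*q, c*r*s)
weights = filter.reshape(k, -1).t()                # (c*r*s, k)
output = (patches @ weights).transpose(1, 2).reshape(n, k, p, q)

assert torch.allclose(output, F.conv2d(input, filter), atol=0.001, rtol=0.001)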
@@ -5,12 +5,19 @@ import ninetoothed.language as ntl
 from ninetoothed import Symbol, Tensor
 from tests.skippers import skip_if_cuda_not_available, skip_if_float8_e5m2_not_supported
 
-
-
-
-
-
-
+BLOCK_SIZE_M = Symbol("BLOCK_SIZE_M", meta=True)
+BLOCK_SIZE_N = Symbol("BLOCK_SIZE_N", meta=True)
+BLOCK_SIZE_K = Symbol("BLOCK_SIZE_K", meta=True)
+
+
+def arrangement(
+    lhs,
+    rhs,
+    output,
+    BLOCK_SIZE_M=BLOCK_SIZE_M,
+    BLOCK_SIZE_N=BLOCK_SIZE_N,
+    BLOCK_SIZE_K=BLOCK_SIZE_K,
+):
     output_tiled = output.tile((BLOCK_SIZE_M, BLOCK_SIZE_N))
 
     lhs_tiled = (
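Exposing BLOCK_SIZE_M/N/K as keyword parameters (with the meta `Symbol`s as defaults) lets callers pin concrete block sizes ahead of time, which is exactly what the new tests/test_aot.py does. A brief sketch of that usage, mirroring the calls in the test above; the value 64 is an assumed choice.

import functools

import ninetoothed
import ninetoothed.generation
import tests.test_matmul as matmul
from ninetoothed import Tensor

# Bind concrete block sizes for ahead-of-time generation.
arrangement = functools.partial(
    matmul.arrangement, BLOCK_SIZE_M=64, BLOCK_SIZE_N=64, BLOCK_SIZE_K=64
)
tensors = tuple(Tensor(2, dtype=ninetoothed.float16) for _ in range(3))

# Emit a C-callable "matmul" kernel into the cache directory.
ninetoothed.make(
    arrangement,
    matmul.application,
    tensors,
    caller="cuda",
    kernel_name="matmul",
    output_dir=ninetoothed.generation.CACHE_DIR,
)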