PyPI - ninetoothed - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

ninetoothed 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

ninetoothed/jit.py +157 -36
ninetoothed/symbol.py +30 -7
ninetoothed/tensor.py +17 -3
ninetoothed-0.2.0.dist-info/METADATA +79 -0
ninetoothed-0.2.0.dist-info/RECORD +10 -0
ninetoothed-0.1.0.dist-info/METADATA +0 -19
ninetoothed-0.1.0.dist-info/RECORD +0 -10
{ninetoothed-0.1.0.dist-info → ninetoothed-0.2.0.dist-info}/WHEEL +0 -0
{ninetoothed-0.1.0.dist-info → ninetoothed-0.2.0.dist-info}/licenses/LICENSE +0 -0

ninetoothed/jit.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import ast
+import collections
 import functools
 import inspect
 import itertools
 import math
 import tempfile
-import textwrap
 from ninetoothed.language import attribute, call
 from ninetoothed.symbol import Symbol
@@ -12,6 +12,67 @@ from ninetoothed.tensor import Tensor
 from ninetoothed.torchifier import Torchifier
+def jit(func):
+    return JIT(func)()
+class JIT:
+    handles = collections.defaultdict(dict)
+    def __init__(self, func):
+        self.func = func
+    def __call__(self):
+        source_file = inspect.getsourcefile(self.func)
+        source_line = inspect.getsourcelines(self.func)[1]
+        if (
+            source_file in type(self).handles
+            and source_line in type(self).handles[source_file]
+        ):
+            return type(self).handles[source_file][source_line]
+        tree = self._get_tree()
+        CodeGenerator(inspect.get_annotations(self.func)).visit(tree)
+        Tritonizer().visit(tree)
+        ast.fix_missing_locations(tree)
+        unparsed = ast.unparse(tree).replace("None:", ":").replace(":None", ":")
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
+            temp_file.write(unparsed.encode("utf-8"))
+            temp_file_name = temp_file.name
+        with open(temp_file_name, "r") as temp_file:
+            code = compile(
+                source=temp_file.read(),
+                filename=temp_file_name,
+                mode="exec",
+            )
+        namespace = {}
+        exec(code, namespace)
+        handle = _Handle(
+            namespace[self.func.__name__],
+            namespace[f"launch_{self.func.__name__}"],
+        )
+        type(self).handles[source_file][source_line] = handle
+        return handle
+    def _get_tree(self):
+        module = ast.parse(inspect.getsource(inspect.getmodule(self.func)))
+        _AliasRestorer().visit(module)
+        finder = _FunctionDefFinder(self.func.__name__)
+        finder.visit(module)
+        return ast.Module(body=[finder.result], type_ignores=[])
 class CodeGenerator(ast.NodeTransformer):
     def __init__(self, context):
         super().__init__()
@@ -38,6 +99,18 @@ class CodeGenerator(ast.NodeTransformer):
         self.generic_visit(node)
+        for arg in self._args:
+            if not isinstance(arg, Tensor):
+                continue
+            node.body.insert(
+                0,
+                ast.Assign(
+                    targets=[Symbol(f"{arg.name}_ptrs").node],
+                    value=arg.pointers().node,
+                ),
+            )
         return node
     def visit_arguments(self, node):
@@ -74,12 +147,12 @@ class CodeGenerator(ast.NodeTransformer):
             value = self._context[node.value.id]
             if isinstance(value, Tensor):
-                if isinstance(node.slice, ast.Tuple):
-                    indices = value.indices() + tuple(node.slice.elts)
-                else:
-                    indices = value.indices() + (node.slice,)
-                offsets = value.offsets(indices)
-                pointers = value.pointers(offsets)
+                pointers = type(self)._create_pointers(
+                    value,
+                    node.slice.elts
+                    if isinstance(node.slice, ast.Tuple)
+                    else (node.slice,),
+                )
                 return call("load", pointers).node
@@ -104,7 +177,9 @@ class CodeGenerator(ast.NodeTransformer):
         self.generic_visit(node)
         if node.id in self._context and isinstance(node.ctx, ast.Load):
-            return call("load", self._context[node.id].pointers().node).node
+            return call(
+                "load", type(self)._create_pointers(self._context[node.id], ()).node
+            ).node
         return node
@@ -118,7 +193,7 @@ class CodeGenerator(ast.NodeTransformer):
                 return ast.Expr(
                     call(
                         "store",
-                        self._context[target.id].pointers().node,
+                        type(self)._create_pointers(self._context[target.id], ()).node,
                         node.value,
                     ).node
                 )
@@ -133,13 +208,12 @@ class CodeGenerator(ast.NodeTransformer):
                 if isinstance(value, Tensor):
                     self.generic_visit(node)
-                    indices = value.indices() + tuple(
+                    pointers = type(self)._create_pointers(
+                        value,
                         target.slice.elts
                         if isinstance(target.slice, ast.Tuple)
-                        else target.slice
+                        else (target.slice,),
                     )
-                    offsets = value.offsets(indices)
-                    pointers = value.pointers(offsets)
                     return ast.Expr(
                         call(
@@ -254,6 +328,14 @@ class CodeGenerator(ast.NodeTransformer):
         return ast.parse(f"lambda meta: ({num_elements},)", mode="eval").body
+    @staticmethod
+    def _create_pointers(tensor, indices):
+        return Symbol(f"{tensor.name}_ptrs") + tensor.offsets(
+            [0 for _ in range(tensor.ndim())]
+            + list(indices)
+            + [0 for _ in range(tensor.inmost().ndim())]
+        )
 class Tritonizer(ast.NodeTransformer):
     def visit_Module(self, node):
@@ -267,8 +349,8 @@ class Tritonizer(ast.NodeTransformer):
     def visit_Name(self, node):
         self.generic_visit(node)
-        if node.id == "ninetoothed":
-            node.id = "triton"
+        if node.id == "ninetoothed" or "ninetoothed." in node.id:
+            node.id = node.id.replace("ninetoothed", "triton")
         return node
@@ -288,32 +370,71 @@ class Tritonizer(ast.NodeTransformer):
         return node
-def jit(func):
-    source = textwrap.dedent(inspect.getsource(func))
-    tree = ast.parse(source)
+class _Handle:
+    def __init__(self, kernel, launch):
+        self._kernel = kernel
+        self._launch = launch
+    def __call__(self, *args, **kwargs):
+        return self._launch(*args, **kwargs)
+class _AliasRestorer(ast.NodeTransformer):
+    def __init__(self):
+        super().__init__()
+        self._aliases = {}
+        self._redefined = set()
+    def visit_Import(self, node):
+        for alias in node.names:
+            if alias.asname:
+                self._aliases[alias.asname] = alias.name
+        return node
+    def visit_ImportFrom(self, node):
+        for alias in node.names:
+            full_name = f"{node.module}.{alias.name}"
+            if alias.asname:
+                self._aliases[alias.asname] = full_name
+        return node
-    CodeGenerator(func.__annotations__).visit(tree)
-    Tritonizer().visit(tree)
-    ast.fix_missing_locations(tree)
+    def visit_Assign(self, node):
+        for target in node.targets:
+            if isinstance(target, ast.Name):
+                self._redefined.add(target.id)
+        return self.generic_visit(node)
+    def visit_FunctionDef(self, node):
+        original_redefined = self._redefined.copy()
+        self.generic_visit(node)
+        self._redefined = original_redefined
-    unparsed = ast.unparse(tree).replace("None:", ":").replace(":None", ":")
+        return node
+    def visit_Name(self, node):
+        if node.id in self._redefined:
+            return node
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
-        temp_file.write(unparsed.encode("utf-8"))
-        temp_file_name = temp_file.name
+        if node.id in self._aliases:
+            return ast.Name(id=self._aliases[node.id], ctx=node.ctx)
-    with open(temp_file_name, "r") as temp_file:
-        code = compile(source=temp_file.read(), filename=temp_file_name, mode="exec")
+        return node
-    namespace = {}
-    exec(code, namespace)
-    class Handle:
-        def __init__(self, kernel, launch):
-            self._kernel = kernel
-            self._launch = launch
+class _FunctionDefFinder(ast.NodeVisitor):
+    def __init__(self, name):
+        self._name = name
-        def __call__(self, *args, **kwargs):
-            return self._launch(*args, **kwargs)
+        self.result = None
-    return Handle(namespace[func.__name__], namespace[f"launch_{func.__name__}"])
+    def visit_FunctionDef(self, node):
+        if node.name == self._name:
+            self.result = node
+        self.generic_visit(node)

ninetoothed/symbol.py CHANGED Viewed

@@ -34,24 +34,47 @@ class Symbol:
             self._node.id = type(self)._create_constexpr(self._node.id)
     def __add__(self, other):
-        return type(self)(
-            ast.BinOp(left=self._node, op=ast.Add(), right=type(self)(other)._node)
-        )
+        other = type(self)(other)
+        if isinstance(self._node, ast.Constant) and self._node.value == 0:
+            return other
+        if isinstance(other._node, ast.Constant) and other._node.value == 0:
+            return self
+        return type(self)(ast.BinOp(left=self._node, op=ast.Add(), right=other._node))
     def __radd__(self, other):
         return self.__add__(other)
     def __mul__(self, other):
-        return type(self)(
-            ast.BinOp(left=self._node, op=ast.Mult(), right=type(self)(other)._node)
-        )
+        other = type(self)(other)
+        if isinstance(self._node, ast.Constant) and self._node.value == 0:
+            return type(self)(0)
+        if isinstance(other._node, ast.Constant) and other._node.value == 0:
+            return type(self)(0)
+        if isinstance(self._node, ast.Constant) and self._node.value == 1:
+            return other
+        if isinstance(other._node, ast.Constant) and other._node.value == 1:
+            return self
+        return type(self)(ast.BinOp(left=self._node, op=ast.Mult(), right=other._node))
     def __rmul__(self, other):
         return self.__mul__(other)
     def __floordiv__(self, other):
+        other = type(self)(other)
+        if isinstance(other._node, ast.Constant) and other._node.value == 1:
+            return self
         return type(self)(
-            ast.BinOp(left=self._node, op=ast.FloorDiv(), right=type(self)(other)._node)
+            ast.BinOp(left=self._node, op=ast.FloorDiv(), right=other._node)
         )
     def __mod__(self, other):

ninetoothed/tensor.py CHANGED Viewed

@@ -46,7 +46,7 @@ class Tensor:
             new_size = call("cdiv", size, tile_size)
             outer_shape.append(new_size)
-            new_stride = call("cdiv", stride * size, (new_size * tile_stride))
+            new_stride = stride * tile_size // tile_stride
             outer_strides.append(new_stride)
             inner_shape.append(tile_size)
@@ -103,11 +103,12 @@ class Tensor:
             indices = self.indices()
         if not isinstance(self.dtype, type(self)):
-            if indices:
+            if len(indices) != self.ndim():
                 raise IndexError("Incorrect number of indices.")
             return sum(
-                self.stride(idx)
+                indices[idx]
+                * self.stride(idx)
                 * call("arange", 0, self.size(idx))[
                     tuple(slice(None) if i == idx else None for i in range(self.ndim()))
                 ]
@@ -131,8 +132,21 @@ class Tensor:
             indices.append(index // stride)
             index %= stride
+        curr = self.dtype
+        while isinstance(curr, type(self)):
+            indices.extend(
+                0 if curr is not self.inmost() else 1 for _ in range(curr.ndim())
+            )
+            curr = curr.dtype
         return tuple(indices)
+    def inmost(self):
+        if not isinstance(self.dtype, type(self)):
+            return self
+        return self.dtype.inmost()
     def ndim(self):
         return len(self.shape)

ninetoothed-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,79 @@
+Metadata-Version: 2.3
+Name: ninetoothed
+Version: 0.2.0
+Summary: A domain-specific language based on Triton but providing higher-level abstraction.
+Project-URL: Homepage, https://github.com/InfiniTensor/ninetoothed
+Project-URL: Issues, https://github.com/InfiniTensor/ninetoothed/issues
+Author-email: Jiacheng Huang <huangjiacheng0709@outlook.com>
+License-File: LICENSE
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+# NineToothed
+A domain-specific language (DSL) based on Triton but providing higher-level abstractions.
+**Other language versions: [English](README.md), [简体中文](docs/README.zh.md).**
+## Installation
+We can use `pip` to install `ninetoothed`.
+```shell
+pip install ninetoothed
+```
+After successfully running the above command, `ninetoothed` will be installed. However, to fully utilize its capabilities, you also need to install `triton` and a deep learning framework supported by `ninetoothed`. For trial purposes, we recommend installing `triton` and `torch`.
+## Usage
+Currently, we can use the `Tensor` and `Symbol` classes in the `ninetoothed` package to perform meta-operations like `tile` and `expand` to easily construct kernel functions. Below, we will use these features to create vector addition and matrix multiplication kernel functions.
+### Vector Addition
+```python
+BLOCK_SIZE = Symbol("BLOCK_SIZE", meta=True)
+@ninetoothed.jit
+def add_kernel(
+    x: Tensor(1).tile((BLOCK_SIZE,)),
+    y: Tensor(1).tile((BLOCK_SIZE,)),
+    z: Tensor(1).tile((BLOCK_SIZE,)),
+):
+    z = x + y
+```
+In this code, we first define `BLOCK_SIZE`, which is a `Symbol`. You can think of `"BLOCK_SIZE"` as its name. We see that `meta` is set to `True`, indicating to the compiler that it is a meta-parameter and its value can be determined by the compiler. The `Tensor(1)` constructs a one-dimensional tensor (vector), and `Tensor(1).tile((BLOCK_SIZE,))` means we want to create a vector and divide it into blocks of size `BLOCK_SIZE`. Suppose the size of this vector is `8192` and `BLOCK_SIZE` is `1024`, then the vector will be divided into `8` blocks, each of size `1024`.
+By using type annotations, we tell the compiler that we will have three tensor parameters, which will be divided into blocks, and `x`, `y`, and `z` are these blocks. It's important to understand that `x`, `y`, and `z` are the blocks, not the tensors themselves. In the function body, `x`, `y`, and `z` are also the blocks. The rest is straightforward (only one line, `z = x + y`, is left, haha): we add each block of `x` and `y` and store the result in `z`. Since each block of the parameter tensors undergoes this operation, the addition is completed for the whole tensors as well.
+### Matrix Multiplication
+```python
+BLOCK_SIZE_M = Symbol("BLOCK_SIZE_M", meta=True)
+BLOCK_SIZE_N = Symbol("BLOCK_SIZE_N", meta=True)
+BLOCK_SIZE_K = Symbol("BLOCK_SIZE_K", meta=True)
+a_tiled = Tensor(2).tile((BLOCK_SIZE_M, BLOCK_SIZE_K)).tile((1, -1))
+b_tiled = Tensor(2).tile((BLOCK_SIZE_K, BLOCK_SIZE_N)).tile((-1, 1))
+c_tiled = Tensor(2).tile((BLOCK_SIZE_M, BLOCK_SIZE_N))
+a_tiled = a_tiled.expand((-1, c_tiled.shape[1]))
+b_tiled = b_tiled.expand((c_tiled.shape[0], -1))
+@ninetoothed.jit
+def matmul_kernel(a: a_tiled, b: b_tiled, c: c_tiled):
+    accumulator = ninetoothed.language.zeros(
+        c.shape, dtype=ninetoothed.language.float32
+    )
+    for k in range(a.shape[1]):
+        accumulator = ninetoothed.language.dot(a[0, k], b[k, 0], accumulator)
+    c = accumulator.to(ninetoothed.language.float16)
+```
+For matrix multiplication, we also have three tensor parameters, but the tiling method is more complex than vector addition. We denote the three matrices as $A$, $B$, and $C$, where $A$ and $B$ are inputs, and $C$ is the output. Tiling $C$ is simple; we just need to divide it into blocks of size `(BLOCK_SIZE_M, BLOCK_SIZE_N)` by rows and columns. Once each block computes its result, the entire $C$ is computed. However, how should we tile $A$ and $B$? The answer is to introduce another meta-parameter `BLOCK_SIZE_K`. This way, we can divide $A$ into blocks of size `(BLOCK_SIZE_M, BLOCK_SIZE_K)` and $B$ into blocks of size `(BLOCK_SIZE_K, BLOCK_SIZE_N)`. However, for matrix multiplication, $A$ and $B$ do not correspond block by block; each row of $A$ needs to correspond to each column of $B$. Therefore, we need to further `tile` $A$ and $B$ by rows and columns, respectively. Up to this point, we have a set of row blocks of $A$ and column blocks of $B$. However, each row block of $A$ must correspond to every column block of $B$. This is where `expand` comes in. We `expand` the row blocks of $A$ along the columns to the number of columns of $C$ and the column blocks of $B$ along the rows to the number of rows of $C$. This way, we successfully tile $A$, $B$, and $C$.
+With tiling done, the rest is simple. In the function body, we define an `accumulator` to accumulate intermediate results. We then iterate through the corresponding row blocks of $A$ and column blocks of $B$, multiplying them and accumulating the results in `accumulator`. Finally, we place the `accumulator` in the corresponding block of $C$. Since each block of the parameter tensors undergoes this operation, the multiplication is completed for the whole tensors as well.

ninetoothed-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+ninetoothed/__init__.py,sha256=T5UJXlC-wbo8JKPbLUNT65Kccp12xP52WFV5FsugETI,147
+ninetoothed/jit.py,sha256=hmUzkFZzsiKLgOHbsN0MAr1G1JCiyQ22cFPtmyZ1OyE,12725
+ninetoothed/language.py,sha256=cSuTgi5OwmLFy-dy_AHGZzRm18wz01ByHQ2vioP1vTg,437
+ninetoothed/symbol.py,sha256=I2Mc9D1w7AYAIQtyAXyDQ-FBqowVZrd-PK-JOt_SpgA,3787
+ninetoothed/tensor.py,sha256=RfwYzdYASkr6usJklESm1n8RoxvYjWnPtCjIfipa2fg,5000
+ninetoothed/torchifier.py,sha256=JmIVQE8r0zr_RLExsRDOGNsMu0F7v6J_o22aWqlw81k,841
+ninetoothed-0.2.0.dist-info/METADATA,sha256=w6qkc2riniG0N4nDUCUkZWF8Eve3j5brBQHIWIEqLXQ,5422
+ninetoothed-0.2.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+ninetoothed-0.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ninetoothed-0.2.0.dist-info/RECORD,,

ninetoothed-0.1.0.dist-info/METADATA DELETED Viewed

@@ -1,19 +0,0 @@
-Metadata-Version: 2.3
-Name: ninetoothed
-Version: 0.1.0
-Summary: A domain-specific language based on Triton but providing higher-level abstraction.
-Project-URL: Homepage, https://github.com/InfiniTensor/ninetoothed
-Project-URL: Issues, https://github.com/InfiniTensor/ninetoothed/issues
-Author-email: Jiacheng Huang <huangjiacheng0709@outlook.com>
-License-File: LICENSE
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-# Nine-Toothed
-A domain-specific language based on Triton but providing higher-level abstraction.
-**Read this in other languages: [English](README.md), [简体中文](docs/README.zh.md).**

ninetoothed-0.1.0.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-ninetoothed/__init__.py,sha256=T5UJXlC-wbo8JKPbLUNT65Kccp12xP52WFV5FsugETI,147
-ninetoothed/jit.py,sha256=mnBtsrD84usfYEozAclKBqW3Rrl1OEAolhsKRvrOTKU,9735
-ninetoothed/language.py,sha256=cSuTgi5OwmLFy-dy_AHGZzRm18wz01ByHQ2vioP1vTg,437
-ninetoothed/symbol.py,sha256=8BI4ekeLuUdHTEREvMMlAzwrJ93pqiCdSHGc38clBFA,3034
-ninetoothed/tensor.py,sha256=RMHgADBTdj5Q18Ttre4baq6tG_mqC4VrSn0AV6BL6VQ,4610
-ninetoothed/torchifier.py,sha256=JmIVQE8r0zr_RLExsRDOGNsMu0F7v6J_o22aWqlw81k,841
-ninetoothed-0.1.0.dist-info/METADATA,sha256=uM1Bs_zmjwgGtWJMBKejFRyiC0jO209PHS33btFMTGA,783
-ninetoothed-0.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-ninetoothed-0.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ninetoothed-0.1.0.dist-info/RECORD,,

{ninetoothed-0.1.0.dist-info → ninetoothed-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{ninetoothed-0.1.0.dist-info → ninetoothed-0.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

ninetoothed 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

ninetoothed 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl