PyPI - ninetoothed - Versions diffs - 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl - Mend

ninetoothed 0.16.0-py3-none-any.whl → 0.18.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

ninetoothed/aot.py +76 -16
ninetoothed/generation.py +71 -12
ninetoothed/language.py +4 -0
ninetoothed/tensor.py +17 -0
ninetoothed/visualization.py +19 -12
{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/METADATA +1 -1
{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/RECORD +9 -9
{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/WHEEL +0 -0
{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/licenses/LICENSE +0 -0

ninetoothed/aot.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import ast
 import pathlib
+import re
 import subprocess
 import tempfile
 import uuid
+import ninetoothed.naming as naming
 from ninetoothed.dtype import int64
 from ninetoothed.generation import CACHE_DIR, CodeGenerator
 from ninetoothed.tensor import Tensor
@@ -25,13 +27,15 @@ def aot(
 def _aot(func, caller, kernel_name, num_warps, num_stages):
     def _find_tensor_by_source_name(tensors, name):
+        name = naming.remove_prefixes(name)
         for tensor in tensors:
-            if tensor.source.name == name:
+            if naming.remove_prefixes(tensor.source.name) == name:
                 return tensor
     _HEADER_PATH.parent.mkdir(exist_ok=True)
-    if not _HEADER_PATH.exists():
+    if not _HEADER_PATH.exists() or _HEADER_PATH.read_text() != _HEADER_CONTENT:
         _HEADER_PATH.write_text(_HEADER_CONTENT)
     code_generator = CodeGenerator()
@@ -49,11 +53,15 @@ def _aot(func, caller, kernel_name, num_warps, num_stages):
     kernel_func = code_generator.kernel_func
     launch_func = code_generator.launch_func
+    param_strings = ["stream"]
     param_types = []
+    constexpr_param_indices = []
     for arg in kernel_func.args.args:
         param = arg.arg
+        param_strings.append(param)
         if match := Tensor.pointer_pattern().fullmatch(param):
             source_name = match.group(0).removesuffix("_pointer")
             tensor = _find_tensor_by_source_name(tensors, source_name)
@@ -64,9 +72,23 @@ def _aot(func, caller, kernel_name, num_warps, num_stages):
             param_types.append(int64)
         elif Tensor.stride_pattern().fullmatch(param):
             param_types.append(int64)
+        else:
+            source_name = param
+            tensor = _find_tensor_by_source_name(tensors, source_name)
+            dtype = tensor.source.dtype
+            if tensor.constexpr:
+                param_types.append(f"{tensor.value}")
+                constexpr_param_indices.append(len(param_types) - 1)
+            else:
+                param_types.append(dtype)
     signature = ", ".join(param_types)
+    for index in sorted(set(constexpr_param_indices), reverse=True):
+        param_strings.pop(index + 1)
+        param_types.pop(index)
     grid_extractor = _GridExtractor()
     launch_func = grid_extractor.visit(launch_func)
     grid_extractor.visit(code_generator.raw_grid)
@@ -76,41 +98,58 @@ def _aot(func, caller, kernel_name, num_warps, num_stages):
         source_file, kernel_name, signature, grid, num_warps, num_stages
     )
-    unparser = _Unparser()
+    c_source_file_name = f"{kernel_name}.{signature_hash}.c"
+    c_source_file = output_contents[c_source_file_name]
+    c_header_file_name = f"{kernel_name}.{signature_hash}.h"
+    c_header_file = output_contents[c_header_file_name]
+    pattern = rf"\({', '.join(rf'(.*) {param}' for param in param_strings)}\)"
+    c_param_type_strings = re.search(pattern, c_header_file).groups()
+    unparser = _Unparser(c_param_type_strings)
     launch_func_unparsed = unparser.unparse(launch_func)
     launch_func_unparsed = launch_func_unparsed.replace(
         func.__name__, f"{kernel_name}_{signature_hash}"
     )
-    c_source_file_name = f"{kernel_name}.{signature_hash}.c"
-    c_source_file = output_contents[c_source_file_name]
     c_source_file = f"{c_source_file}\n{launch_func_unparsed}\n"
     c_source_file = c_source_file.replace("<stdint.h>", f'"{_HEADER_PATH}"')
     output_contents[c_source_file_name] = c_source_file
-    c_header_file_name = f"{kernel_name}.{signature_hash}.h"
-    c_header_file = output_contents[c_header_file_name]
-    c_header_file = f"{c_header_file}\n{unparser.header};\n"
+    c_header_file = f'{c_header_file}\n#ifdef __cplusplus\nextern "C" {unparser.header};\n#else\n{unparser.header};\n#endif\n'
     c_header_file = c_header_file.replace("<stdint.h>", f'"{_HEADER_PATH}"')
     output_contents[c_header_file_name] = c_header_file
     return output_contents
-_HEADER_CONTENT = """#include <stdint.h>
+_HEADER_CONTENT = """#ifndef NINETOOTHED_H
+#define NINETOOTHED_H
+#include <stdint.h>
 typedef struct {
-    uintptr_t data;
+    void *data;
     uint64_t *shape;
     int64_t *strides;
 } NineToothedTensor;
+typedef void *NineToothedStream;
+typedef int NineToothedResult;
+#endif // NINETOOTHED_H
 """
 _HEADER_PATH = CACHE_DIR / "ninetoothed.h"
 class _Unparser:
+    def __init__(self, param_types):
+        self._param_types = param_types
     def unparse(self, node):
         method_name = "_unparse_" + node.__class__.__name__
@@ -128,29 +167,50 @@ class _Unparser:
     def _unparse_Call(self, node):
         call = ast.Call(
             func=node.func,
-            args=[ast.Name(id="stream", ctx=ast.Load())] + node.args,
+            args=[ast.Name(id="stream", ctx=ast.Load())]
+            + [
+                arg
+                for arg in node.args
+                if not isinstance(arg, ast.Name) or not naming.is_constexpr(arg.id)
+            ],
             keywords=[],
         )
-        return f"return {self._generic_unparse(call)};"
+        unparsed = f"return {self._generic_unparse(call)};"
+        pattern = rf"\((stream), {', '.join(r'([^,]*)' for _ in range(len(self._param_types) - 1))}\)"
+        args = re.search(pattern, unparsed).groups()
+        for i, (arg, type) in enumerate(zip(args, self._param_types)):
+            if i != 0 and "." not in arg:
+                new_arg = f"*({type} *){arg}.data"
+            else:
+                new_arg = f"({type}){arg}"
+            unparsed = unparsed.replace(arg, new_arg)
+        return unparsed
     def _unparse_FunctionDef(self, node):
-        params = ["CUstream stream"]
+        params = ["NineToothedStream stream"]
         params += [f"NineToothedTensor {arg.arg}" for arg in node.args.args]
-        header = f"CUresult {node.name}({', '.join(params)})"
+        header = f"NineToothedResult {node.name}({', '.join(params)})"
         self.header = header
         body_lines = []
         for stmt in node.body:
+            if isinstance(stmt, ast.Assign):
+                continue
             stmt_unparsed = self.unparse(stmt)
             if isinstance(stmt, ast.Expr):
                 stmt_unparsed = stmt_unparsed.strip()
-                if not stmt_unparsed.endswith(";"):
-                    stmt_unparsed += ";"
+            if not stmt_unparsed.endswith(";"):
+                stmt_unparsed += ";"
             body_lines.append("    " + stmt_unparsed)

ninetoothed/generation.py CHANGED Viewed

@@ -19,6 +19,7 @@ import uuid
 import sympy
 import triton
 import triton.language as tl
+from triton.language.extra import libdevice
 import ninetoothed.naming as naming
 from ninetoothed.cudaifier import Cudaifier
@@ -225,6 +226,41 @@ class CodeGenerator(ast.NodeTransformer):
         return node
+    def visit_Call(self, node):
+        def _offsets(tensor, dim=None):
+            if dim is None:
+                return tensor._last_generated_overall_offsets.node
+            offsets = tensor._last_generated_offsets
+            if dim < 0:
+                dim += tensor.source.ndim
+            return sum(
+                offsets[dim][target_dim] for target_dim in range(tensor.target.ndim)
+            ).node
+        func = node.func
+        args = node.args
+        if isinstance(func, ast.Attribute):
+            if func.attr == "offsets":
+                value = func.value
+                if self._in_context(value):
+                    tensor = self._context[value.id]
+                elif isinstance(value, ast.Subscript) and self._in_context(value.value):
+                    tensor = self._context[value.value.id]
+                self.visit(value)
+                # TODO: Add error handling.
+                return _offsets(tensor, ast.literal_eval(args[0]) if args else None)
+        self.generic_visit(node)
+        return node
     def visit_Subscript(self, node):
         if self._in_context(node.value) and isinstance(node.ctx, ast.Load):
             value = self._context[node.value.id]
@@ -242,13 +278,24 @@ class CodeGenerator(ast.NodeTransformer):
         return node
     def visit_Attribute(self, node):
-        if self._in_context(node.value):
-            value = self._context[node.value.id]
+        value = node.value
-            if isinstance(value, Tensor):
-                inner = value.dtype
+        if isinstance(value, ast.Attribute):
+            value = self.visit_Attribute(value)
+        if self._in_context(value):
+            value = self._context[value.id].dtype
+        if isinstance(value, Tensor):
+            attr = getattr(value, node.attr)
+            if isinstance(attr, Tensor):
+                return attr
-                return Symbol(getattr(inner, node.attr)).node
+            if node.attr == "dtype":
+                return Symbol(f"{value.source.pointer_string()}.type.element_ty").node
+            return Symbol(attr).node
         self.generic_visit(node)
@@ -453,16 +500,19 @@ class CodeGenerator(ast.NodeTransformer):
             naming.remove_prefixes(param) for param in next_power_of_2_params
         ]
+        arg_names = [naming.remove_prefixes(arg.source.name) for arg in self._args]
+        arg_names += [
+            param
+            for param in non_next_power_of_2_constexpr_params_without_prefixes
+            if not Tensor.size_pattern().fullmatch(param) and param not in arg_names
+        ]
         launch = ast.FunctionDef(
             name=self.launch_func_name,
             args=ast.arguments(
                 posonlyargs=[],
-                args=[ast.arg(arg=arg.source.name) for arg in self._args]
-                + [
-                    ast.arg(arg=param)
-                    for param in non_next_power_of_2_constexpr_params_without_prefixes
-                    if not Tensor.size_pattern().fullmatch(param)
-                ],
+                args=[ast.arg(arg=name) for name in arg_names],
                 kwonlyargs=[],
                 defaults=[],
             ),
@@ -560,6 +610,8 @@ class CodeGenerator(ast.NodeTransformer):
         indices = self._complete_indices(tensor, indices)
         offsets = type(self)._generate_offsets(tensor, indices)
+        tensor._last_generated_offsets = offsets
         for source_dim in range(tensor.source.ndim):
             for target_dim in range(tensor.target.ndim):
                 if target_dim not in invariant_target_dims:
@@ -584,7 +636,7 @@ class CodeGenerator(ast.NodeTransformer):
                     * tensor.source.strides[source_dim]
                 )
-        pointers = name_for_pointers + sum(
+        overall_offsets = sum(
             offsets[source_dim][target_dim][
                 type(self)._generate_slices(tensor, target_dim)
             ]
@@ -594,6 +646,10 @@ class CodeGenerator(ast.NodeTransformer):
             if target_dim not in invariant_target_dims
             and offsets[source_dim][target_dim] != 0
         )
+        tensor._last_generated_overall_offsets = overall_offsets
+        pointers = name_for_pointers + overall_offsets
         mask = functools.reduce(
             lambda x, y: x & y,
             (
@@ -980,6 +1036,9 @@ class _Inliner(ast.NodeTransformer):
         if func_def is None:
             return None, []
+        if inspect.getmodule(func) is libdevice:
+            return None, []
         collector = _ImportCollector()
         collector.visit(ast.parse(inspect.getsource(inspect.getmodule(func))))
         self.imports.extend(collector.imports)

ninetoothed/language.py CHANGED Viewed

@@ -1,7 +1,11 @@
 import ast
+from triton.language.extra import libdevice
 from ninetoothed.symbol import Symbol
+__all__ = ["libdevice"]
 LANGUAGE = "ninetoothed.language"

ninetoothed/tensor.py CHANGED Viewed

@@ -32,6 +32,8 @@ class Tensor:
         strides=None,
         other=None,
         shape_options=None,
+        constexpr=None,
+        value=None,
         name=None,
         source=None,
         source_dims=None,
@@ -74,6 +76,21 @@ class Tensor:
         self.other = other
+        if constexpr and self.ndim != 0:
+            raise ValueError(
+                "`constexpr` can only be set for zero-dimensional tensors."
+            )
+        self.constexpr = constexpr
+        if self.constexpr:
+            self.name = naming.make_constexpr(self.name)
+        if not constexpr and value is not None:
+            raise ValueError("`value` can only be set for constexpr tensors.")
+        self.value = value
         if source is not None:
             self.source = source
         else:

ninetoothed/visualization.py CHANGED Viewed

@@ -10,8 +10,6 @@ def visualize(tensor, color=None, save_path=None):
     :param color: The color to be used for visualization.
     :param save_path: The path where the visualization should be saved.
     """
-    outline_width = 0.1
-    plt.rcParams["lines.linewidth"] = 72 * outline_width
     if color is None:
         color = f"C{visualize.count}"
@@ -21,6 +19,24 @@ def visualize(tensor, color=None, save_path=None):
     width = max_pos_y + 1
     height = max_pos_x + 1
+    _, ax = _prepare_figure_and_axes(width, height)
+    _visualize_tensor(ax, tensor, 0, 0, color)
+    plt.savefig(save_path, transparent=True, bbox_inches="tight", pad_inches=0)
+    plt.close()
+    visualize.count += 1
+visualize.count = 0
+def _prepare_figure_and_axes(width, height):
+    outline_width = 0.1
+    plt.rcParams["lines.linewidth"] = 72 * outline_width
     fig = plt.figure(figsize=(width + outline_width, height + outline_width))
     h = (Size.Fixed(0), Size.Fixed(width + outline_width))
@@ -41,16 +57,7 @@ def visualize(tensor, color=None, save_path=None):
     plt.xlim((-half_outline_width, width + half_outline_width))
     plt.ylim((-half_outline_width, height + half_outline_width))
-    _visualize_tensor(ax, tensor, 0, 0, color)
-    plt.savefig(save_path, transparent=True, bbox_inches="tight", pad_inches=0)
-    plt.close()
-    visualize.count += 1
-visualize.count = 0
+    return fig, ax
 def _visualize_tensor(ax, tensor, x, y, color, level_spacing=4):

{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ninetoothed
-Version: 0.16.0
+Version: 0.18.0
 Summary: A domain-specific language based on Triton but providing higher-level abstraction.
 Project-URL: Homepage, https://github.com/InfiniTensor/ninetoothed
 Project-URL: Issues, https://github.com/InfiniTensor/ninetoothed/issues

{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
 ninetoothed/__init__.py,sha256=F2bxRNhzcGdtADA8RehTuf-QK0xnxno8kxvr6H2L5Tg,552
-ninetoothed/aot.py,sha256=8ZCLtnsign14YvY7SXX5ASidhuUAhPwppTXUJNkQup4,6243
+ninetoothed/aot.py,sha256=VLPFRNZgq82DumuVMi36_qptM5nkORzmhbP4uPa559Q,8173
 ninetoothed/cudaifier.py,sha256=5ylMr1q0B9NwbeXkpCu3o2nMGpDfh65nAQ0Az_qMQuI,877
 ninetoothed/dtype.py,sha256=-0iBleay5gYA4wtT3l17QjCesr7g26M6CSfhNJdI3k4,165
-ninetoothed/generation.py,sha256=VIqSyZT4yHxY_a2QPmWW6jjALv3e1mohDqdRQBRYsAo,36462
+ninetoothed/generation.py,sha256=zbqRWvpa-1q44WuZV9S13DDAxvi4dai2AJ47ihjODsM,38150
 ninetoothed/jit.py,sha256=CpeSkO_zUe9DwtTJ2K2H7Bwpx-FvIHfrgzOcEosfpek,2946
-ninetoothed/language.py,sha256=YwjlBENmmKPTnhaQ2uYbj5MwzrCAT7MLJ6VkQ6NeXJE,504
+ninetoothed/language.py,sha256=ERiA4dpwiow2AT2xFeFWYg1KqlnBo6xxPGp8VZrP0Lk,574
 ninetoothed/make.py,sha256=fQKuRJL7HC2iGTAN323mlIWXz9Z3jotIoN68ur29Qlw,1834
 ninetoothed/naming.py,sha256=Fl0x4eDRStTpkXjJg6179ErEnY7bR5Qi0AT6RX9C3fU,951
 ninetoothed/symbol.py,sha256=lJo3NL2-T7tKbKjb6MCRLMemN94mqS3bIiG943P0Mbo,7454
-ninetoothed/tensor.py,sha256=gQEzHTcXqZVBFLc2YRfXTKxjxPWMxWN7fNl2BCfJwMs,14782
+ninetoothed/tensor.py,sha256=lK8s5-l5cqhM9FCWXMjTle9vA1Nass_92tvuHY8H3OM,15265
 ninetoothed/torchifier.py,sha256=aDijK5UOwK2oLXDHgDo8M959rJclEI0lcfaPr7GQTXY,1012
 ninetoothed/utils.py,sha256=mtRXABBVPnlgd2n1REh9oB3s_5bUsKhd3iwu3oJ5DSQ,338
-ninetoothed/visualization.py,sha256=zlMH-0WplaboePGzcbpcj4UovpX0k2r4SysSPsNS4r4,3674
-ninetoothed-0.16.0.dist-info/METADATA,sha256=nkq3iImebtmcEs-bZq2zfF2_QxrZD9IWky1S86OnUMA,7340
-ninetoothed-0.16.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ninetoothed-0.16.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-ninetoothed-0.16.0.dist-info/RECORD,,
+ninetoothed/visualization.py,sha256=oc3cA5qqT66_RoAs5D681SCxR5E5wgFwk95ZefdSfZU,3794
+ninetoothed-0.18.0.dist-info/METADATA,sha256=X4TvwcjVuB40X4jmCsRMee8Auj2mGyORYsbk81fd-G0,7340
+ninetoothed-0.18.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ninetoothed-0.18.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ninetoothed-0.18.0.dist-info/RECORD,,

{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{ninetoothed-0.16.0.dist-info → ninetoothed-0.18.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

ninetoothed 0.16.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

ninetoothed 0.16.0-py3-none-any.whl → 0.18.0-py3-none-any.whl