ninetoothed 0.11.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ninetoothed-0.12.0/.gitattributes +1 -0
- ninetoothed-0.12.0/.github/workflows/sphinx.yml +37 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/PKG-INFO +4 -1
- ninetoothed-0.12.0/docs/Makefile +20 -0
- ninetoothed-0.12.0/docs/make.bat +35 -0
- ninetoothed-0.12.0/docs/requirements.txt +2 -0
- ninetoothed-0.12.0/docs/source/_static/matmul-tiling.png +3 -0
- ninetoothed-0.12.0/docs/source/_static/ninetoothed-logo.png +3 -0
- ninetoothed-0.12.0/docs/source/_static/vecadd-tiling.png +3 -0
- ninetoothed-0.12.0/docs/source/code_generation.rst +9 -0
- ninetoothed-0.12.0/docs/source/conf.py +28 -0
- ninetoothed-0.12.0/docs/source/index.rst +14 -0
- ninetoothed-0.12.0/docs/source/installation.rst +12 -0
- ninetoothed-0.12.0/docs/source/python_api.rst +9 -0
- ninetoothed-0.12.0/docs/source/symbol.rst +4 -0
- ninetoothed-0.12.0/docs/source/tensor.rst +18 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/pyproject.toml +4 -1
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/jit.py +31 -3
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/symbol.py +7 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/tensor.py +67 -2
- ninetoothed-0.12.0/src/ninetoothed/visualization.py +122 -0
- ninetoothed-0.12.0/tests/test_attention.py +92 -0
- ninetoothed-0.12.0/tests/test_conv2d.py +62 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/test_matmul.py +18 -12
- ninetoothed-0.11.0/docs/source/_static/matmul-tiling.png +0 -0
- ninetoothed-0.11.0/docs/source/_static/ninetoothed-logo.png +0 -0
- ninetoothed-0.11.0/docs/source/_static/vecadd-tiling.png +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/.github/workflows/publish-to-pypi.yml +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/.github/workflows/pytest.yml +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/.github/workflows/ruff.yml +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/.gitignore +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/LICENSE +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/README.md +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/docs/README.zh.md +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/requirements.txt +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/__init__.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/language.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/naming.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/torchifier.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/__init__.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/skippers.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/test_add.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/test_addmm.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/test_naming.py +0 -0
- {ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/test_softmax.py +0 -0

ninetoothed-0.12.0/.gitattributes
@@ -0,0 +1 @@
+*.png filter=lfs diff=lfs merge=lfs -text

ninetoothed-0.12.0/.github/workflows/sphinx.yml
@@ -0,0 +1,37 @@
+name: "Sphinx: Render docs"
+
+on: push
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+          lfs: true
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .
+          pip install -r docs/requirements.txt
+      - name: Build HTML
+        run: make -C docs html
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: html-docs
+          path: docs/build/html/
+      - name: Deploy
+        uses: peaceiris/actions-gh-pages@v3
+        if: github.ref == 'refs/heads/master'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: docs/build/html
+          cname: ninetoothed.org

{ninetoothed-0.11.0 → ninetoothed-0.12.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ninetoothed
-Version: 0.11.0
+Version: 0.12.0
 Summary: A domain-specific language based on Triton but providing higher-level abstraction.
 Project-URL: Homepage, https://github.com/InfiniTensor/ninetoothed
 Project-URL: Issues, https://github.com/InfiniTensor/ninetoothed/issues
@@ -11,6 +11,9 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.10
 Requires-Dist: triton>=3.0.0
+Provides-Extra: visualization
+Requires-Dist: matplotlib>=3.9.0; extra == 'visualization'
+Requires-Dist: numpy>=2.1.0; extra == 'visualization'
 Description-Content-Type: text/markdown
 
 # NineToothed

ninetoothed-0.12.0/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

ninetoothed-0.12.0/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+    set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+    echo.
+    echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+    echo.installed, then set the SPHINXBUILD environment variable to point
+    echo.to the full path of the 'sphinx-build' executable. Alternatively you
+    echo.may add the Sphinx directory to PATH.
+    echo.
+    echo.If you don't have Sphinx installed, grab it from
+    echo.https://www.sphinx-doc.org/
+    exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd

ninetoothed-0.12.0/docs/source/conf.py
@@ -0,0 +1,28 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "NineToothed"
+copyright = "2024, NineToothed Contributors"
+author = "NineToothed Contributors"
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = ["sphinx.ext.autodoc", "sphinx.ext.autosummary"]
+
+templates_path = ["_templates"]
+exclude_patterns = []
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "pydata_sphinx_theme"
+html_static_path = ["_static"]
+html_title = "NineToothed"
+html_logo = "_static/ninetoothed-logo.png"

ninetoothed-0.12.0/docs/source/index.rst
@@ -0,0 +1,14 @@
+NineToothed Documentation
+=========================
+
+**NineToothed** is a domain-specific language (DSL) based on Triton, offering higher-level abstractions. Through its tensor-oriented metaprogramming (TOM) model, it empowers developers to write high-performance compute kernels intuitively, without the need to manage low-level details like pointer arithmetic or memory access.
+
+.. note::
+
+   This project is under active development.
+
+.. toctree::
+   :maxdepth: 2
+
+   installation
+   python_api

ninetoothed-0.12.0/docs/source/installation.rst
@@ -0,0 +1,12 @@
+Installation
+============
+
+You can install NineToothed using ``pip``:
+
+.. code-block::
+
+   pip install ninetoothed
+
+To fully leverage its capabilities, you will also need to install a compatible deep learning framework. Currently, NineToothed supports `PyTorch <https://pytorch.org/>`_.
+
+It is generally considered good practice to use a virtual environment when installing packages with pip, though it is optional. You may find this `documentation <https://docs.python.org/3/library/venv.html>`_ helpful.

ninetoothed-0.12.0/docs/source/tensor.rst
@@ -0,0 +1,18 @@
+Tensor
+======
+
+.. autoclass:: ninetoothed.Tensor
+
+Meta-Operations
+---------------
+
+.. autosummary::
+   :toctree: generated
+   :nosignatures:
+
+   ninetoothed.Tensor.tile
+   ninetoothed.Tensor.expand
+   ninetoothed.Tensor.squeeze
+   ninetoothed.Tensor.permute
+   ninetoothed.Tensor.flatten
+   ninetoothed.Tensor.ravel

{ninetoothed-0.11.0 → ninetoothed-0.12.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "ninetoothed"
-version = "0.11.0"
+version = "0.12.0"
 authors = [{ name = "Jiacheng Huang", email = "huangjiacheng0709@outlook.com" }]
 description = "A domain-specific language based on Triton but providing higher-level abstraction."
 readme = "README.md"
@@ -20,6 +20,9 @@ classifiers = [
 Homepage = "https://github.com/InfiniTensor/ninetoothed"
 Issues = "https://github.com/InfiniTensor/ninetoothed/issues"
 
+[project.optional-dependencies]
+visualization = ["matplotlib>=3.9.0", "numpy>=2.1.0"]
+
 [tool.ruff]
 src = [".", "src", "tests"]
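
Note: with this new optional-dependency group, the visualization requirements can be pulled in through pip's standard extras syntax, e.g. pip install "ninetoothed[visualization]".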

{ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/jit.py
@@ -20,6 +20,13 @@ from ninetoothed.torchifier import Torchifier
 
 
 def make(arrangement, application, tensors):
+    """Integrate the arrangement and the application of the tensors.
+
+    :param arrangement: The arrangement of the tensors.
+    :param application: The application of the tensors.
+    :param tensors: The tensors.
+    :return: A handle to the compute kernel.
+    """
     params = inspect.signature(application).parameters
     types = arrangement(*tensors)
     annotations = {param: type for param, type in zip(params, types)}
@@ -28,14 +35,26 @@ def make(arrangement, application, tensors):
     return jit(application)
 
 
-def jit(
+def jit(func=None, *, _prettify=False):
+    """A decorator for generating compute kernels.
+
+    :param func: The function to be compiled.
+    :param _prettify: Whether to prettify the generated code.
+    :return: A handle to the compute kernel.
+
+    .. note::
+
+        The ``_prettify`` parameter is experimental, which might break
+        the generated code.
+    """
+
     def wrapper(func):
         return JIT(func, _prettify=_prettify)()
 
-    if
+    if func is None:
         return wrapper
 
-    return wrapper(
+    return wrapper(func)
 
 
 class JIT:
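
For orientation, here is a minimal sketch of the newly documented make/jit workflow, following the pattern of the test files further down; the vector-add arrangement and application are illustrative assumptions, not code from this diff:

    import torch

    import ninetoothed
    from ninetoothed import Symbol, Tensor


    def arrangement(x, y, z):
        # Tile all three tensors one-dimensionally with a meta block size.
        BLOCK_SIZE = Symbol("BLOCK_SIZE", meta=True)

        return x.tile((BLOCK_SIZE,)), y.tile((BLOCK_SIZE,)), z.tile((BLOCK_SIZE,))


    def application(x, y, z):
        # Each program instance adds one tile of x and y into z.
        z = x + y  # noqa: F841


    # `make` integrates the arrangement with the application and returns
    # a handle to the generated compute kernel.
    add_kernel = ninetoothed.make(
        arrangement, application, (Tensor(1), Tensor(1), Tensor(1))
    )

    x = torch.randn(1024, device="cuda")
    y = torch.randn(1024, device="cuda")
    z = torch.empty_like(x)

    add_kernel(x, y, z)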
@@ -472,6 +491,15 @@ class CodeGenerator(ast.NodeTransformer):
                 for target_dim in range(tensor.target.ndim)
                 if offsets[source_dim][target_dim] != 0
             ),
+        ) & functools.reduce(
+            lambda x, y: x & y,
+            (
+                indices[dim - tensor.innermost().target.ndim][
+                    type(self)._generate_slices(tensor, target_dim)
+                ]
+                < tensor.innermost().target.shape[dim]
+                for dim, target_dim in enumerate(tensor.innermost().target_dims)
+            ),
         )
 
         return pointers, mask
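
The new term AND-combines one bounds check per dimension of the innermost target into the load/store mask. A standalone sketch of that reduction pattern, with NumPy arrays standing in for the Triton-level index tensors (an assumption made purely for illustration):

    import functools

    import numpy as np

    # Two broadcastable index grids and the logical sizes that bound them.
    indices = (np.arange(4)[:, None], np.arange(4)[None, :])
    shape = (3, 2)

    # Fold one `index < size` comparison per dimension into a single mask.
    mask = functools.reduce(
        lambda x, y: x & y,
        (indices[dim] < shape[dim] for dim in range(len(shape))),
    )

    print(mask.shape)  # (4, 4); True only where every per-dimension check passes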

{ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/symbol.py
@@ -7,6 +7,13 @@ import ninetoothed.naming as naming
 
 
 class Symbol:
+    """A class used to represent a symbol.
+
+    :param expr: The expression used to construct the symbol.
+    :param constexpr: Whether the symbol is a constexpr.
+    :param meta: Whether the symbol is a meta.
+    """
+
     def __init__(self, expr, constexpr=None, meta=None):
         if isinstance(expr, type(self)):
             self._node = expr._node

{ninetoothed-0.11.0 → ninetoothed-0.12.0}/src/ninetoothed/tensor.py
@@ -3,11 +3,25 @@ import math
 import re
 
 import ninetoothed.naming as naming
-from ninetoothed.language import call
 from ninetoothed.symbol import Symbol
 
 
 class Tensor:
+    """A class used to represent a symbolic tensor.
+
+    :param ndim: The number of dimensions of the tensor.
+    :param shape: The shape of the tensor.
+    :param dtype: The element type of the tensor.
+    :param strides: The strides of the tensor.
+    :param other: The values for out-of-bounds positions.
+    :param constexpr_shape: Whether the sizes are constexpr.
+    :param name: The name of the tensor.
+    :param source: For internal use only.
+    :param source_dims: For internal use only.
+    :param target: For internal use only.
+    :param target_dims: For internal use only.
+    """
+
     num_instances = 0
 
     def __init__(
@@ -70,6 +84,14 @@ class Tensor:
         type(self).num_instances += 1
 
     def tile(self, tile_shape, strides=None, dilation=None):
+        """Tiles the tensor into a hierarchical tensor.
+
+        :param tile_shape: The shape of a tile.
+        :param strides: The interval at which each tile is generated.
+        :param dilation: The spacing between tiles.
+        :return: A hierarchical tensor.
+        """
+
         if strides is None:
             strides = [-1 for _ in tile_shape]
@@ -90,8 +112,11 @@ class Tensor:
         if stride == -1:
             stride = tile_size
 
+        def cdiv(x, y):
+            return (x + y - 1) // y
+
         new_size = (
-
+            (cdiv(self_size - spacing * (tile_size - 1) - 1, stride) + 1)
             if stride != 0
             else -1
         )
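
A worked instance of the new tile-count expression; treating spacing as the dilation value with a default of 1 is an assumption made for this illustration, since the hunk does not show how spacing is derived:

    def cdiv(x, y):
        return (x + y - 1) // y

    # 10 elements, tiles of size 3, stride 3 (a stride of -1 resolves to
    # the tile size above), spacing 1: ceil((10 - 1 * 2 - 1) / 3) + 1 = 4.
    self_size, tile_size, stride, spacing = 10, 3, 3, 1
    num_tiles = cdiv(self_size - spacing * (tile_size - 1) - 1, stride) + 1
    print(num_tiles)  # 4 -- tiles begin at 0, 3, 6, 9; the ragged last tile is masked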
@@ -119,6 +144,12 @@ class Tensor:
         )
 
     def expand(self, shape):
+        """Expands the specified singleton dimensions of the tensor.
+
+        :param shape: The expanded shape.
+        :return: The expanded tensor.
+        """
+
         # TODO: Add error handling.
         return type(self)(
             shape=[
@@ -136,6 +167,12 @@ class Tensor:
         )
 
     def squeeze(self, dim):
+        """Removes the specified singleton dimensions of the tensor.
+
+        :param dim: The dimension(s) to be squeezed.
+        :return: The squeezed tensor.
+        """
+
         if not isinstance(dim, tuple):
             dim = (dim,)
 
@@ -158,6 +195,12 @@ class Tensor:
         )
 
     def permute(self, dims):
+        """Permutes the dimensions of the tensor.
+
+        :param dims: The permuted ordering of the dimensions.
+        :return: The permuted tensor.
+        """
+
         # TODO: Add error handling.
         new_shape = [None for _ in range(self.ndim)]
         new_strides = [None for _ in range(self.ndim)]
@@ -178,6 +221,16 @@ class Tensor:
         )
 
     def flatten(self, start_dim=None, end_dim=None):
+        """Flattens the specified dimensions of the tensor.
+
+        See :func:`ravel` for the differences between :func:`flatten`
+        and :func:`ravel`.
+
+        :param start_dim: The first dimension to flatten.
+        :param end_dim: The dimension after the last to flatten.
+        :return: The flattened tensor.
+        """
+
         # TODO: Add error handling.
         if start_dim is None:
             start_dim = 0
@@ -222,6 +275,18 @@ class Tensor:
         )
 
     def ravel(self):
+        """Flattens the hierarchy of the tensor.
+
+        :func:`ravel` differs from :func:`flatten`, which only flattens
+        dimensions at a single level. For example, consider a tensor
+        with two levels: the first level has a shape of ``(N, P, Q)``,
+        and the second level has a shape of ``(C, R, S)``. After
+        applying :func:`ravel`, the resulting tensor will have a single
+        flattened level with a shape of ``(N, P, Q, C, R, S)``.
+
+        :return: The raveled tensor.
+        """
+
         # TODO: Add error handling.
         new_shape = []
         new_strides = []
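
A sketch of the docstring's two-level example with concrete, illustrative sizes (the commented shapes follow the documented semantics; they are not output captured from this version):

    from ninetoothed import Tensor

    x = Tensor(shape=(8, 8, 64))  # one level: (8, 8, 64)
    tiled = x.tile((2, 2, 16))    # two levels: (4, 4, 4) of (2, 2, 16) tiles
    raveled = tiled.ravel()       # one flattened level: (4, 4, 4, 2, 2, 16)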

ninetoothed-0.12.0/src/ninetoothed/visualization.py
@@ -0,0 +1,122 @@
+import matplotlib.pyplot as plt
+import numpy as np
+from mpl_toolkits.axes_grid1 import Divider, Size
+
+
+def visualize(tensor, color=None, save_path=None):
+    outline_width = 0.1
+    plt.rcParams["lines.linewidth"] = 72 * outline_width
+
+    if color is None:
+        color = f"C{visualize.count}"
+
+    _, max_pos_x, max_pos_y = _visualize_tensor(plt.gca(), tensor, 0, 0, color)
+
+    width = max_pos_y + 1
+    height = max_pos_x + 1
+
+    fig = plt.figure(figsize=(width + outline_width, height + outline_width))
+
+    h = (Size.Fixed(0), Size.Fixed(width + outline_width))
+    v = (Size.Fixed(0), Size.Fixed(height + outline_width))
+
+    divider = Divider(fig, (0, 0, 1, 1), h, v, aspect=False)
+
+    ax = fig.add_axes(
+        divider.get_position(), axes_locator=divider.new_locator(nx=1, ny=1)
+    )
+
+    ax.set_aspect("equal")
+    ax.invert_yaxis()
+
+    plt.axis("off")
+
+    half_outline_width = outline_width / 2
+    plt.xlim((-half_outline_width, width + half_outline_width))
+    plt.ylim((-half_outline_width, height + half_outline_width))
+
+    _visualize_tensor(ax, tensor, 0, 0, color)
+
+    plt.savefig(save_path, transparent=True, bbox_inches="tight", pad_inches=0)
+
+    plt.close()
+
+    visualize.count += 1
+
+
+visualize.count = 0
+
+
+def _visualize_tensor(ax, tensor, x, y, color, level_spacing=4):
+    verts = _visualize_level(ax, tensor, x, y, color)
+
+    if tensor.dtype is None:
+        return verts, verts[1][1][0], verts[1][1][1]
+
+    next_x, next_y = verts[0][1]
+    next_y += level_spacing + 1
+
+    next_verts, max_pos_x, max_pos_y = _visualize_tensor(
+        ax, tensor.dtype, next_x, next_y, color
+    )
+
+    conn_verts = _verts_of_rect(1, level_spacing, next_x, next_y - level_spacing)
+    conn_verts = [list(vert) for vert in conn_verts]
+    conn_verts[2][0] += next_verts[1][0][0]
+
+    pos_y, pos_x = zip(*conn_verts)
+    pos_x = pos_x + (pos_x[0],)
+    pos_y = pos_y + (pos_y[0],)
+
+    ax.plot(pos_x[1:3], pos_y[1:3], "k--")
+    ax.plot(pos_x[3:5], pos_y[3:5], "k--")
+
+    max_pos_x = max(max_pos_x, verts[1][1][0])
+    max_pos_y = max(max_pos_y, verts[1][1][1])
+
+    return verts, max_pos_x, max_pos_y
+
+
+def _visualize_level(ax, level, x, y, color):
+    offsets = [1 for _ in range(level.ndim)]
+
+    for dim in range(-3, -level.ndim - 1, -1):
+        offsets[dim] = offsets[dim + 2] * level.shape[dim + 2] + 1
+
+    indices = np.indices(level.shape)
+    flattened_indices = np.stack(
+        [indices[i].flatten() for i in range(level.ndim)], axis=-1
+    )
+
+    max_pos_x = x
+    max_pos_y = y
+
+    for indices in flattened_indices:
+        pos = [x, y]
+
+        for dim, index in enumerate(indices):
+            pos[(level.ndim - dim) % 2] += index * offsets[dim]
+
+        max_pos_x = max(max_pos_x, pos[0])
+        max_pos_y = max(max_pos_y, pos[1])
+
+        _visualize_unit_square(ax, pos[1], pos[0], color)
+
+    verts = (((x, y), (x, max_pos_y)), ((max_pos_x, y), (max_pos_x, max_pos_y)))
+
+    return verts
+
+
+def _visualize_unit_square(ax, x, y, color):
+    _visualize_rect(ax, 1, 1, x, y, color)
+
+
+def _visualize_rect(ax, width, height, x, y, color):
+    pos_x, pos_y = zip(*_verts_of_rect(width, height, x, y))
+
+    ax.fill(pos_x, pos_y, color)
+    ax.plot(pos_x + (pos_x[0],), pos_y + (pos_y[0],), "k")
+
+
+def _verts_of_rect(width, height, x, y):
+    return ((x, y), (x + width, y), (x + width, y + height), (x, y + height))
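
A hypothetical usage sketch for the new module; the shape is illustrative, and note that plt.savefig is called unconditionally above, so save_path is effectively required:

    from ninetoothed import Tensor
    from ninetoothed.visualization import visualize

    # Visualize a two-level tiling and write it out as a PNG.
    tensor = Tensor(shape=(8, 8))
    tiled = tensor.tile((4, 4))

    visualize(tiled, save_path="tiling.png")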

ninetoothed-0.12.0/tests/test_attention.py
@@ -0,0 +1,92 @@
+import torch
+import torch.nn.functional as F
+
+import ninetoothed
+import ninetoothed.language as ntl
+from ninetoothed import Symbol, Tensor
+from tests.skippers import skip_if_cuda_not_available
+
+
+def arrangement(q, k, v, o):
+    BLOCK_SIZE_M = Symbol("BLOCK_SIZE_M", constexpr=True)
+    BLOCK_SIZE_N = Symbol("BLOCK_SIZE_N", constexpr=True)
+
+    def arrange_q_or_o(input):
+        arranged = input.tile((1, 1, BLOCK_SIZE_M, -1))
+        arranged.dtype = arranged.dtype.squeeze((0, 1))
+
+        return arranged
+
+    def arrange_k_or_v(input):
+        arranged = (
+            input.tile((1, 1, BLOCK_SIZE_N, -1))
+            .tile((1, 1, -1, -1))
+            .expand((-1, -1, q_arranged.shape[-2], -1))
+        )
+        arranged.dtype = arranged.dtype.squeeze((0, 1, 3))
+        arranged.dtype.dtype = arranged.dtype.dtype.squeeze((0, 1))
+
+        return arranged
+
+    q_arranged = arrange_q_or_o(q)
+
+    return q_arranged, arrange_k_or_v(k), arrange_k_or_v(v), arrange_q_or_o(o)
+
+
+def application(q, k, v, o):
+    q_loaded = (q * 1.44269504089).to(ntl.float16)
+
+    acc = ntl.zeros((q.shape[-2], q.shape[-1]), dtype=ntl.float32)
+    l_i = ntl.full((q.shape[-2],), 1, dtype=ntl.float32)
+    m_i = ntl.full((q.shape[-2],), float("-inf"), dtype=ntl.float32)
+
+    for i in range(k.shape[0]):
+        qk = ntl.dot(q_loaded, ntl.trans(k[i]))
+
+        m_ij = ntl.maximum(m_i, ntl.max(qk, 1))
+        p = ntl.exp2(qk - m_ij[:, None])
+        l_ij = ntl.sum(p, 1)
+
+        alpha = ntl.exp2(m_i - m_ij)
+        acc = acc * alpha[:, None] + ntl.dot(p.to(ntl.float16), v[i])
+        m_i = m_ij
+        l_i = l_i * alpha + l_ij
+
+    acc /= l_i[:, None]
+    o = acc  # noqa: F841
+
+
+def attention(q, k, v):
+    o = torch.empty_like(q, dtype=v.dtype)
+
+    attention_kernel = ninetoothed.make(
+        arrangement, application, (Tensor(4, constexpr_shape=True) for _ in range(4))
+    )
+
+    attention_kernel(q, k, v, o, BLOCK_SIZE_M=128, BLOCK_SIZE_N=64)
+
+    return o
+
+
+@skip_if_cuda_not_available
+class TestCUDA:
+    @classmethod
+    def setup_class(cls):
+        torch.manual_seed(0)
+
+        shape = (2, 4, 1024, 64)
+
+        cls.q = torch.randn(shape, device="cuda")
+        cls.k = torch.randn(shape, device="cuda")
+        cls.v = torch.randn(shape, device="cuda")
+
+    def test_fp16(self):
+        q = type(self).q.to(torch.float16)
+        k = type(self).k.to(torch.float16)
+        v = type(self).v.to(torch.float16)
+
+        assert torch.allclose(
+            attention(q, k, v),
+            F.scaled_dot_product_attention(q, k, v, scale=1),
+            atol=0.01,
+        )
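
Two details worth noting: the reference call passes scale=1 because the kernel applies no 1/sqrt(d) scaling of its own, and the constant 1.44269504089 is log2(e), which lets the online softmax use the GPU-friendlier exp2 in place of exp:

    import math

    # exp(x) == exp2(x * log2(e)), so pre-scaling q by log2(e) makes every
    # subsequent exponential an exp2.
    assert math.isclose(1.44269504089, math.log2(math.e), rel_tol=1e-9)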

ninetoothed-0.12.0/tests/test_conv2d.py
@@ -0,0 +1,62 @@
+import torch
+import torch.nn.functional as F
+
+import ninetoothed
+import tests.test_matmul as matmul
+from ninetoothed import Tensor
+from tests.skippers import skip_if_cuda_not_available
+
+
+def arrangement(input, filter, output):
+    input_tiled = input.tile((1, *filter.shape[1:]), strides=(-1, -1, 1, 1))
+    input_squeezed = input_tiled.squeeze(1)
+    input_squeezed.dtype = input_squeezed.dtype.squeeze(0)
+    input_raveled = input_squeezed.ravel()
+    input_flattened = input_raveled.flatten(end_dim=3).flatten(start_dim=1)
+
+    filter_flattened = filter.flatten(start_dim=1)
+    filter_permuted = filter_flattened.permute((1, 0))
+
+    output_flattened = output.permute((0, 2, 3, 1)).flatten(end_dim=3)
+
+    return matmul.arrangement(input_flattened, filter_permuted, output_flattened)
+
+
+def conv2d(input, filter):
+    n, _, h, w = input.shape
+    k, _, r, s = filter.shape
+    p = h - r + 1
+    q = w - s + 1
+
+    output = torch.empty((n, k, p, q), device=input.device, dtype=input.dtype)
+
+    conv2d_kernel = ninetoothed.make(
+        arrangement,
+        matmul.application,
+        (Tensor(4), Tensor(4, constexpr_shape=True), Tensor(4)),
+    )
+
+    conv2d_kernel(input, filter, output)
+
+    return output
+
+
+@skip_if_cuda_not_available
+class TestCUDA:
+    @classmethod
+    def setup_class(cls):
+        torch.manual_seed(0)
+
+        n, c, h, w = 4, 64, 16, 16
+        k, _, r, s = 512, c, 3, 3
+
+        cls.input = torch.randn(n, c, h, w, device="cuda")
+        cls.filter = torch.randn(k, c, r, s, device="cuda")
+
+    def test_fp16(self):
+        input = type(self).input.to(torch.float16)
+        filter = type(self).filter.to(torch.float16)
+
+        assert torch.allclose(
+            conv2d(input, filter), F.conv2d(input, filter), atol=0.001, rtol=0.001
+        )
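
The arrangement is an im2col-style lowering of conv2d onto the matmul kernel above; a quick shape check with the test's sizes (plain arithmetic, not a kernel run):

    # input (N, C, H, W) is tiled by (1, C, R, S) windows, raveled, and
    # flattened into an (N*P*Q, C*R*S) matrix; the filter is flattened and
    # permuted to (C*R*S, K), so the product has shape (N*P*Q, K).
    n, c, h, w = 4, 64, 16, 16
    k, r, s = 512, 3, 3
    p, q = h - r + 1, w - s + 1

    assert (n * p * q, c * r * s) == (784, 576)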

{ninetoothed-0.11.0 → ninetoothed-0.12.0}/tests/test_matmul.py
@@ -6,40 +6,46 @@ from ninetoothed import Symbol, Tensor
 from tests.skippers import skip_if_cuda_not_available, skip_if_float8_e5m2_not_supported
 
 
-def
+def arrangement(lhs, rhs, output):
     BLOCK_SIZE_M = Symbol("BLOCK_SIZE_M", meta=True)
     BLOCK_SIZE_N = Symbol("BLOCK_SIZE_N", meta=True)
     BLOCK_SIZE_K = Symbol("BLOCK_SIZE_K", meta=True)
 
-    output_tiled =
+    output_tiled = output.tile((BLOCK_SIZE_M, BLOCK_SIZE_N))
 
     lhs_tiled = (
-
-        .tile((BLOCK_SIZE_M, BLOCK_SIZE_K))
+        lhs.tile((BLOCK_SIZE_M, BLOCK_SIZE_K))
         .tile((1, -1))
         .expand((-1, output_tiled.shape[1]))
     )
     lhs_tiled.dtype = lhs_tiled.dtype.squeeze(0)
 
     rhs_tiled = (
-
-        .tile((BLOCK_SIZE_K, BLOCK_SIZE_N))
+        rhs.tile((BLOCK_SIZE_K, BLOCK_SIZE_N))
         .tile((-1, 1))
         .expand((output_tiled.shape[0], -1))
     )
     rhs_tiled.dtype = rhs_tiled.dtype.squeeze(1)
 
-
-
-
-
-
-
+    return lhs_tiled, rhs_tiled, output_tiled
+
+
+def application(lhs, rhs, output):
+    accumulator = ntl.zeros(output.shape, dtype=ntl.float32)
+    for k in range(lhs.shape[0]):
+        accumulator += ntl.dot(lhs[k], rhs[k])
+    output = accumulator.to(ntl.float16)
+
 
+def matmul(lhs, rhs):
     output = torch.empty(
         (lhs.shape[0], rhs.shape[1]), device=lhs.device, dtype=torch.float16
     )
 
+    matmul_kernel = ninetoothed.make(
+        arrangement, application, (Tensor(2), Tensor(2), Tensor(2))
+    )
+
     matmul_kernel(lhs, rhs, output)
 
     return output
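
A hypothetical call into the refactored helper, under the same CUDA/float16 assumptions as the test class:

    import torch

    lhs = torch.randn(512, 256, device="cuda", dtype=torch.float16)
    rhs = torch.randn(256, 128, device="cuda", dtype=torch.float16)

    out = matmul(lhs, rhs)  # (512, 128), float16, computed by the generated kernel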