depthtensor 2.4.0 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- depthtensor-2.4.0/LICENSE +21 -0
- depthtensor-2.4.0/PKG-INFO +155 -0
- depthtensor-2.4.0/README.md +115 -0
- depthtensor-2.4.0/pyproject.toml +35 -0
- depthtensor-2.4.0/setup.cfg +4 -0
- depthtensor-2.4.0/src/DepthTensor/__init__.py +9 -0
- depthtensor-2.4.0/src/DepthTensor/_core/__init__.py +4 -0
- depthtensor-2.4.0/src/DepthTensor/_core/exceptions.py +16 -0
- depthtensor-2.4.0/src/DepthTensor/_core/ops/__init__.py +6 -0
- depthtensor-2.4.0/src/DepthTensor/_core/ops/comparison.py +318 -0
- depthtensor-2.4.0/src/DepthTensor/_core/ops/creation.py +126 -0
- depthtensor-2.4.0/src/DepthTensor/_core/ops/diff.py +272 -0
- depthtensor-2.4.0/src/DepthTensor/_core/ops/elementwise.py +785 -0
- depthtensor-2.4.0/src/DepthTensor/_core/ops/reduction.py +146 -0
- depthtensor-2.4.0/src/DepthTensor/_core/random/__init__.py +1 -0
- depthtensor-2.4.0/src/DepthTensor/_core/random/generator.py +131 -0
- depthtensor-2.4.0/src/DepthTensor/_core/utils.py +228 -0
- depthtensor-2.4.0/src/DepthTensor/autodiff.py +75 -0
- depthtensor-2.4.0/src/DepthTensor/tensor.py +576 -0
- depthtensor-2.4.0/src/DepthTensor/typing.py +121 -0
- depthtensor-2.4.0/src/depthtensor.egg-info/PKG-INFO +155 -0
- depthtensor-2.4.0/src/depthtensor.egg-info/SOURCES.txt +23 -0
- depthtensor-2.4.0/src/depthtensor.egg-info/dependency_links.txt +1 -0
- depthtensor-2.4.0/src/depthtensor.egg-info/requires.txt +4 -0
- depthtensor-2.4.0/src/depthtensor.egg-info/top_level.txt +1 -0

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Le Hong Ha

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,155 @@
Metadata-Version: 2.4
Name: depthtensor
Version: 2.4.0
Summary: A Hardware-Accelerated Tensor Computation and Autograd Engine
Author-email: Le Hong Ha <lehahong1310@gmail.com>
License: MIT License

Copyright (c) 2025 Le Hong Ha

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Project-URL: Homepage, https://github.com/l-h-ha/DepthTensor
Project-URL: Bug Tracker, https://github.com/l-h-ha/DepthTensor/issues
Classifier: Programming Language :: Python :: 3
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: numpy
Provides-Extra: gpu
Requires-Dist: cupy; extra == "gpu"
Dynamic: license-file

# DepthTensor: A Hardware-Accelerated Tensor Computation and Autograd Engine

**DepthTensor** is a lightweight, high-performance library for reverse-mode automatic differentiation (AD). It provides the mathematical foundation for deep learning frameworks through a `Tensor` object that dynamically builds computational graphs and computes gradients using **vector-Jacobian products (VJPs)**, generalized to tensors of arbitrary rank.

> **Note**: This is the core autograd and tensor computation engine. For the full deep learning framework, refer to [DepthML](https://github.com/l-h-ha/DepthML).

## 1. Mathematical Foundation

The goal is to compute the gradient of a scalar loss $L$ with respect to an input tensor $X$, denoted the adjoint $\bar{X}$.

### 1.1. Generalized VJPs via Tensor Contractions

Let $X$ be a tensor of shape $\mathcal{I} = (i_1, \dots, i_n)$ and $Y = f(X)$ be a tensor of shape $\mathcal{J} = (j_1, \dots, j_m)$.

Mathematically, the Jacobian is the rank-$(n + m)$ tensor containing all the partial derivatives $\frac{\partial Y_{\mathcal{J}}}{\partial X_{\mathcal{I}}}$. DepthTensor never materializes this object; instead, it computes the contraction of the incoming adjoint $\bar{Y}$ with the local derivative:

$$
\bar{X}_{i_1 \dots i_n} = \sum_{j_1 \dots j_m} \bar{Y}_{j_1 \dots j_m} \frac{\partial Y_{j_1 \dots j_m}}{\partial X_{i_1 \dots i_n}}
$$

If $X$ and $Y$ are scalars, the indices vanish and this reduces to:

$$
\bar{x} = \bar{y} \cdot f'(x)
$$

If $X$ and $Y$ are matrices related by a linear map $Y = XW$, the contraction reduces to:

$$
\bar{X} = \bar{Y} W^{T}
$$

### 1.2. Graph Topology and Gradient Accumulation

Gradients are accumulated in the order produced by a depth-first-search (DFS) topological sort, which guarantees that for any node $X$ feeding multiple downstream nodes $\{Y^{(1)}, \dots, Y^{(k)}\}$, the total gradient is the sum of contractions:

$$
\bar{X} = \sum_{k} \mathrm{VJP}(Y^{(k)}, X)
$$

Each node's adjoint therefore aggregates the contributions from every downstream path.

## 2. Architecture and System Design

### 2.1. The Differentiable Primitive (`Tensor`)

The `Tensor` class acts as a node in a Directed Acyclic Graph (DAG).

Operations are automatically dispatched to one of two array backends: `numpy` (CPU) or `cupy` (GPU).

Computational graphs are built dynamically at runtime.

## 3. Empirical Validation

We verify the tensor engine by minimizing the Rosenbrock function, a standard non-convex optimization benchmark:

$$
f(x, y) = (a - x)^2 + b(y - x^2)^2
$$

```python
import depthtensor as dt

# Initialize tensors
x = dt.Tensor([1.2], device="gpu", requires_grad=True)
y = dt.Tensor([1.2], device="gpu", requires_grad=True)

a, b = dt.Tensor([1], device="gpu"), dt.Tensor([100], device="gpu")

# Optimization loop
lr = 0.001
for i in range(500):
    # Rosenbrock: f(x,y) = (a-x)^2 + b(y-x^2)^2
    loss = (a - x) ** 2 + b * (y - x**2) ** 2

    # Backward pass
    dt.differentiate(loss)

    # Gradient descent
    x.data -= lr * x.grad  # type: ignore
    y.data -= lr * y.grad  # type: ignore

    # Zero grads
    x.zeros_grad()
    y.zeros_grad()

    if i % 10 == 0:
        print(loss.item())

print(f"Converged: ({x.data}, {y.data})")
# Target: (1.0, 1.0)
```

## 4. Installation

Requirements:
- `numpy`
- `cupy` (optional: for NVIDIA GPU acceleration)

### 4.1. From Source

```bash
git clone https://github.com/l-h-ha/DepthTensor.git
cd DepthTensor
pip install -e .
```

### 4.2. From PyPI

```bash
pip install depthtensor
# or, with GPU support:
pip install "depthtensor[gpu]"
```

## Author: Le Hong Ha

@@ -0,0 +1,115 @@
# DepthTensor: A Hardware-Accelerated Tensor Computation and Autograd Engine

**DepthTensor** is a lightweight, high-performance library for reverse-mode automatic differentiation (AD). It provides the mathematical foundation for deep learning frameworks through a `Tensor` object that dynamically builds computational graphs and computes gradients using **vector-Jacobian products (VJPs)**, generalized to tensors of arbitrary rank.

> **Note**: This is the core autograd and tensor computation engine. For the full deep learning framework, refer to [DepthML](https://github.com/l-h-ha/DepthML).

## 1. Mathematical Foundation

The goal is to compute the gradient of a scalar loss $L$ with respect to an input tensor $X$, denoted the adjoint $\bar{X}$.

### 1.1. Generalized VJPs via Tensor Contractions

Let $X$ be a tensor of shape $\mathcal{I} = (i_1, \dots, i_n)$ and $Y = f(X)$ be a tensor of shape $\mathcal{J} = (j_1, \dots, j_m)$.

Mathematically, the Jacobian is the rank-$(n + m)$ tensor containing all the partial derivatives $\frac{\partial Y_{\mathcal{J}}}{\partial X_{\mathcal{I}}}$. DepthTensor never materializes this object; instead, it computes the contraction of the incoming adjoint $\bar{Y}$ with the local derivative:

$$
\bar{X}_{i_1 \dots i_n} = \sum_{j_1 \dots j_m} \bar{Y}_{j_1 \dots j_m} \frac{\partial Y_{j_1 \dots j_m}}{\partial X_{i_1 \dots i_n}}
$$

If $X$ and $Y$ are scalars, the indices vanish and this reduces to:

$$
\bar{x} = \bar{y} \cdot f'(x)
$$

If $X$ and $Y$ are matrices related by a linear map $Y = XW$, the contraction reduces to:

$$
\bar{X} = \bar{Y} W^{T}
$$
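
To make the contraction concrete, here is a minimal NumPy sketch of the matrix case $Y = XW$ (an illustration, not DepthTensor code): contracting the adjoint $\bar{Y}$ over the output indices with `einsum` recovers the closed form $\bar{Y} W^{T}$.

```python
import numpy as np

# Sketch of the VJP contraction for Y = X @ W (not DepthTensor API).
rng = np.random.default_rng(0)
X = rng.standard_normal((4, 3))
W = rng.standard_normal((3, 5))
Y_bar = rng.standard_normal((4, 5))      # incoming adjoint, same shape as Y

# For Y_ij = sum_k X_ik W_kj, dY_ij/dX_ab = delta_ia * W_bj,
# so X_bar_ab = sum_j Y_bar_aj W_bj -- a contraction over the output index j.
X_bar = np.einsum("ij,kj->ik", Y_bar, W)

assert np.allclose(X_bar, Y_bar @ W.T)   # matches the closed form above
```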

### 1.2. Graph Topology and Gradient Accumulation

Gradients are accumulated in the order produced by a depth-first-search (DFS) topological sort, which guarantees that for any node $X$ feeding multiple downstream nodes $\{Y^{(1)}, \dots, Y^{(k)}\}$, the total gradient is the sum of contractions:

$$
\bar{X} = \sum_{k} \mathrm{VJP}(Y^{(k)}, X)
$$

Each node's adjoint therefore aggregates the contributions from every downstream path, as the sketch below illustrates.
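
The accumulation rule can be sketched with a toy DAG (a hypothetical node structure; DepthTensor's actual internals live in `autodiff.py` and `tensor.py`, which this diff lists but does not expand): DFS post-order yields a topological order, and walking it in reverse finalizes every adjoint before it is consumed.

```python
# Toy DAG node (hypothetical; not DepthTensor's actual internals) showing
# DFS topological sorting and reverse-order adjoint accumulation.
class Node:
    def __init__(self, parents=(), vjps=()):
        self.parents = list(parents)   # nodes this node was computed from
        self.vjps = list(vjps)         # one VJP callable per parent
        self.grad = None

def topo_sort(root):
    order, seen = [], set()
    def dfs(node):
        if id(node) in seen:
            return
        seen.add(id(node))
        for parent in node.parents:
            dfs(parent)
        order.append(node)             # post-order: parents precede children
    dfs(root)
    return order

def backward(root):
    root.grad = 1.0                    # seed: dL/dL = 1
    for node in reversed(topo_sort(root)):
        for parent, vjp in zip(node.parents, node.vjps):
            contrib = vjp(node.grad)   # contract adjoint with local derivative
            parent.grad = contrib if parent.grad is None else parent.grad + contrib

# x feeds two consumers; its adjoint is the sum of both VJP contributions.
x = Node()
y1 = Node(parents=[x], vjps=[lambda g: 2.0 * g])   # dy1/dx = 2
y2 = Node(parents=[x], vjps=[lambda g: 3.0 * g])   # dy2/dx = 3
loss = Node(parents=[y1, y2], vjps=[lambda g: g, lambda g: g])
backward(loss)
print(x.grad)  # 5.0 = 2 + 3
```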

## 2. Architecture and System Design

### 2.1. The Differentiable Primitive (`Tensor`)

The `Tensor` class acts as a node in a Directed Acyclic Graph (DAG).

Operations are automatically dispatched to one of two array backends: `numpy` (CPU) or `cupy` (GPU).

Computational graphs are built dynamically at runtime.
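
The dispatch pattern mirrors what the `_core/ops` modules in this release do (see `comparison.py` below): try to import `cupy`, fall back to `None`, and branch on the device string. A minimal standalone sketch, with a helper name (`get_xp`) chosen for illustration:

```python
import numpy as np

try:
    import cupy as cp                  # optional GPU backend
except (ImportError, ModuleNotFoundError):
    cp = None

def get_xp(device: str):
    """Return the array module for a device (sketch of the pattern used in
    the _core/ops modules; the package's own helpers may differ in detail)."""
    if device == "cpu":
        return np
    if cp is None:
        raise RuntimeError("CuPy not installed; 'gpu' device unavailable.")
    return cp

xp = get_xp("cpu")
print(xp.exp(xp.array([0.0, 1.0])))    # NumPy on CPU; CuPy when device="gpu"
```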

## 3. Empirical Validation

We verify the tensor engine by minimizing the Rosenbrock function, a standard non-convex optimization benchmark:

$$
f(x, y) = (a - x)^2 + b(y - x^2)^2
$$

```python
import depthtensor as dt

# Initialize tensors
x = dt.Tensor([1.2], device="gpu", requires_grad=True)
y = dt.Tensor([1.2], device="gpu", requires_grad=True)

a, b = dt.Tensor([1], device="gpu"), dt.Tensor([100], device="gpu")

# Optimization loop
lr = 0.001
for i in range(500):
    # Rosenbrock: f(x,y) = (a-x)^2 + b(y-x^2)^2
    loss = (a - x) ** 2 + b * (y - x**2) ** 2

    # Backward pass
    dt.differentiate(loss)

    # Gradient descent
    x.data -= lr * x.grad  # type: ignore
    y.data -= lr * y.grad  # type: ignore

    # Zero grads
    x.zeros_grad()
    y.zeros_grad()

    if i % 10 == 0:
        print(loss.item())

print(f"Converged: ({x.data}, {y.data})")
# Target: (1.0, 1.0)
```
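
As a sanity check, the same optimization can be reproduced without the engine using the analytic gradients of the Rosenbrock function. A backend-free NumPy sketch (not part of the package):

```python
import numpy as np  # only for parity with the rest of the docs; plain floats suffice

# Reference run with analytic gradients of f(x, y) = (a - x)^2 + b*(y - x^2)^2:
#   df/dx = -2(a - x) - 4bx(y - x^2),   df/dy = 2b(y - x^2)
a, b, lr = 1.0, 100.0, 0.001
x, y = 1.2, 1.2
for i in range(500):
    r = y - x**2
    dx = -2.0 * (a - x) - 4.0 * b * x * r
    dy = 2.0 * b * r
    x -= lr * dx
    y -= lr * dy
print(x, y)  # slowly approaches the minimum at (1.0, 1.0)
```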

## 4. Installation

Requirements:
- `numpy`
- `cupy` (optional: for NVIDIA GPU acceleration)

### 4.1. From Source

```bash
git clone https://github.com/l-h-ha/DepthTensor.git
cd DepthTensor
pip install -e .
```

### 4.2. From PyPI

```bash
pip install depthtensor
# or, with GPU support:
pip install "depthtensor[gpu]"
```

## Author: Le Hong Ha

@@ -0,0 +1,35 @@
[build-system]
requires = ["setuptools>=61.0.0"]
build-backend = "setuptools.build_meta"

[project]
name = "depthtensor"
version = "2.4.0"
description = "A Hardware-Accelerated Tensor Computation and Autograd Engine"
readme = "README.md"
authors = [
    { name = "Le Hong Ha", email = "lehahong1310@gmail.com" }
]
license = { file = "LICENSE" }
classifiers = [
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent"
]
requires-python = ">=3.8"
dependencies = [
    "numpy"
]

[project.optional-dependencies]
gpu = [
    "cupy"
]

[project.urls]
"Homepage" = "https://github.com/l-h-ha/DepthTensor"
"Bug Tracker" = "https://github.com/l-h-ha/DepthTensor/issues"

[tool.setuptools.packages.find]
where = ["src"]

@@ -0,0 +1,16 @@
CUPY_NOT_FOUND_MSG = "CuPy module not found. Please install CuPy."
DEVICE_MISMATCH_MSG = "There is a mismatch in device between two objects."
OPERAND_MISMATCH_MSG = "There is a mismatch in type between two operands."
GRADIENT_COMPUTATION_ERROR = "An error occurred during gradient computation."

class CuPyNotFound(RuntimeError):
    ...

class DeviceMismatch(RuntimeError):
    ...

class OperandMismatch(RuntimeError):
    ...

class GradientComputationError(RuntimeError):
    ...

@@ -0,0 +1,318 @@
from typing import Union, Optional, Tuple, overload

from ...typing import (
    TensorLike,
    DeviceLike,
    NDArrayLikeBool,
    Casting,
    Order,
    OperandLike,
)

from ..exceptions import (
    CuPyNotFound,
    CUPY_NOT_FOUND_MSG,
    DeviceMismatch,
    DEVICE_MISMATCH_MSG,
)

from ..utils import to_xp_array, get_device, get_two_operand_op_device

import numpy as np

try:
    import cupy as cp
except (ImportError, ModuleNotFoundError):
    cp = None  # GPU backend is optional; checked before every "gpu" dispatch

###
###
###


@overload
def where(
    condition: OperandLike,
    /,
    *,
    device: DeviceLike = "cpu",
    requires_grad: bool = False,
) -> Tuple[TensorLike, ...]: ...


@overload
def where(
    condition: OperandLike,
    x: Optional[OperandLike],
    y: Optional[OperandLike],
    /,
    *,
    device: DeviceLike = "cpu",
    requires_grad: bool = False,
) -> TensorLike: ...


def where(
    condition: OperandLike,
    x: Optional[OperandLike] = None,
    y: Optional[OperandLike] = None,
    /,
    *,
    device: Optional[DeviceLike] = None,
    requires_grad: bool = False,
) -> Union[Tuple[TensorLike, ...], TensorLike]:
    from ...tensor import Tensor

    if device is None:
        device = get_device(condition)

    # * One-parameter overload: return index arrays where condition holds.
    if (x is None) and (y is None):
        data = to_xp_array(condition, device=device)
        if device == "cpu":
            result = np.where(data)
        else:
            if cp is None:
                raise CuPyNotFound(CUPY_NOT_FOUND_MSG)
            result = cp.where(data)
        return tuple(Tensor(array, requires_grad=requires_grad) for array in result)
    # * Two-parameter overload (x and y given): select elements from x or y.
    elif x is not None and y is not None:
        # Python scalars and sequences are device-agnostic; only reject a
        # mismatch when both operands carry their own device.
        if (
            not (get_device(x) == get_device(y) == device)
            and not isinstance(x, (int, float, list, tuple))
            and not isinstance(y, (int, float, list, tuple))
        ):
            raise DeviceMismatch(DEVICE_MISMATCH_MSG)

        data = to_xp_array(condition, device=device)
        x_data = to_xp_array(x, device=device)
        y_data = to_xp_array(y, device=device)
        if device == "cpu":
            result = np.where(data, x_data, y_data)
        else:
            if cp is None:
                raise CuPyNotFound(CUPY_NOT_FOUND_MSG)
            result = cp.where(data, x_data, y_data)
        return Tensor(result, requires_grad=requires_grad)
    else:
        raise ValueError("x and y must be provided together, or both omitted.")


###
###
###


def wrapper_2in_1out(
    x1: OperandLike,
    x2: OperandLike,
    /,
    out: Optional[NDArrayLikeBool] = None,
    *,
    func_name: str,
    device: Optional[DeviceLike] = None,
    where: Union[bool, NDArrayLikeBool] = True,
    casting: Casting = "same_kind",
    order: Order = "K",
    dtype: None = None,
    subok: bool = True,
) -> TensorLike:
    from ...tensor import Tensor

    op_device = get_two_operand_op_device(x1, x2, device)

    x1, x2 = to_xp_array(x1, device=op_device), to_xp_array(x2, device=op_device)
    if op_device == "cpu":
        y = getattr(np, func_name)(
            x1,
            x2,
            out=out,
            dtype=dtype,
            where=where,
            casting=casting,
            order=order,
            subok=subok,
        )
    else:
        if cp is None:
            raise CuPyNotFound(CUPY_NOT_FOUND_MSG)
        # CuPy ufuncs do not accept where/order/subok, so only the shared
        # keyword arguments are forwarded on the GPU path.
        y = getattr(cp, func_name)(x1, x2, out=out, dtype=dtype, casting=casting)
    return Tensor(y)


def equal(
    x1: OperandLike,
    x2: OperandLike,
    /,
    out: Optional[NDArrayLikeBool] = None,
    *,
    device: Optional[DeviceLike] = None,
    where: Union[bool, NDArrayLikeBool] = True,
    casting: Casting = "same_kind",
    order: Order = "K",
    dtype: None = None,
    subok: bool = True,
) -> TensorLike:
    return wrapper_2in_1out(
        x1,
        x2,
        out=out,
        func_name="equal",
        device=device,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


def not_equal(
    x1: OperandLike,
    x2: OperandLike,
    /,
    out: Optional[NDArrayLikeBool] = None,
    *,
    device: Optional[DeviceLike] = None,
    where: Union[bool, NDArrayLikeBool] = True,
    casting: Casting = "same_kind",
    order: Order = "K",
    dtype: None = None,
    subok: bool = True,
) -> TensorLike:
    return wrapper_2in_1out(
        x1,
        x2,
        out=out,
        func_name="not_equal",
        device=device,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


def greater(
    x1: OperandLike,
    x2: OperandLike,
    /,
    out: Optional[NDArrayLikeBool] = None,
    *,
    device: Optional[DeviceLike] = None,
    where: Union[bool, NDArrayLikeBool] = True,
    casting: Casting = "same_kind",
    order: Order = "K",
    dtype: None = None,
    subok: bool = True,
) -> TensorLike:
    return wrapper_2in_1out(
        x1,
        x2,
        out=out,
        func_name="greater",
        device=device,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


def greater_equal(
    x1: OperandLike,
    x2: OperandLike,
    /,
    out: Optional[NDArrayLikeBool] = None,
    *,
    device: Optional[DeviceLike] = None,
    where: Union[bool, NDArrayLikeBool] = True,
    casting: Casting = "same_kind",
    order: Order = "K",
    dtype: None = None,
    subok: bool = True,
) -> TensorLike:
    return wrapper_2in_1out(
        x1,
        x2,
        out=out,
        func_name="greater_equal",
        device=device,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


def less(
    x1: OperandLike,
    x2: OperandLike,
    /,
    out: Optional[NDArrayLikeBool] = None,
    *,
    device: Optional[DeviceLike] = None,
    where: Union[bool, NDArrayLikeBool] = True,
    casting: Casting = "same_kind",
    order: Order = "K",
    dtype: None = None,
    subok: bool = True,
) -> TensorLike:
    return wrapper_2in_1out(
        x1,
        x2,
        out=out,
        func_name="less",
        device=device,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


def less_equal(
    x1: OperandLike,
    x2: OperandLike,
    /,
    out: Optional[NDArrayLikeBool] = None,
    *,
    device: Optional[DeviceLike] = None,
    where: Union[bool, NDArrayLikeBool] = True,
    casting: Casting = "same_kind",
    order: Order = "K",
    dtype: None = None,
    subok: bool = True,
) -> TensorLike:
    return wrapper_2in_1out(
        x1,
        x2,
        out=out,
        func_name="less_equal",
        device=device,
        where=where,
        casting=casting,
        order=order,
        dtype=dtype,
        subok=subok,
    )


###
###
###

__all__ = [
    "where",
    "equal",
    "not_equal",
    "greater",
    "greater_equal",
    "less",
    "less_equal",
]
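
To round out the hunk, a hypothetical usage sketch of these comparison ops. The import path follows the file layout listed above; whether the ops are re-exported at the package root is not visible in this diff, so the module is imported directly, and default devices are assumed to be "cpu" as in the signatures:

```python
# Hypothetical usage sketch (assumes the package is installed; names follow
# the file layout in this diff, not a documented public API).
from DepthTensor._core.ops import comparison as cmp
from DepthTensor.tensor import Tensor

t1 = Tensor([1.0, 2.0, 3.0])
t2 = Tensor([3.0, 2.0, 1.0])

mask = cmp.greater(t1, t2)   # element-wise t1 > t2 -> boolean Tensor
eq = cmp.equal(t1, t2)       # element-wise t1 == t2

# Three-argument form: select from t1 where the condition holds, else t2.
sel = cmp.where(mask, t1, t2)

# One-argument form: a tuple of index Tensors, like np.where(condition).
idx = cmp.where(mask)
```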