deepordinal 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepordinal-0.2.0/LICENSE +21 -0
- deepordinal-0.2.0/PKG-INFO +185 -0
- deepordinal-0.2.0/README.md +163 -0
- deepordinal-0.2.0/deepordinal/__init__.py +2 -0
- deepordinal-0.2.0/deepordinal/tf.py +163 -0
- deepordinal-0.2.0/deepordinal/torch.py +123 -0
- deepordinal-0.2.0/deepordinal.egg-info/PKG-INFO +185 -0
- deepordinal-0.2.0/deepordinal.egg-info/SOURCES.txt +13 -0
- deepordinal-0.2.0/deepordinal.egg-info/dependency_links.txt +1 -0
- deepordinal-0.2.0/deepordinal.egg-info/requires.txt +7 -0
- deepordinal-0.2.0/deepordinal.egg-info/top_level.txt +1 -0
- deepordinal-0.2.0/pyproject.toml +42 -0
- deepordinal-0.2.0/setup.cfg +4 -0
- deepordinal-0.2.0/tests/test_tf.py +220 -0
- deepordinal-0.2.0/tests/test_torch.py +223 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nicholas Hirons
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepordinal
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Ordinal output layers and loss functions (Rennie & Srebro, 2005) for PyTorch and TF/Keras
|
|
5
|
+
Author: Nicholas Hirons
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/nhirons/deepordinal
|
|
8
|
+
Keywords: ordinal-regression,deep-learning,pytorch,tensorflow,keras
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Provides-Extra: tf
|
|
18
|
+
Requires-Dist: tensorflow>=2.0; extra == "tf"
|
|
19
|
+
Provides-Extra: torch
|
|
20
|
+
Requires-Dist: torch>=2.0; extra == "torch"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# DeepOrdinal
|
|
24
|
+
|
|
25
|
+
Ordinal output layers and loss functions ([Rennie & Srebro, 2005](https://ttic.uchicago.edu/~nati/Publications/RennieSrebroIJCAI05.pdf)) for PyTorch and TF/Keras.
|
|
26
|
+
|
|
27
|
+
DeepOrdinal provides an `OrdinalOutput` layer that converts a learned logit into ordinal class probabilities via sorted thresholds, plus loss functions designed specifically for ordinal regression.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install deepordinal
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
With a specific backend:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install ".[tf]" # TensorFlow/Keras
|
|
39
|
+
pip install ".[torch]" # PyTorch
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For development:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install -e ".[tf,torch]"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Backends
|
|
49
|
+
|
|
50
|
+
DeepOrdinal supports two backends with identical APIs:
|
|
51
|
+
|
|
52
|
+
| | PyTorch | TensorFlow/Keras |
|
|
53
|
+
|---|---|---|
|
|
54
|
+
| Module | `deepordinal.torch` | `deepordinal.tf` |
|
|
55
|
+
| Layer | `OrdinalOutput(input_dim=D, output_dim=K)` | `OrdinalOutput(output_dim=K)` |
|
|
56
|
+
| Loss functions | `ordinal_loss`, `ordistic_loss` | `ordinal_loss`, `ordistic_loss` |
|
|
57
|
+
|
|
58
|
+
## OrdinalOutput Layer
|
|
59
|
+
|
|
60
|
+
The `OrdinalOutput` layer accepts any input size, projects to a single logit, and converts it into K class probabilities using K-1 learned, sorted thresholds:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
P(y = k | x) = sigmoid(t(k+1) - logit) - sigmoid(t(k) - logit)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
where `t(0) = -inf` and `t(K) = inf` are fixed, and interior thresholds are initialized sorted.
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from deepordinal.torch import OrdinalOutput # or deepordinal.tf
|
|
70
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4) # TF omits input_dim
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Loss Functions
|
|
74
|
+
|
|
75
|
+
DeepOrdinal implements the threshold-based ordinal loss functions from Rennie & Srebro, "Loss Functions for Preference Levels" (IJCAI 2005). These operate on raw logits and thresholds rather than probability outputs.
|
|
76
|
+
|
|
77
|
+
### `ordinal_loss`
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
ordinal_loss(logits, targets, thresholds, construction='all', penalty='logistic')
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
84
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
85
|
+
- **thresholds**: `(K-1,)` — sorted interior thresholds
|
|
86
|
+
- **construction**: `'all'` or `'immediate'`
|
|
87
|
+
- **penalty**: `'hinge'`, `'smooth_hinge'`, `'modified_least_squares'`, or `'logistic'`
|
|
88
|
+
- **Returns**: scalar mean loss over the batch
|
|
89
|
+
|
|
90
|
+
#### Constructions
|
|
91
|
+
|
|
92
|
+
- **All-threshold** (default, eq 13): penalizes violations of every threshold, weighted by direction. Bounds mean absolute error. Best performer in the paper's experiments.
|
|
93
|
+
- **Immediate-threshold** (eq 12): only penalizes violations of the two thresholds bounding the correct class segment.
|
|
94
|
+
|
|
95
|
+
#### Penalty functions
|
|
96
|
+
|
|
97
|
+
| Name | Formula | Reference |
|
|
98
|
+
|---|---|---|
|
|
99
|
+
| `'hinge'` | `max(0, 1-z)` | eq 5 |
|
|
100
|
+
| `'smooth_hinge'` | 0 if z≥1, (1-z)²/2 if 0<z<1, 0.5-z if z≤0 | eq 6 |
|
|
101
|
+
| `'modified_least_squares'` | 0 if z≥1, (1-z)² if z<1 | eq 7 |
|
|
102
|
+
| `'logistic'` | `log(1 + exp(-z))` | eq 9 |
|
|
103
|
+
|
|
104
|
+
The paper recommends **all-threshold + logistic** as the best-performing combination.
|
|
105
|
+
|
|
106
|
+
### `ordistic_loss`
|
|
107
|
+
|
|
108
|
+
Probabilistic generalization of logistic regression to K-class ordinal problems (Section 4).
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
ordistic_loss(logits, targets, means, log_priors=None)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
115
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
116
|
+
- **means**: `(K,)` — class means (convention: μ₁=-1, μ_K=1; interior means learned)
|
|
117
|
+
- **log_priors**: `(K,)` or `None` — optional log-prior terms π_i
|
|
118
|
+
- **Returns**: scalar mean negative log-likelihood over the batch
|
|
119
|
+
|
|
120
|
+
### Example usage
|
|
121
|
+
|
|
122
|
+
#### PyTorch
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import torch
|
|
126
|
+
from deepordinal.torch import OrdinalOutput, ordinal_loss
|
|
127
|
+
|
|
128
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4)
|
|
129
|
+
h = torch.randn(8, 16)
|
|
130
|
+
targets = torch.randint(0, 4, (8,))
|
|
131
|
+
|
|
132
|
+
probs = layer(h)
|
|
133
|
+
loss = ordinal_loss(layer.linear(h), targets, layer.interior_thresholds)
|
|
134
|
+
loss.backward()
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### TensorFlow
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import tensorflow as tf
|
|
141
|
+
from deepordinal.tf import OrdinalOutput, ordinal_loss
|
|
142
|
+
|
|
143
|
+
layer = OrdinalOutput(output_dim=4)
|
|
144
|
+
h = tf.random.normal((8, 16))
|
|
145
|
+
targets = tf.random.uniform((8,), 0, 4, dtype=tf.int32)
|
|
146
|
+
|
|
147
|
+
with tf.GradientTape() as tape:
|
|
148
|
+
probs = layer(h)
|
|
149
|
+
logit = tf.matmul(h, layer.kernel) + layer.bias
|
|
150
|
+
loss = ordinal_loss(logit, targets, tf.squeeze(layer.interior_thresholds))
|
|
151
|
+
grads = tape.gradient(loss, layer.trainable_variables)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Running Tests
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
pip install -e ".[tf,torch]"
|
|
158
|
+
pytest -v
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Changelog
|
|
162
|
+
|
|
163
|
+
### 0.2.0
|
|
164
|
+
|
|
165
|
+
- Added `ordinal_loss` — Rennie & Srebro threshold-based ordinal loss with two constructions (all-threshold, immediate-threshold) and four penalty functions (hinge, smooth hinge, modified least squares, logistic)
|
|
166
|
+
- Added `ordistic_loss` — ordistic negative log-likelihood loss (Rennie & Srebro, Section 4)
|
|
167
|
+
- Both loss functions available in `deepordinal.torch` and `deepordinal.tf`
|
|
168
|
+
|
|
169
|
+
### 0.1.0
|
|
170
|
+
|
|
171
|
+
- Added PyTorch backend (`deepordinal.torch`) with `OrdinalOutput` module
|
|
172
|
+
- Modernized TensorFlow backend to `tf.keras` with self-contained `OrdinalOutput` layer and `SortedInitializer`
|
|
173
|
+
- Dual-backend support (TensorFlow/Keras and PyTorch) with matching APIs
|
|
174
|
+
- `pyproject.toml` build configuration with optional `[tf]` and `[torch]` extras
|
|
175
|
+
|
|
176
|
+
### Initial
|
|
177
|
+
|
|
178
|
+
- `OrdinalOutput` Keras layer for deep ordinal regression
|
|
179
|
+
- Example notebook with synthetic ordinal data
|
|
180
|
+
|
|
181
|
+
## Examples
|
|
182
|
+
|
|
183
|
+
- `examples/example_tf.ipynb` — TensorFlow/Keras with `ordinal_loss` and `GradientTape` training loop
|
|
184
|
+
- `examples/example_torch.ipynb` — PyTorch with `ordinal_loss` and standard training loop
|
|
185
|
+
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# DeepOrdinal
|
|
2
|
+
|
|
3
|
+
Ordinal output layers and loss functions ([Rennie & Srebro, 2005](https://ttic.uchicago.edu/~nati/Publications/RennieSrebroIJCAI05.pdf)) for PyTorch and TF/Keras.
|
|
4
|
+
|
|
5
|
+
DeepOrdinal provides an `OrdinalOutput` layer that converts a learned logit into ordinal class probabilities via sorted thresholds, plus loss functions designed specifically for ordinal regression.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install deepordinal
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
With a specific backend:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install ".[tf]" # TensorFlow/Keras
|
|
17
|
+
pip install ".[torch]" # PyTorch
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
For development:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install -e ".[tf,torch]"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Backends
|
|
27
|
+
|
|
28
|
+
DeepOrdinal supports two backends with identical APIs:
|
|
29
|
+
|
|
30
|
+
| | PyTorch | TensorFlow/Keras |
|
|
31
|
+
|---|---|---|
|
|
32
|
+
| Module | `deepordinal.torch` | `deepordinal.tf` |
|
|
33
|
+
| Layer | `OrdinalOutput(input_dim=D, output_dim=K)` | `OrdinalOutput(output_dim=K)` |
|
|
34
|
+
| Loss functions | `ordinal_loss`, `ordistic_loss` | `ordinal_loss`, `ordistic_loss` |
|
|
35
|
+
|
|
36
|
+
## OrdinalOutput Layer
|
|
37
|
+
|
|
38
|
+
The `OrdinalOutput` layer accepts any input size, projects to a single logit, and converts it into K class probabilities using K-1 learned, sorted thresholds:
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
P(y = k | x) = sigmoid(t(k+1) - logit) - sigmoid(t(k) - logit)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
where `t(0) = -inf` and `t(K) = inf` are fixed, and interior thresholds are initialized sorted.
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from deepordinal.torch import OrdinalOutput # or deepordinal.tf
|
|
48
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4) # TF omits input_dim
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Loss Functions
|
|
52
|
+
|
|
53
|
+
DeepOrdinal implements the threshold-based ordinal loss functions from Rennie & Srebro, "Loss Functions for Preference Levels" (IJCAI 2005). These operate on raw logits and thresholds rather than probability output.
|
|
54
|
+
|
|
55
|
+
### `ordinal_loss`
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
ordinal_loss(logits, targets, thresholds, construction='all', penalty='logistic')
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
62
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
63
|
+
- **thresholds**: `(K-1,)` — sorted interior thresholds
|
|
64
|
+
- **construction**: `'all'` or `'immediate'`
|
|
65
|
+
- **penalty**: `'hinge'`, `'smooth_hinge'`, `'modified_least_squares'`, or `'logistic'`
|
|
66
|
+
- **Returns**: scalar mean loss over the batch
|
|
67
|
+
|
|
68
|
+
#### Constructions
|
|
69
|
+
|
|
70
|
+
- **All-threshold** (default, eq 13): penalizes violations of every threshold, weighted by direction. Bounds mean absolute error. Best performer in the paper's experiments.
|
|
71
|
+
- **Immediate-threshold** (eq 12): only penalizes violations of the two thresholds bounding the correct class segment.
|
|
72
|
+
|
|
73
|
+
#### Penalty functions
|
|
74
|
+
|
|
75
|
+
| Name | Formula | Reference |
|
|
76
|
+
|---|---|---|
|
|
77
|
+
| `'hinge'` | `max(0, 1-z)` | eq 5 |
|
|
78
|
+
| `'smooth_hinge'` | 0 if z≥1, (1-z)²/2 if 0<z<1, 0.5-z if z≤0 | eq 6 |
|
|
79
|
+
| `'modified_least_squares'` | 0 if z≥1, (1-z)² if z<1 | eq 7 |
|
|
80
|
+
| `'logistic'` | `log(1 + exp(-z))` | eq 9 |
|
|
81
|
+
|
|
82
|
+
The paper recommends **all-threshold + logistic** as the best-performing combination.
|
|
83
|
+
|
|
84
|
+
### `ordistic_loss`
|
|
85
|
+
|
|
86
|
+
Probabilistic generalization of logistic regression to K-class ordinal problems (Section 4).
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
ordistic_loss(logits, targets, means, log_priors=None)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
93
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
94
|
+
- **means**: `(K,)` — class means (convention: μ₁=-1, μ_K=1; interior means learned)
|
|
95
|
+
- **log_priors**: `(K,)` or `None` — optional log-prior terms π_i
|
|
96
|
+
- **Returns**: scalar mean negative log-likelihood over the batch
|
|
97
|
+
|
|
98
|
+
### Example usage
|
|
99
|
+
|
|
100
|
+
#### PyTorch
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
import torch
|
|
104
|
+
from deepordinal.torch import OrdinalOutput, ordinal_loss
|
|
105
|
+
|
|
106
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4)
|
|
107
|
+
h = torch.randn(8, 16)
|
|
108
|
+
targets = torch.randint(0, 4, (8,))
|
|
109
|
+
|
|
110
|
+
probs = layer(h)
|
|
111
|
+
loss = ordinal_loss(layer.linear(h), targets, layer.interior_thresholds)
|
|
112
|
+
loss.backward()
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
#### TensorFlow
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
import tensorflow as tf
|
|
119
|
+
from deepordinal.tf import OrdinalOutput, ordinal_loss
|
|
120
|
+
|
|
121
|
+
layer = OrdinalOutput(output_dim=4)
|
|
122
|
+
h = tf.random.normal((8, 16))
|
|
123
|
+
targets = tf.random.uniform((8,), 0, 4, dtype=tf.int32)
|
|
124
|
+
|
|
125
|
+
with tf.GradientTape() as tape:
|
|
126
|
+
probs = layer(h)
|
|
127
|
+
logit = tf.matmul(h, layer.kernel) + layer.bias
|
|
128
|
+
loss = ordinal_loss(logit, targets, tf.squeeze(layer.interior_thresholds))
|
|
129
|
+
grads = tape.gradient(loss, layer.trainable_variables)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Running Tests
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
pip install -e ".[tf,torch]"
|
|
136
|
+
pytest -v
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Changelog
|
|
140
|
+
|
|
141
|
+
### 0.2.0
|
|
142
|
+
|
|
143
|
+
- Added `ordinal_loss` — Rennie & Srebro threshold-based ordinal loss with two constructions (all-threshold, immediate-threshold) and four penalty functions (hinge, smooth hinge, modified least squares, logistic)
|
|
144
|
+
- Added `ordistic_loss` — ordistic negative log-likelihood loss (Rennie & Srebro, Section 4)
|
|
145
|
+
- Both loss functions available in `deepordinal.torch` and `deepordinal.tf`
|
|
146
|
+
|
|
147
|
+
### 0.1.0
|
|
148
|
+
|
|
149
|
+
- Added PyTorch backend (`deepordinal.torch`) with `OrdinalOutput` module
|
|
150
|
+
- Modernized TensorFlow backend to `tf.keras` with self-contained `OrdinalOutput` layer and `SortedInitializer`
|
|
151
|
+
- Dual-backend support (TensorFlow/Keras and PyTorch) with matching APIs
|
|
152
|
+
- `pyproject.toml` build configuration with optional `[tf]` and `[torch]` extras
|
|
153
|
+
|
|
154
|
+
### Initial
|
|
155
|
+
|
|
156
|
+
- `OrdinalOutput` Keras layer for deep ordinal regression
|
|
157
|
+
- Example notebook with synthetic ordinal data
|
|
158
|
+
|
|
159
|
+
## Examples
|
|
160
|
+
|
|
161
|
+
- `examples/example_tf.ipynb` — TensorFlow/Keras with `ordinal_loss` and `GradientTape` training loop
|
|
162
|
+
- `examples/example_torch.ipynb` — PyTorch with `ordinal_loss` and standard training loop
|
|
163
|
+
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import tensorflow as tf
|
|
2
|
+
from tensorflow.keras import initializers
|
|
3
|
+
from tensorflow.keras.layers import Layer
|
|
4
|
+
|
|
5
|
+
__all__ = ["OrdinalOutput", "SortedInitializer", "ordinal_loss", "ordistic_loss"]
|
|
6
|
+
|
|
7
|
+
_INF = tf.constant(float("inf"))
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _penalty(z, name):
    """Elementwise margin penalty f(z) from Rennie & Srebro (IJCAI 2005, eqs 5-9).

    Args:
        z: Tensor of margins; larger z means the constraint is better satisfied.
        name: One of ``'hinge'``, ``'smooth_hinge'``,
            ``'modified_least_squares'``, or ``'logistic'``.

    Returns:
        Tensor of the same shape as ``z`` with the penalty applied elementwise.

    Raises:
        ValueError: If ``name`` is not a recognized penalty.
    """
    if name == "hinge":
        # eq 5: max(0, 1 - z)
        return tf.maximum(0.0, 1.0 - z)
    if name == "smooth_hinge":
        # eq 6: zero above 1, quadratic on (0, 1), linear at or below 0
        inner = tf.where(z > 0.0, 0.5 * (1.0 - z) ** 2, 0.5 - z)
        return tf.where(z >= 1.0, tf.zeros_like(z), inner)
    if name == "modified_least_squares":
        # eq 7: squared hinge
        return tf.where(z >= 1.0, tf.zeros_like(z), (1.0 - z) ** 2)
    if name == "logistic":
        # eq 9: log(1 + exp(-z)); softplus is the numerically stable form
        return tf.math.softplus(-z)
    raise ValueError(f"Unknown penalty: {name}")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def ordinal_loss(logits, targets, thresholds, construction="all", penalty="logistic"):
    """Rennie & Srebro ordinal loss (IJCAI 2005).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        thresholds: (K-1,) — sorted interior thresholds.
        construction: ``'all'`` (eq 13) or ``'immediate'`` (eq 12).
        penalty: ``'hinge'``, ``'smooth_hinge'``, ``'modified_least_squares'``,
            or ``'logistic'``.

    Returns:
        Scalar mean loss over the batch.

    Raises:
        ValueError: If ``construction`` or ``penalty`` is not recognized.
    """
    z = tf.cast(tf.reshape(logits, [-1]), tf.float32)
    labels = tf.cast(targets, tf.int32)
    cuts = tf.cast(thresholds, tf.float32)
    num_classes = cuts.shape[0] + 1
    # The paper uses 1-indexed labels y in [1, K].
    one_indexed = tf.cast(labels + 1, tf.float32)

    if construction == "all":
        # eq 13: every threshold contributes, signed by which side of the
        # target segment it sits on (-1 below the label, +1 at/above it).
        levels = tf.cast(tf.range(1, num_classes), tf.float32)  # (K-1,)
        sign = tf.where(
            tf.expand_dims(levels, 0) < tf.expand_dims(one_indexed, 1), -1.0, 1.0
        )  # (batch, K-1)
        margin = tf.expand_dims(cuts, 0) - tf.expand_dims(z, 1)  # (batch, K-1)
        per_sample = tf.reduce_sum(_penalty(sign * margin, penalty), axis=1)
    elif construction == "immediate":
        # eq 12: only the two thresholds bracketing the target segment;
        # the outer thresholds are fixed at +/- infinity.
        lower_cuts = tf.concat([[float("-inf")], cuts], axis=0)
        upper_cuts = tf.concat([cuts, [float("inf")]], axis=0)
        lower = tf.gather(lower_cuts, labels)
        upper = tf.gather(upper_cuts, labels)
        per_sample = _penalty(z - lower, penalty) + _penalty(upper - z, penalty)
    else:
        raise ValueError(f"Unknown construction: {construction}")

    return tf.reduce_mean(per_sample)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def ordistic_loss(logits, targets, means, log_priors=None):
    """Ordistic loss (Rennie & Srebro, Section 4).

    Negative log-likelihood of the K-class "ordistic" model where class k
    has mean mu_k: P(y = k | z) ∝ exp(mu_k * z + pi_k - mu_k^2 / 2).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        means: (K,) — class means.
        log_priors: (K,) or None — log-prior terms pi_i. Defaults to zeros.

    Returns:
        Scalar mean negative log-likelihood over the batch.
    """
    z = tf.cast(tf.reshape(logits, [-1]), tf.float32)
    labels = tf.cast(targets, tf.int32)
    mu = tf.cast(means, tf.float32)
    num_classes = mu.shape[0]
    if log_priors is None:
        priors = tf.zeros([num_classes], dtype=tf.float32)
    else:
        priors = tf.cast(log_priors, tf.float32)
    # energy_ik = mu_k * z_i + pi_k - mu_k^2 / 2
    energy = (
        tf.expand_dims(mu, 0) * tf.expand_dims(z, 1)
        + tf.expand_dims(priors, 0)
        - tf.expand_dims(mu, 0) ** 2 / 2.0
    )
    # loss_i = -log P(y_i | z_i) = log-partition minus the target's energy
    rows = tf.range(tf.shape(labels)[0])
    target_energy = tf.gather_nd(energy, tf.stack([rows, labels], axis=1))
    log_partition = tf.reduce_logsumexp(energy, axis=1)
    return tf.reduce_mean(log_partition - target_energy)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class SortedInitializer(initializers.Initializer):
    """Keras initializer that sorts another initializer's output.

    Draws values from a wrapped base initializer and returns them sorted
    along the last axis, so threshold-like weights start out ordered.
    """

    def __init__(self, base="glorot_uniform"):
        # ``initializers.get`` accepts a string name, a config dict
        # (as produced by ``get_config``), or an initializer instance.
        self.base = initializers.get(base)

    def __call__(self, shape, dtype=None):
        # Sample from the base initializer, then order along the last axis.
        return tf.sort(self.base(shape, dtype=dtype), axis=-1)

    def get_config(self):
        return {"base": initializers.serialize(self.base)}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class OrdinalOutput(Layer):
    """Ordinal regression output layer.

    Maps an arbitrary feature vector to a single logit and turns that logit
    into *output_dim* class probabilities via learned, sorted thresholds.

    ``output_dim - 1`` interior thresholds ``t(1)…t(K-1)`` are learned (the
    outer thresholds ``t(0) = -∞`` and ``t(K) = +∞`` stay fixed), and the
    class probabilities are computed as::

        P(y = k | x) = σ(t(k+1) - logit) - σ(t(k) - logit)
    """

    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.output_dim = output_dim

    def build(self, input_shape):
        # Single-logit projection: kernel of shape (features, 1) plus bias.
        self.kernel = self.add_weight(
            name="kernel",
            shape=(input_shape[-1], 1),
            initializer="glorot_uniform",
            trainable=True,
        )
        self.bias = self.add_weight(
            name="bias",
            shape=(1,),
            initializer="zeros",
            trainable=True,
        )
        # K-1 interior thresholds, initialized already sorted so the
        # sigmoid differences in ``call`` start out non-negative.
        self.interior_thresholds = self.add_weight(
            name="thresholds",
            shape=(1, self.output_dim - 1),
            initializer=SortedInitializer("glorot_uniform"),
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        logit = tf.matmul(inputs, self.kernel) + self.bias
        # Pad the learned thresholds with the fixed -inf / +inf outer cuts.
        lo = tf.fill([1, 1], -_INF)
        hi = tf.fill([1, 1], _INF)
        cuts = tf.concat([lo, self.interior_thresholds, hi], axis=-1)
        upper = tf.sigmoid(cuts[:, 1:] - logit)
        lower = tf.sigmoid(cuts[:, :-1] - logit)
        return upper - lower

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

    def get_config(self):
        config = super().get_config()
        config["output_dim"] = self.output_dim
        return config
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
|
|
4
|
+
__all__ = ["OrdinalOutput", "ordinal_loss", "ordistic_loss"]
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _penalty(z, name):
    """Elementwise margin penalty f(z) from Rennie & Srebro (IJCAI 2005, eqs 5-9).

    Args:
        z: Tensor of margins; larger z means the constraint is better satisfied.
        name: One of ``'hinge'``, ``'smooth_hinge'``,
            ``'modified_least_squares'``, or ``'logistic'``.

    Returns:
        Tensor of the same shape as ``z`` with the penalty applied elementwise.

    Raises:
        ValueError: If ``name`` is not a recognized penalty.
    """
    if name == "hinge":
        # eq 5: max(0, 1 - z)
        return torch.clamp(1 - z, min=0)
    if name == "smooth_hinge":
        # eq 6: zero above 1, quadratic on (0, 1), linear at or below 0
        inner = torch.where(z > 0, 0.5 * (1 - z) ** 2, 0.5 - z)
        return torch.where(z >= 1, torch.zeros_like(z), inner)
    if name == "modified_least_squares":
        # eq 7: squared hinge
        return torch.where(z >= 1, torch.zeros_like(z), (1 - z) ** 2)
    if name == "logistic":
        # eq 9: log(1 + exp(-z)); softplus is the numerically stable form
        return torch.nn.functional.softplus(-z)
    raise ValueError(f"Unknown penalty: {name}")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def ordinal_loss(logits, targets, thresholds, construction="all", penalty="logistic"):
    """Rennie & Srebro ordinal loss (IJCAI 2005).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        thresholds: (K-1,) — sorted interior thresholds.
        construction: ``'all'`` (eq 13) or ``'immediate'`` (eq 12).
        penalty: ``'hinge'``, ``'smooth_hinge'``, ``'modified_least_squares'``,
            or ``'logistic'``.

    Returns:
        Scalar mean loss over the batch.

    Raises:
        ValueError: If ``construction`` or ``penalty`` is not recognized.
    """
    z = logits.reshape(-1)
    labels = targets.long()
    num_classes = thresholds.shape[0] + 1
    # The paper uses 1-indexed labels y in [1, K].
    one_indexed = labels + 1

    if construction == "all":
        # eq 13: every threshold contributes, signed by which side of the
        # target segment it sits on: s(l; y) = -1 for l < y, +1 for l >= y.
        levels = torch.arange(1, num_classes, device=z.device).float()  # (K-1,)
        sign = torch.where(
            levels.unsqueeze(0) < one_indexed.unsqueeze(1), -1.0, 1.0
        )  # (batch, K-1)
        margin = thresholds.unsqueeze(0) - z.unsqueeze(1)  # (batch, K-1)
        per_sample = _penalty(sign * margin, penalty).sum(dim=1)
    elif construction == "immediate":
        # eq 12: only the two thresholds bracketing the target segment;
        # the outer thresholds are fixed at +/- infinity.
        neg_inf = torch.tensor([float("-inf")], device=thresholds.device)
        pos_inf = torch.tensor([float("inf")], device=thresholds.device)
        lower = torch.cat([neg_inf, thresholds])[labels]  # theta_{y-1}
        upper = torch.cat([thresholds, pos_inf])[labels]  # theta_y
        per_sample = _penalty(z - lower, penalty) + _penalty(upper - z, penalty)
    else:
        raise ValueError(f"Unknown construction: {construction}")

    return per_sample.mean()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def ordistic_loss(logits, targets, means, log_priors=None):
    """Ordistic loss (Rennie & Srebro, Section 4).

    Negative log-likelihood of the K-class "ordistic" model where class k
    has mean mu_k: P(y = k | z) ∝ exp(mu_k * z + pi_k - mu_k^2 / 2).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        means: (K,) tensor or array-like — class means (mu_1=-1, mu_K=1 by
            convention; interior means learned).
        log_priors: (K,) tensor or array-like, or None — log-prior terms
            pi_i. Defaults to zeros.

    Returns:
        Scalar mean negative log-likelihood over the batch.
    """
    logits = logits.reshape(-1)
    targets = targets.long()
    # Accept lists/ndarrays as well as tensors, and align device/dtype with
    # the logits (matching the TF backend, which casts its inputs);
    # torch.as_tensor is a no-op for tensors that already match, preserving
    # any autograd history on learned means/priors.
    means = torch.as_tensor(means, device=logits.device, dtype=logits.dtype)
    K = means.shape[0]
    if log_priors is None:
        log_priors = torch.zeros(K, device=logits.device, dtype=logits.dtype)
    else:
        log_priors = torch.as_tensor(log_priors, device=logits.device, dtype=logits.dtype)
    # energy_ik = mu_k * z_i + pi_k - mu_k^2 / 2
    energy = means.unsqueeze(0) * logits.unsqueeze(1) + log_priors.unsqueeze(0) - means.unsqueeze(0) ** 2 / 2
    # loss_i = -log P(y_i | z_i) = log-partition minus the target's energy
    target_energy = energy[torch.arange(len(targets), device=targets.device), targets]
    log_partition = torch.logsumexp(energy, dim=1)
    return (log_partition - target_energy).mean()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class OrdinalOutput(nn.Module):
    """Ordinal regression output layer.

    Maps an arbitrary feature vector to a single logit and turns that logit
    into *output_dim* class probabilities via learned, sorted thresholds.

    ``output_dim - 1`` interior thresholds ``t(1)…t(K-1)`` are learned (the
    outer thresholds ``t(0) = -∞`` and ``t(K) = +∞`` stay fixed), and the
    class probabilities are computed as::

        P(y = k | x) = σ(t(k+1) - logit) - σ(t(k) - logit)

    Args:
        input_dim: Size of the input feature dimension.
        output_dim: Number of ordinal classes.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.linear = nn.Linear(input_dim, 1)
        self.interior_thresholds = nn.Parameter(torch.empty(output_dim - 1))
        self._init_thresholds()

    def _init_thresholds(self):
        # Xavier init needs a 2-D fan; the unsqueezed view shares storage
        # with the parameter, so the in-place init writes through to it.
        nn.init.xavier_uniform_(self.interior_thresholds.unsqueeze(0))
        # Sort once at init so the sigmoid differences in ``forward``
        # start out non-negative.
        with torch.no_grad():
            self.interior_thresholds.copy_(self.interior_thresholds.sort().values)

    def forward(self, x):
        score = self.linear(x)  # (batch, 1)
        # Pad the learned thresholds with the fixed -inf / +inf outer cuts.
        lo = torch.full((1,), float("-inf"), device=score.device, dtype=score.dtype)
        hi = torch.full((1,), float("inf"), device=score.device, dtype=score.dtype)
        cuts = torch.cat([lo, self.interior_thresholds, hi])  # (K+1,)
        upper = torch.sigmoid(cuts[1:] - score)
        lower = torch.sigmoid(cuts[:-1] - score)
        return upper - lower
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepordinal
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Ordinal output layers and loss functions (Rennie & Srebro, 2005) for PyTorch and TF/Keras
|
|
5
|
+
Author: Nicholas Hirons
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/nhirons/deepordinal
|
|
8
|
+
Keywords: ordinal-regression,deep-learning,pytorch,tensorflow,keras
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Provides-Extra: tf
|
|
18
|
+
Requires-Dist: tensorflow>=2.0; extra == "tf"
|
|
19
|
+
Provides-Extra: torch
|
|
20
|
+
Requires-Dist: torch>=2.0; extra == "torch"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# DeepOrdinal
|
|
24
|
+
|
|
25
|
+
Ordinal output layers and loss functions ([Rennie & Srebro, 2005](https://ttic.uchicago.edu/~nati/Publications/RennieSrebroIJCAI05.pdf)) for PyTorch and TF/Keras.
|
|
26
|
+
|
|
27
|
+
DeepOrdinal provides an `OrdinalOutput` layer that converts a learned logit into ordinal class probabilities via sorted thresholds, plus loss functions designed specifically for ordinal regression.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install deepordinal
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
With a specific backend:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install ".[tf]" # TensorFlow/Keras
|
|
39
|
+
pip install ".[torch]" # PyTorch
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For development:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install -e ".[tf,torch]"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Backends
|
|
49
|
+
|
|
50
|
+
DeepOrdinal supports two backends with identical APIs:
|
|
51
|
+
|
|
52
|
+
| | PyTorch | TensorFlow/Keras |
|
|
53
|
+
|---|---|---|
|
|
54
|
+
| Module | `deepordinal.torch` | `deepordinal.tf` |
|
|
55
|
+
| Layer | `OrdinalOutput(input_dim=D, output_dim=K)` | `OrdinalOutput(output_dim=K)` |
|
|
56
|
+
| Loss functions | `ordinal_loss`, `ordistic_loss` | `ordinal_loss`, `ordistic_loss` |
|
|
57
|
+
|
|
58
|
+
## OrdinalOutput Layer
|
|
59
|
+
|
|
60
|
+
The `OrdinalOutput` layer accepts any input size, projects to a single logit, and converts it into K class probabilities using K-1 learned, sorted thresholds:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
P(y = k | x) = sigmoid(t(k+1) - logit) - sigmoid(t(k) - logit)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
where `t(0) = -inf` and `t(K) = inf` are fixed, and interior thresholds are initialized sorted.
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from deepordinal.torch import OrdinalOutput # or deepordinal.tf
|
|
70
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4) # TF omits input_dim
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Loss Functions
|
|
74
|
+
|
|
75
|
+
DeepOrdinal implements the threshold-based ordinal loss functions from Rennie & Srebro, "Loss Functions for Preference Levels" (IJCAI 2005). These operate on raw logits and thresholds rather than probability output.
|
|
76
|
+
|
|
77
|
+
### `ordinal_loss`
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
ordinal_loss(logits, targets, thresholds, construction='all', penalty='logistic')
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
84
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
85
|
+
- **thresholds**: `(K-1,)` — sorted interior thresholds
|
|
86
|
+
- **construction**: `'all'` or `'immediate'`
|
|
87
|
+
- **penalty**: `'hinge'`, `'smooth_hinge'`, `'modified_least_squares'`, or `'logistic'`
|
|
88
|
+
- **Returns**: scalar mean loss over the batch
|
|
89
|
+
|
|
90
|
+
#### Constructions
|
|
91
|
+
|
|
92
|
+
- **All-threshold** (default, eq 13): penalizes violations of every threshold, weighted by direction. Bounds mean absolute error. Best performer in the paper's experiments.
|
|
93
|
+
- **Immediate-threshold** (eq 12): only penalizes violations of the two thresholds bounding the correct class segment.
|
|
94
|
+
|
|
95
|
+
#### Penalty functions
|
|
96
|
+
|
|
97
|
+
| Name | Formula | Reference |
|
|
98
|
+
|---|---|---|
|
|
99
|
+
| `'hinge'` | `max(0, 1-z)` | eq 5 |
|
|
100
|
+
| `'smooth_hinge'` | 0 if z≥1, (1-z)²/2 if 0<z<1, 0.5-z if z≤0 | eq 6 |
|
|
101
|
+
| `'modified_least_squares'` | 0 if z≥1, (1-z)² if z<1 | eq 7 |
|
|
102
|
+
| `'logistic'` | `log(1 + exp(-z))` | eq 9 |
|
|
103
|
+
|
|
104
|
+
The paper recommends **all-threshold + logistic** as the best-performing combination.
|
|
105
|
+
|
|
106
|
+
### `ordistic_loss`
|
|
107
|
+
|
|
108
|
+
Probabilistic generalization of logistic regression to K-class ordinal problems (Section 4).
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
ordistic_loss(logits, targets, means, log_priors=None)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
115
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
116
|
+
- **means**: `(K,)` — class means (convention: μ₁=-1, μ_K=1; interior means learned)
|
|
117
|
+
- **log_priors**: `(K,)` or `None` — optional log-prior terms π_i
|
|
118
|
+
- **Returns**: scalar mean negative log-likelihood over the batch
|
|
119
|
+
|
|
120
|
+
### Example usage
|
|
121
|
+
|
|
122
|
+
#### PyTorch
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import torch
|
|
126
|
+
from deepordinal.torch import OrdinalOutput, ordinal_loss
|
|
127
|
+
|
|
128
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4)
|
|
129
|
+
h = torch.randn(8, 16)
|
|
130
|
+
targets = torch.randint(0, 4, (8,))
|
|
131
|
+
|
|
132
|
+
probs = layer(h)
|
|
133
|
+
loss = ordinal_loss(layer.linear(h), targets, layer.interior_thresholds)
|
|
134
|
+
loss.backward()
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### TensorFlow
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import tensorflow as tf
|
|
141
|
+
from deepordinal.tf import OrdinalOutput, ordinal_loss
|
|
142
|
+
|
|
143
|
+
layer = OrdinalOutput(output_dim=4)
|
|
144
|
+
h = tf.random.normal((8, 16))
|
|
145
|
+
targets = tf.random.uniform((8,), 0, 4, dtype=tf.int32)
|
|
146
|
+
|
|
147
|
+
with tf.GradientTape() as tape:
|
|
148
|
+
probs = layer(h)
|
|
149
|
+
logit = tf.matmul(h, layer.kernel) + layer.bias
|
|
150
|
+
loss = ordinal_loss(logit, targets, tf.squeeze(layer.interior_thresholds))
|
|
151
|
+
grads = tape.gradient(loss, layer.trainable_variables)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Running Tests
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
pip install -e ".[tf,torch]"
|
|
158
|
+
pytest -v
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Changelog
|
|
162
|
+
|
|
163
|
+
### 0.2.0
|
|
164
|
+
|
|
165
|
+
- Added `ordinal_loss` — Rennie & Srebro threshold-based ordinal loss with two constructions (all-threshold, immediate-threshold) and four penalty functions (hinge, smooth hinge, modified least squares, logistic)
|
|
166
|
+
- Added `ordistic_loss` — ordistic negative log-likelihood loss (Rennie & Srebro, Section 4)
|
|
167
|
+
- Both loss functions available in `deepordinal.torch` and `deepordinal.tf`
|
|
168
|
+
|
|
169
|
+
### 0.1.0
|
|
170
|
+
|
|
171
|
+
- Added PyTorch backend (`deepordinal.torch`) with `OrdinalOutput` module
|
|
172
|
+
- Modernized TensorFlow backend to `tf.keras` with self-contained `OrdinalOutput` layer and `SortedInitializer`
|
|
173
|
+
- Dual-backend support (TensorFlow/Keras and PyTorch) with matching APIs
|
|
174
|
+
- `pyproject.toml` build configuration with optional `[tf]` and `[torch]` extras
|
|
175
|
+
|
|
176
|
+
### Initial
|
|
177
|
+
|
|
178
|
+
- `OrdinalOutput` Keras layer for deep ordinal regression
|
|
179
|
+
- Example notebook with synthetic ordinal data
|
|
180
|
+
|
|
181
|
+
## Examples
|
|
182
|
+
|
|
183
|
+
- `examples/example_tf.ipynb` — TensorFlow/Keras with `ordinal_loss` and `GradientTape` training loop
|
|
184
|
+
- `examples/example_torch.ipynb` — PyTorch with `ordinal_loss` and standard training loop
|
|
185
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
deepordinal/__init__.py
|
|
5
|
+
deepordinal/tf.py
|
|
6
|
+
deepordinal/torch.py
|
|
7
|
+
deepordinal.egg-info/PKG-INFO
|
|
8
|
+
deepordinal.egg-info/SOURCES.txt
|
|
9
|
+
deepordinal.egg-info/dependency_links.txt
|
|
10
|
+
deepordinal.egg-info/requires.txt
|
|
11
|
+
deepordinal.egg-info/top_level.txt
|
|
12
|
+
tests/test_tf.py
|
|
13
|
+
tests/test_torch.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
deepordinal
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "deepordinal"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Ordinal output layers and loss functions (Rennie & Srebro, 2005) for PyTorch and TF/Keras"
|
|
5
|
+
requires-python = ">=3.10"
|
|
6
|
+
dependencies = ["numpy"]
|
|
7
|
+
license = "MIT"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Nicholas Hirons" },
|
|
10
|
+
]
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Science/Research",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
17
|
+
]
|
|
18
|
+
keywords = ["ordinal-regression", "deep-learning", "pytorch", "tensorflow", "keras"]
|
|
19
|
+
|
|
20
|
+
[project.optional-dependencies]
|
|
21
|
+
tf = ["tensorflow>=2.0"]
|
|
22
|
+
torch = ["torch>=2.0"]
|
|
23
|
+
|
|
24
|
+
[dependency-groups]
|
|
25
|
+
dev = [
|
|
26
|
+
"pytest>=8.0.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[build-system]
|
|
30
|
+
requires = ["setuptools>=61.0"]
|
|
31
|
+
build-backend = "setuptools.build_meta"
|
|
32
|
+
|
|
33
|
+
[tool.setuptools.packages.find]
|
|
34
|
+
include = ["deepordinal*"]
|
|
35
|
+
|
|
36
|
+
[tool.pytest.ini_options]
|
|
37
|
+
testpaths = ["tests"]
|
|
38
|
+
python_files = ["test_*.py"]
|
|
39
|
+
python_functions = ["test_*"]
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Repository = "https://github.com/nhirons/deepordinal"
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
tf = pytest.importorskip("tensorflow")
|
|
4
|
+
|
|
5
|
+
from deepordinal.tf import OrdinalOutput, SortedInitializer, ordinal_loss, ordistic_loss
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_output_shape():
    """The layer maps (batch, features) inputs to (batch, output_dim) probabilities."""
    n_classes = 5
    inputs = tf.random.normal((4, 8))
    probs = OrdinalOutput(output_dim=n_classes)(inputs)
    assert probs.shape == (4, n_classes)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_probabilities_sum_to_one():
    """Per-sample class probabilities form a valid distribution (sum to 1)."""
    batch = 16
    probs = OrdinalOutput(output_dim=3)(tf.random.normal((batch, 4)))
    row_totals = tf.reduce_sum(probs, axis=-1)
    tf.debugging.assert_near(row_totals, tf.ones(batch), atol=1e-5)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_probabilities_non_negative():
    """Every class probability emitted by the layer is >= 0."""
    features = tf.random.normal((32, 4))
    probs = OrdinalOutput(output_dim=6)(features)
    assert bool(tf.reduce_all(probs >= 0))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_thresholds_initialized_sorted():
    """Interior thresholds come out of initialization in ascending order."""
    layer = OrdinalOutput(output_dim=5)
    layer.build((None, 4))
    values = layer.interior_thresholds.numpy().flatten().tolist()
    assert values == sorted(values)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_gradients_flow():
    """Every trainable variable of the layer receives a gradient."""
    layer = OrdinalOutput(output_dim=3)
    batch = tf.random.normal((8, 4))
    with tf.GradientTape() as tape:
        objective = tf.reduce_sum(layer(batch))
    gradients = tape.gradient(objective, layer.trainable_variables)
    for g in gradients:
        assert g is not None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_sorted_initializer():
    """SortedInitializer sorts whatever the wrapped initializer produces."""
    drawn = SortedInitializer("glorot_uniform")((1, 10))
    flat = drawn.numpy().flatten().tolist()
    assert flat == sorted(flat)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_get_config_roundtrip():
    """get_config / from_config round-trips the output_dim setting."""
    original = OrdinalOutput(output_dim=4)
    cfg = original.get_config()
    assert cfg["output_dim"] == 4
    rebuilt = OrdinalOutput.from_config(cfg)
    assert rebuilt.output_dim == 4
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_single_sample():
    """A batch of one still yields a full, normalized distribution."""
    probs = OrdinalOutput(output_dim=4)(tf.random.normal((1, 2)))
    assert probs.shape == (1, 4)
    tf.debugging.assert_near(tf.reduce_sum(probs), 1.0, atol=1e-5)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# --- Loss function tests ---
|
|
71
|
+
|
|
72
|
+
import numpy as np
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class TestPenaltyFunctions:
    """Spot-check each penalty function at known inputs (Rennie & Srebro eqs 5-9)."""

    def test_hinge_values(self):
        from deepordinal.tf import _penalty
        points = tf.constant([-1.0, 0.0, 0.5, 1.0, 2.0])
        np.testing.assert_allclose(
            _penalty(points, "hinge").numpy(), [2.0, 1.0, 0.5, 0.0, 0.0]
        )

    def test_smooth_hinge_values(self):
        from deepordinal.tf import _penalty
        points = tf.constant([-1.0, 0.0, 0.5, 1.0, 2.0])
        np.testing.assert_allclose(
            _penalty(points, "smooth_hinge").numpy(), [1.5, 0.5, 0.125, 0.0, 0.0]
        )

    def test_modified_least_squares_values(self):
        from deepordinal.tf import _penalty
        points = tf.constant([-1.0, 0.0, 0.5, 1.0, 2.0])
        np.testing.assert_allclose(
            _penalty(points, "modified_least_squares").numpy(), [4.0, 1.0, 0.25, 0.0, 0.0]
        )

    def test_logistic_values(self):
        # log(1 + exp(0)) == log(2)
        from deepordinal.tf import _penalty
        np.testing.assert_allclose(
            _penalty(tf.constant([0.0]), "logistic").numpy(),
            [0.6931471805599453],
            atol=1e-5,
        )

    def test_all_penalties_non_negative(self):
        from deepordinal.tf import _penalty
        grid = tf.linspace(-3.0, 3.0, 100)
        for name in ["hinge", "smooth_hinge", "modified_least_squares", "logistic"]:
            assert tf.reduce_all(_penalty(grid, name) >= -1e-7).numpy(), \
                f"{name} produced negative values"

    def test_unknown_penalty_raises(self):
        from deepordinal.tf import _penalty
        with pytest.raises(ValueError, match="Unknown penalty"):
            _penalty(tf.constant([0.0]), "bad")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class TestOrdinalLoss:
    """ordinal_loss behaves sensibly under both threshold constructions."""

    def _make_inputs(self):
        # K=4 classes -> 3 interior thresholds; two samples with 0-indexed labels.
        interior = tf.constant([-1.0, 0.0, 1.0])
        preds = tf.constant([0.5, -0.5])
        labels = tf.constant([1, 2])
        return preds, labels, interior

    def test_all_threshold_runs(self):
        preds, labels, interior = self._make_inputs()
        result = ordinal_loss(preds, labels, interior, construction="all", penalty="logistic")
        assert result.shape == ()
        assert result.numpy() >= 0

    def test_immediate_threshold_runs(self):
        preds, labels, interior = self._make_inputs()
        result = ordinal_loss(preds, labels, interior, construction="immediate", penalty="logistic")
        assert result.shape == ()
        assert result.numpy() >= 0

    def test_all_penalties_work(self):
        preds, labels, interior = self._make_inputs()
        for name in ["hinge", "smooth_hinge", "modified_least_squares", "logistic"]:
            result = ordinal_loss(preds, labels, interior, penalty=name)
            assert result.numpy() >= 0, f"{name} loss is negative"

    def test_perfect_prediction_low_loss(self):
        # A logit well inside the class-0 segment should cost less than
        # labeling the same logit as the top class.
        interior = tf.constant([-2.0, 0.0, 2.0])
        preds = tf.constant([-3.0])
        right = ordinal_loss(preds, tf.constant([0]), interior, penalty="hinge")
        wrong = ordinal_loss(preds, tf.constant([3]), interior, penalty="hinge")
        assert right.numpy() < wrong.numpy()

    def test_gradients_flow_through_loss(self):
        interior = tf.Variable([-1.0, 0.0, 1.0])
        preds = tf.Variable([0.5])
        with tf.GradientTape() as tape:
            result = ordinal_loss(preds, tf.constant([1]), interior, penalty="logistic")
        for g in tape.gradient(result, [preds, interior]):
            assert g is not None

    def test_batch_reduction(self):
        # Mean over the batch: the result is a scalar regardless of batch size.
        interior = tf.constant([-1.0, 0.0, 1.0])
        preds = tf.constant([0.5, -0.5, 0.0, 1.5])
        result = ordinal_loss(preds, tf.constant([0, 1, 2, 3]), interior)
        assert result.shape == ()

    def test_unknown_construction_raises(self):
        preds, labels, interior = self._make_inputs()
        with pytest.raises(ValueError, match="Unknown construction"):
            ordinal_loss(preds, labels, interior, construction="bad")

    def test_logits_2d(self):
        # (batch, 1) logits are accepted as well as (batch,).
        interior = tf.constant([-1.0, 0.0, 1.0])
        result = ordinal_loss(tf.constant([[0.5], [-0.5]]), tf.constant([1, 2]), interior)
        assert result.shape == ()

    def test_hand_worked_all_threshold_hinge(self):
        # K=3, thresholds=[0, 2], logit=1, target=1: both signed margins are 1,
        # so every hinge term vanishes and the loss is exactly zero.
        result = ordinal_loss(
            tf.constant([1.0]), tf.constant([1]), tf.constant([0.0, 2.0]),
            construction="all", penalty="hinge",
        )
        np.testing.assert_allclose(result.numpy(), 0.0, atol=1e-6)

    def test_hand_worked_immediate_hinge(self):
        # Same setup under the immediate construction: f(1-0) + f(2-1) = 0.
        result = ordinal_loss(
            tf.constant([1.0]), tf.constant([1]), tf.constant([0.0, 2.0]),
            construction="immediate", penalty="hinge",
        )
        np.testing.assert_allclose(result.numpy(), 0.0, atol=1e-6)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class TestOrdisticLoss:
    """Basic sanity checks for the ordistic negative log-likelihood."""

    def test_runs(self):
        class_means = tf.constant([-1.0, 0.0, 1.0])
        result = ordistic_loss(tf.constant([0.5, -0.5]), tf.constant([0, 2]), class_means)
        assert result.shape == ()
        assert result.numpy() >= 0

    def test_with_log_priors(self):
        class_means = tf.constant([-1.0, 0.0, 1.0])
        priors = tf.constant([0.0, 0.1, -0.1])
        result = ordistic_loss(tf.constant([0.5]), tf.constant([1]), class_means, log_priors=priors)
        assert result.shape == ()

    def test_gradients_flow(self):
        class_means = tf.Variable([-1.0, 0.0, 1.0])
        preds = tf.Variable([0.5])
        with tf.GradientTape() as tape:
            result = ordistic_loss(preds, tf.constant([1]), class_means)
        for g in tape.gradient(result, [preds, class_means]):
            assert g is not None

    def test_non_negative(self):
        # A negative log-likelihood should never dip below zero.
        class_means = tf.constant([-1.0, 0.0, 1.0])
        preds = tf.random.normal([20])
        labels = tf.random.uniform([20], 0, 3, dtype=tf.int32)
        assert ordistic_loss(preds, labels, class_means).numpy() >= 0
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
torch = pytest.importorskip("torch")
|
|
4
|
+
|
|
5
|
+
from deepordinal.torch import OrdinalOutput, ordinal_loss, ordistic_loss
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_output_shape():
    """The layer maps (batch, input_dim) inputs to (batch, output_dim) probabilities."""
    probs = OrdinalOutput(input_dim=8, output_dim=5)(torch.randn(4, 8))
    assert probs.shape == (4, 5)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_probabilities_sum_to_one():
    """Per-sample class probabilities form a valid distribution (sum to 1)."""
    probs = OrdinalOutput(input_dim=4, output_dim=3)(torch.randn(16, 4))
    torch.testing.assert_close(probs.sum(dim=-1), torch.ones(16), atol=1e-5, rtol=0)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_probabilities_non_negative():
    """Every class probability emitted by the layer is >= 0."""
    probs = OrdinalOutput(input_dim=4, output_dim=6)(torch.randn(32, 4))
    assert bool((probs >= 0).all())
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_thresholds_initialized_sorted():
    """Interior thresholds start in ascending order."""
    thresholds = OrdinalOutput(input_dim=4, output_dim=5).interior_thresholds.detach()
    ordered, _ = thresholds.sort()
    torch.testing.assert_close(thresholds, ordered)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_gradients_flow():
    """Backprop reaches both the linear weights and the thresholds."""
    layer = OrdinalOutput(input_dim=4, output_dim=3)
    layer(torch.randn(8, 4)).sum().backward()
    assert layer.linear.weight.grad is not None
    assert layer.interior_thresholds.grad is not None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_seed_reproducibility():
    """Identical seeds produce identical layers (and hence identical outputs)."""
    torch.manual_seed(42)
    first = OrdinalOutput(input_dim=4, output_dim=3)
    torch.manual_seed(42)
    second = OrdinalOutput(input_dim=4, output_dim=3)
    batch = torch.randn(4, 4)
    torch.testing.assert_close(first(batch), second(batch))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_single_sample():
    """A batch of one still yields a full, normalized distribution."""
    probs = OrdinalOutput(input_dim=2, output_dim=4)(torch.randn(1, 2))
    assert probs.shape == (1, 4)
    torch.testing.assert_close(probs.sum(), torch.tensor(1.0), atol=1e-5, rtol=0)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# --- Loss function tests ---
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TestPenaltyFunctions:
    """Spot-check each penalty function at known inputs (Rennie & Srebro eqs 5-9)."""

    def test_hinge_values(self):
        from deepordinal.torch import _penalty
        points = torch.tensor([-1.0, 0.0, 0.5, 1.0, 2.0])
        torch.testing.assert_close(
            _penalty(points, "hinge"), torch.tensor([2.0, 1.0, 0.5, 0.0, 0.0])
        )

    def test_smooth_hinge_values(self):
        from deepordinal.torch import _penalty
        points = torch.tensor([-1.0, 0.0, 0.5, 1.0, 2.0])
        torch.testing.assert_close(
            _penalty(points, "smooth_hinge"), torch.tensor([1.5, 0.5, 0.125, 0.0, 0.0])
        )

    def test_modified_least_squares_values(self):
        from deepordinal.torch import _penalty
        points = torch.tensor([-1.0, 0.0, 0.5, 1.0, 2.0])
        torch.testing.assert_close(
            _penalty(points, "modified_least_squares"), torch.tensor([4.0, 1.0, 0.25, 0.0, 0.0])
        )

    def test_logistic_values(self):
        # log(1 + exp(0)) == log(2)
        from deepordinal.torch import _penalty
        torch.testing.assert_close(
            _penalty(torch.tensor([0.0]), "logistic"),
            torch.tensor([0.6931471805599453]),
            atol=1e-5, rtol=0,
        )

    def test_all_penalties_non_negative(self):
        from deepordinal.torch import _penalty
        grid = torch.linspace(-3, 3, 100)
        for name in ["hinge", "smooth_hinge", "modified_least_squares", "logistic"]:
            assert (_penalty(grid, name) >= -1e-7).all(), f"{name} produced negative values"

    def test_unknown_penalty_raises(self):
        from deepordinal.torch import _penalty
        with pytest.raises(ValueError, match="Unknown penalty"):
            _penalty(torch.tensor([0.0]), "bad")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class TestOrdinalLoss:
    """ordinal_loss behaves sensibly under both threshold constructions."""

    def _make_inputs(self):
        # K=4 classes -> 3 interior thresholds; two samples with 0-indexed labels.
        interior = torch.tensor([-1.0, 0.0, 1.0])
        preds = torch.tensor([0.5, -0.5])
        labels = torch.tensor([1, 2])
        return preds, labels, interior

    def test_all_threshold_runs(self):
        preds, labels, interior = self._make_inputs()
        result = ordinal_loss(preds, labels, interior, construction="all", penalty="logistic")
        assert result.shape == ()
        assert result.item() >= 0

    def test_immediate_threshold_runs(self):
        preds, labels, interior = self._make_inputs()
        result = ordinal_loss(preds, labels, interior, construction="immediate", penalty="logistic")
        assert result.shape == ()
        assert result.item() >= 0

    def test_all_penalties_work(self):
        preds, labels, interior = self._make_inputs()
        for name in ["hinge", "smooth_hinge", "modified_least_squares", "logistic"]:
            assert ordinal_loss(preds, labels, interior, penalty=name).item() >= 0, \
                f"{name} loss is negative"

    def test_perfect_prediction_low_loss(self):
        # A logit well inside the class-0 segment (below -2) should cost less
        # than labeling the same logit as the top class.
        interior = torch.tensor([-2.0, 0.0, 2.0])
        preds = torch.tensor([-3.0])
        right = ordinal_loss(preds, torch.tensor([0]), interior, penalty="hinge")
        wrong = ordinal_loss(preds, torch.tensor([3]), interior, penalty="hinge")
        assert right < wrong

    def test_gradients_flow_through_loss(self):
        interior = torch.tensor([-1.0, 0.0, 1.0], requires_grad=True)
        preds = torch.tensor([0.5], requires_grad=True)
        ordinal_loss(preds, torch.tensor([1]), interior, penalty="logistic").backward()
        assert preds.grad is not None
        assert interior.grad is not None

    def test_batch_reduction(self):
        # Mean over the batch: the result is a scalar regardless of batch size.
        interior = torch.tensor([-1.0, 0.0, 1.0])
        preds = torch.tensor([0.5, -0.5, 0.0, 1.5])
        result = ordinal_loss(preds, torch.tensor([0, 1, 2, 3]), interior)
        assert result.shape == ()

    def test_unknown_construction_raises(self):
        preds, labels, interior = self._make_inputs()
        with pytest.raises(ValueError, match="Unknown construction"):
            ordinal_loss(preds, labels, interior, construction="bad")

    def test_logits_2d(self):
        # (batch, 1) logits are accepted as well as (batch,).
        interior = torch.tensor([-1.0, 0.0, 1.0])
        result = ordinal_loss(torch.tensor([[0.5], [-0.5]]), torch.tensor([1, 2]), interior)
        assert result.shape == ()

    def test_hand_worked_all_threshold_hinge(self):
        # K=3 (classes 0,1,2), thresholds=[0, 2], logit=1, target=1 (paper y=2):
        # s(l=1;y=2)=-1, s(l=2;y=2)=+1, so
        # f(-1*(0-1)) + f(+1*(2-1)) = f(1) + f(1) = 0 + 0 = 0.
        result = ordinal_loss(
            torch.tensor([1.0]), torch.tensor([1]), torch.tensor([0.0, 2.0]),
            construction="all", penalty="hinge",
        )
        torch.testing.assert_close(result, torch.tensor(0.0))

    def test_hand_worked_immediate_hinge(self):
        # K=3, thresholds=[0, 2], logit=1, target=1 (0-indexed):
        # f(1-0) + f(2-1) = f(1) + f(1) = 0 + 0 = 0.
        result = ordinal_loss(
            torch.tensor([1.0]), torch.tensor([1]), torch.tensor([0.0, 2.0]),
            construction="immediate", penalty="hinge",
        )
        torch.testing.assert_close(result, torch.tensor(0.0))
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class TestOrdisticLoss:
    """Basic sanity checks for the ordistic negative log-likelihood."""

    def test_runs(self):
        class_means = torch.tensor([-1.0, 0.0, 1.0])
        result = ordistic_loss(torch.tensor([0.5, -0.5]), torch.tensor([0, 2]), class_means)
        assert result.shape == ()
        assert result.item() >= 0

    def test_with_log_priors(self):
        class_means = torch.tensor([-1.0, 0.0, 1.0])
        priors = torch.tensor([0.0, 0.1, -0.1])
        result = ordistic_loss(torch.tensor([0.5]), torch.tensor([1]), class_means, log_priors=priors)
        assert result.shape == ()

    def test_gradients_flow(self):
        class_means = torch.tensor([-1.0, 0.0, 1.0], requires_grad=True)
        preds = torch.tensor([0.5], requires_grad=True)
        ordistic_loss(preds, torch.tensor([1]), class_means).backward()
        assert preds.grad is not None
        assert class_means.grad is not None

    def test_non_negative(self):
        # A negative log-likelihood should never dip below zero.
        class_means = torch.tensor([-1.0, 0.0, 1.0])
        preds = torch.randn(20)
        labels = torch.randint(0, 3, (20,))
        assert ordistic_loss(preds, labels, class_means).item() >= 0
|