deepordinal 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepordinal/__init__.py +2 -0
- deepordinal/tf.py +163 -0
- deepordinal/torch.py +123 -0
- deepordinal-0.2.0.dist-info/METADATA +185 -0
- deepordinal-0.2.0.dist-info/RECORD +8 -0
- deepordinal-0.2.0.dist-info/WHEEL +5 -0
- deepordinal-0.2.0.dist-info/licenses/LICENSE +21 -0
- deepordinal-0.2.0.dist-info/top_level.txt +1 -0
deepordinal/__init__.py
ADDED
deepordinal/tf.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import tensorflow as tf
|
|
2
|
+
from tensorflow.keras import initializers
|
|
3
|
+
from tensorflow.keras.layers import Layer
|
|
4
|
+
|
|
5
|
+
__all__ = ["OrdinalOutput", "SortedInitializer", "ordinal_loss", "ordistic_loss"]
|
|
6
|
+
|
|
7
|
+
_INF = tf.constant(float("inf"))
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _penalty(z, name):
    """Elementwise margin penalty f(z) (Rennie & Srebro, 2005).

    Args:
        z: Tensor of signed margins.
        name: One of ``'hinge'``, ``'smooth_hinge'``,
            ``'modified_least_squares'``, or ``'logistic'``.

    Returns:
        Tensor of the same shape as ``z`` with the penalty applied.

    Raises:
        ValueError: If ``name`` is not a recognized penalty.
    """
    if name == "logistic":
        # log(1 + exp(-z)), computed stably.
        return tf.math.softplus(-z)
    if name == "hinge":
        return tf.maximum(0.0, 1.0 - z)
    if name == "modified_least_squares":
        return tf.where(z >= 1.0, tf.zeros_like(z), (1.0 - z) ** 2)
    if name == "smooth_hinge":
        # Quadratic on (0, 1), linear below 0, zero at or above 1.
        inner = tf.where(z > 0.0, 0.5 * (1.0 - z) ** 2, 0.5 - z)
        return tf.where(z >= 1.0, tf.zeros_like(z), inner)
    raise ValueError(f"Unknown penalty: {name}")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def ordinal_loss(logits, targets, thresholds, construction="all", penalty="logistic"):
    """Threshold-based ordinal loss of Rennie & Srebro (IJCAI 2005).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        thresholds: (K-1,) — sorted interior thresholds.
        construction: ``'all'`` (every threshold penalized) or
            ``'immediate'`` (only the two thresholds around the true class).
        penalty: ``'hinge'``, ``'smooth_hinge'``, ``'modified_least_squares'``,
            or ``'logistic'``.

    Returns:
        Scalar mean loss over the batch.
    """
    z = tf.cast(tf.reshape(logits, [-1]), tf.float32)
    labels0 = tf.cast(targets, tf.int32)
    thresholds = tf.cast(thresholds, tf.float32)
    num_classes = thresholds.shape[0] + 1
    # The paper's labels are 1-indexed.
    labels1 = tf.cast(labels0 + 1, tf.float32)

    if construction == "all":
        # s(l; y) = -1 for l < y, +1 for l >= y; penalize every threshold.
        levels = tf.cast(tf.range(1, num_classes), tf.float32)  # (K-1,)
        below = tf.expand_dims(levels, 0) < tf.expand_dims(labels1, 1)
        direction = tf.where(below, -1.0, 1.0)  # (batch, K-1)
        margins = tf.expand_dims(thresholds, 0) - tf.expand_dims(z, 1)
        per_example = tf.reduce_sum(_penalty(direction * margins, penalty), axis=1)
    elif construction == "immediate":
        # Only the two thresholds bounding the true class; the sentinel
        # +/-inf edges make the boundary classes one-sided.
        lower_edges = tf.concat([[float("-inf")], thresholds], axis=0)
        upper_edges = tf.concat([thresholds, [float("inf")]], axis=0)
        lo = tf.gather(lower_edges, labels0)
        hi = tf.gather(upper_edges, labels0)
        per_example = _penalty(z - lo, penalty) + _penalty(hi - z, penalty)
    else:
        raise ValueError(f"Unknown construction: {construction}")

    return tf.reduce_mean(per_example)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def ordistic_loss(logits, targets, means, log_priors=None):
    """Ordistic negative log-likelihood (Rennie & Srebro, Section 4).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        means: (K,) — class means.
        log_priors: (K,) or None — log-prior terms. Defaults to zeros.

    Returns:
        Scalar mean negative log-likelihood over the batch.
    """
    z = tf.cast(tf.reshape(logits, [-1]), tf.float32)
    labels = tf.cast(targets, tf.int32)
    means = tf.cast(means, tf.float32)
    num_classes = means.shape[0]
    if log_priors is None:
        priors = tf.zeros([num_classes], dtype=tf.float32)
    else:
        priors = tf.cast(log_priors, tf.float32)
    # energy_ik = mu_k * z_i + pi_k - mu_k^2 / 2
    mu_row = tf.expand_dims(means, 0)
    energy = (
        mu_row * tf.expand_dims(z, 1)
        + tf.expand_dims(priors, 0)
        - mu_row ** 2 / 2.0
    )
    # NLL = log-partition minus the energy of the true class.
    rows = tf.range(tf.shape(labels)[0])
    picked = tf.gather_nd(energy, tf.stack([rows, labels], axis=1))
    log_partition = tf.reduce_logsumexp(energy, axis=1)
    return tf.reduce_mean(log_partition - picked)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class SortedInitializer(initializers.Initializer):
    """Initializer adapter that sorts a base initializer's output.

    Draws values from the wrapped Keras initializer, then orders them along
    the final axis — useful for weights that must start out monotone, such
    as ordinal thresholds.
    """

    def __init__(self, base="glorot_uniform"):
        # Accepts a string identifier, config dict, or Initializer instance.
        self.base = initializers.get(base)

    def __call__(self, shape, dtype=None):
        raw = self.base(shape, dtype=dtype)
        return tf.sort(raw, axis=-1)

    def get_config(self):
        # Serialize the wrapped initializer so the layer round-trips.
        return {"base": initializers.serialize(self.base)}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class OrdinalOutput(Layer):
    """Ordinal regression output layer (TF/Keras backend).

    Reduces any input to a single scalar score and maps it to
    *output_dim* class probabilities through learned, sorted thresholds.

    ``output_dim - 1`` interior thresholds ``t(1)…t(K-1)`` are learned,
    while ``t(0) = -∞`` and ``t(K) = +∞`` stay fixed, giving::

        P(y = k | x) = σ(t(k+1) - logit) - σ(t(k) - logit)
    """

    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.output_dim = output_dim

    def build(self, input_shape):
        # Projection to a single scalar score per example.
        self.kernel = self.add_weight(
            name="kernel",
            shape=(input_shape[-1], 1),
            initializer="glorot_uniform",
            trainable=True,
        )
        self.bias = self.add_weight(
            name="bias",
            shape=(1,),
            initializer="zeros",
            trainable=True,
        )
        # Interior thresholds start sorted so class probabilities are
        # non-negative from the first forward pass.
        self.interior_thresholds = self.add_weight(
            name="thresholds",
            shape=(1, self.output_dim - 1),
            initializer=SortedInitializer("glorot_uniform"),
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        score = tf.matmul(inputs, self.kernel) + self.bias  # (batch, 1)
        # Pad the learned thresholds with fixed +/-inf sentinels.
        bounds = tf.concat(
            [tf.fill([1, 1], -_INF), self.interior_thresholds, tf.fill([1, 1], _INF)],
            axis=-1,
        )
        upper = tf.sigmoid(bounds[:, 1:] - score)
        lower = tf.sigmoid(bounds[:, :-1] - score)
        return upper - lower

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

    def get_config(self):
        config = super().get_config()
        config["output_dim"] = self.output_dim
        return config
|
deepordinal/torch.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
|
|
4
|
+
__all__ = ["OrdinalOutput", "ordinal_loss", "ordistic_loss"]
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _penalty(z, name):
|
|
8
|
+
if name == "hinge":
|
|
9
|
+
return torch.clamp(1 - z, min=0)
|
|
10
|
+
elif name == "smooth_hinge":
|
|
11
|
+
return torch.where(
|
|
12
|
+
z >= 1,
|
|
13
|
+
torch.zeros_like(z),
|
|
14
|
+
torch.where(z > 0, (1 - z) ** 2 / 2, 0.5 - z),
|
|
15
|
+
)
|
|
16
|
+
elif name == "modified_least_squares":
|
|
17
|
+
return torch.where(z >= 1, torch.zeros_like(z), (1 - z) ** 2)
|
|
18
|
+
elif name == "logistic":
|
|
19
|
+
return torch.nn.functional.softplus(-z)
|
|
20
|
+
else:
|
|
21
|
+
raise ValueError(f"Unknown penalty: {name}")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def ordinal_loss(logits, targets, thresholds, construction="all", penalty="logistic"):
    """Threshold-based ordinal loss of Rennie & Srebro (IJCAI 2005).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        thresholds: (K-1,) — sorted interior thresholds.
        construction: ``'all'`` (every threshold penalized) or
            ``'immediate'`` (only the two thresholds around the true class).
        penalty: ``'hinge'``, ``'smooth_hinge'``, ``'modified_least_squares'``,
            or ``'logistic'``.

    Returns:
        Scalar mean loss over the batch.
    """
    z = logits.reshape(-1)
    labels0 = targets.long()
    num_classes = thresholds.shape[0] + 1
    # The paper's labels are 1-indexed.
    labels1 = labels0 + 1  # (batch,) in [1, K]

    if construction == "all":
        # eq 13: sum over l=1..K-1 of f(s(l;y) * (theta_l - z)),
        # with s(l;y) = -1 for l < y and +1 for l >= y.
        levels = torch.arange(1, num_classes, device=z.device).float()  # (K-1,)
        direction = torch.where(
            levels.unsqueeze(0) < labels1.unsqueeze(1), -1.0, 1.0
        )  # (batch, K-1)
        margins = thresholds.unsqueeze(0) - z.unsqueeze(1)  # (batch, K-1)
        per_example = _penalty(direction * margins, penalty).sum(dim=1)
    elif construction == "immediate":
        # eq 12: f(z - theta_{y-1}) + f(theta_y - z); +/-inf sentinels make
        # the boundary classes one-sided.
        neg_inf = torch.tensor([float("-inf")], device=thresholds.device)
        pos_inf = torch.tensor([float("inf")], device=thresholds.device)
        lower = torch.cat([neg_inf, thresholds])[labels0]  # theta_{y-1}
        upper = torch.cat([thresholds, pos_inf])[labels0]  # theta_y
        per_example = _penalty(z - lower, penalty) + _penalty(upper - z, penalty)
    else:
        raise ValueError(f"Unknown construction: {construction}")

    return per_example.mean()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def ordistic_loss(logits, targets, means, log_priors=None):
    """Ordistic loss (Rennie & Srebro, Section 4).

    Args:
        logits: (batch,) or (batch, 1) — raw predictor output z(x).
        targets: (batch,) — integer labels in [0, K).
        means: (K,) — class means (mu_1=-1, mu_K=1 by convention; interior learned).
        log_priors: (K,) or None — log-prior terms pi_i. Accepts any
            tensor-like (tensor, list, ndarray). Defaults to zeros.

    Returns:
        Scalar mean negative log-likelihood over the batch.
    """
    logits = logits.reshape(-1)
    targets = targets.long()
    K = means.shape[0]
    if log_priors is None:
        log_priors = torch.zeros(K, device=logits.device, dtype=logits.dtype)
    else:
        # Match the TF backend, which casts user-supplied priors: coerce onto
        # the logits' device/dtype so mixed-device inputs don't error and
        # float64 priors don't silently promote the result dtype. as_tensor
        # also accepts plain lists/ndarrays.
        log_priors = torch.as_tensor(
            log_priors, device=logits.device, dtype=logits.dtype
        )
    # energy_ik = mu_k * z_i + pi_k - mu_k^2 / 2
    energy = (
        means.unsqueeze(0) * logits.unsqueeze(1)
        + log_priors.unsqueeze(0)
        - means.unsqueeze(0) ** 2 / 2
    )
    # loss = -log P(y|z) = -energy[y] + log(sum_k exp(energy[k]))
    target_energy = energy[torch.arange(len(targets), device=targets.device), targets]
    log_partition = torch.logsumexp(energy, dim=1)
    return (log_partition - target_energy).mean()
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class OrdinalOutput(nn.Module):
    """Ordinal regression output layer (PyTorch backend).

    Reduces any input to a single scalar score and maps it to
    *output_dim* class probabilities through learned, sorted thresholds.

    ``output_dim - 1`` interior thresholds ``t(1)…t(K-1)`` are learned,
    while ``t(0) = -∞`` and ``t(K) = +∞`` stay fixed, giving::

        P(y = k | x) = σ(t(k+1) - logit) - σ(t(k) - logit)

    Args:
        input_dim: Size of the input feature dimension.
        output_dim: Number of ordinal classes.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.linear = nn.Linear(input_dim, 1)
        self.interior_thresholds = nn.Parameter(torch.empty(output_dim - 1))
        self._init_thresholds()

    def _init_thresholds(self):
        # Xavier init needs a 2-D tensor; the unsqueezed view shares storage
        # with the 1-D parameter, so the fill lands in place.
        nn.init.xavier_uniform_(self.interior_thresholds.unsqueeze(0))
        with torch.no_grad():
            # Start sorted so class probabilities are non-negative.
            ordered = self.interior_thresholds.sort().values
            self.interior_thresholds.copy_(ordered)

    def forward(self, x):
        score = self.linear(x)  # (batch, 1)
        # Pad the learned thresholds with fixed +/-inf sentinels.
        edges = torch.cat(
            [
                torch.full((1,), float("-inf"), device=score.device, dtype=score.dtype),
                self.interior_thresholds,
                torch.full((1,), float("inf"), device=score.device, dtype=score.dtype),
            ]
        )  # (K+1,)
        upper = torch.sigmoid(edges[1:] - score)
        lower = torch.sigmoid(edges[:-1] - score)
        return upper - lower
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepordinal
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Ordinal output layers and loss functions (Rennie & Srebro, 2005) for PyTorch and TF/Keras
|
|
5
|
+
Author: Nicholas Hirons
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/nhirons/deepordinal
|
|
8
|
+
Keywords: ordinal-regression,deep-learning,pytorch,tensorflow,keras
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Provides-Extra: tf
|
|
18
|
+
Requires-Dist: tensorflow>=2.0; extra == "tf"
|
|
19
|
+
Provides-Extra: torch
|
|
20
|
+
Requires-Dist: torch>=2.0; extra == "torch"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# DeepOrdinal
|
|
24
|
+
|
|
25
|
+
Ordinal output layers and loss functions ([Rennie & Srebro, 2005](https://ttic.uchicago.edu/~nati/Publications/RennieSrebroIJCAI05.pdf)) for PyTorch and TF/Keras.
|
|
26
|
+
|
|
27
|
+
DeepOrdinal provides an `OrdinalOutput` layer that converts a learned logit into ordinal class probabilities via sorted thresholds, plus loss functions designed specifically for ordinal regression.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install deepordinal
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
With a specific backend:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install ".[tf]" # TensorFlow/Keras
|
|
39
|
+
pip install ".[torch]" # PyTorch
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For development:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install -e ".[tf,torch]"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Backends
|
|
49
|
+
|
|
50
|
+
DeepOrdinal supports two backends with identical APIs:
|
|
51
|
+
|
|
52
|
+
| | PyTorch | TensorFlow/Keras |
|
|
53
|
+
|---|---|---|
|
|
54
|
+
| Module | `deepordinal.torch` | `deepordinal.tf` |
|
|
55
|
+
| Layer | `OrdinalOutput(input_dim=D, output_dim=K)` | `OrdinalOutput(output_dim=K)` |
|
|
56
|
+
| Loss functions | `ordinal_loss`, `ordistic_loss` | `ordinal_loss`, `ordistic_loss` |
|
|
57
|
+
|
|
58
|
+
## OrdinalOutput Layer
|
|
59
|
+
|
|
60
|
+
The `OrdinalOutput` layer accepts any input size, projects to a single logit, and converts it into K class probabilities using K-1 learned, sorted thresholds:
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
P(y = k | x) = sigmoid(t(k+1) - logit) - sigmoid(t(k) - logit)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
where `t(0) = -inf` and `t(K) = inf` are fixed, and interior thresholds are initialized sorted.
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from deepordinal.torch import OrdinalOutput # or deepordinal.tf
|
|
70
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4) # TF omits input_dim
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Loss Functions
|
|
74
|
+
|
|
75
|
+
DeepOrdinal implements the threshold-based ordinal loss functions from Rennie & Srebro, "Loss Functions for Preference Levels" (IJCAI 2005). These operate on raw logits and thresholds rather than probability outputs.
|
|
76
|
+
|
|
77
|
+
### `ordinal_loss`
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
ordinal_loss(logits, targets, thresholds, construction='all', penalty='logistic')
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
84
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
85
|
+
- **thresholds**: `(K-1,)` — sorted interior thresholds
|
|
86
|
+
- **construction**: `'all'` or `'immediate'`
|
|
87
|
+
- **penalty**: `'hinge'`, `'smooth_hinge'`, `'modified_least_squares'`, or `'logistic'`
|
|
88
|
+
- **Returns**: scalar mean loss over the batch
|
|
89
|
+
|
|
90
|
+
#### Constructions
|
|
91
|
+
|
|
92
|
+
- **All-threshold** (default, eq 13): penalizes violations of every threshold, weighted by direction. Bounds mean absolute error. Best performer in the paper's experiments.
|
|
93
|
+
- **Immediate-threshold** (eq 12): only penalizes violations of the two thresholds bounding the correct class segment.
|
|
94
|
+
|
|
95
|
+
#### Penalty functions
|
|
96
|
+
|
|
97
|
+
| Name | Formula | Reference |
|
|
98
|
+
|---|---|---|
|
|
99
|
+
| `'hinge'` | `max(0, 1-z)` | eq 5 |
|
|
100
|
+
| `'smooth_hinge'` | 0 if z≥1, (1-z)²/2 if 0<z<1, 0.5-z if z≤0 | eq 6 |
|
|
101
|
+
| `'modified_least_squares'` | 0 if z≥1, (1-z)² if z<1 | eq 7 |
|
|
102
|
+
| `'logistic'` | `log(1 + exp(-z))` | eq 9 |
|
|
103
|
+
|
|
104
|
+
The paper recommends **all-threshold + logistic** as the best-performing combination.
|
|
105
|
+
|
|
106
|
+
### `ordistic_loss`
|
|
107
|
+
|
|
108
|
+
Probabilistic generalization of logistic regression to K-class ordinal problems (Section 4).
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
ordistic_loss(logits, targets, means, log_priors=None)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
- **logits**: `(batch,)` or `(batch, 1)` — raw predictor output
|
|
115
|
+
- **targets**: `(batch,)` — integer labels in `[0, K)`
|
|
116
|
+
- **means**: `(K,)` — class means (convention: μ₁=-1, μ_K=1; interior means learned)
|
|
117
|
+
- **log_priors**: `(K,)` or `None` — optional log-prior terms π_i
|
|
118
|
+
- **Returns**: scalar mean negative log-likelihood over the batch
|
|
119
|
+
|
|
120
|
+
### Example usage
|
|
121
|
+
|
|
122
|
+
#### PyTorch
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import torch
|
|
126
|
+
from deepordinal.torch import OrdinalOutput, ordinal_loss
|
|
127
|
+
|
|
128
|
+
layer = OrdinalOutput(input_dim=16, output_dim=4)
|
|
129
|
+
h = torch.randn(8, 16)
|
|
130
|
+
targets = torch.randint(0, 4, (8,))
|
|
131
|
+
|
|
132
|
+
probs = layer(h)
|
|
133
|
+
loss = ordinal_loss(layer.linear(h), targets, layer.interior_thresholds)
|
|
134
|
+
loss.backward()
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### TensorFlow
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import tensorflow as tf
|
|
141
|
+
from deepordinal.tf import OrdinalOutput, ordinal_loss
|
|
142
|
+
|
|
143
|
+
layer = OrdinalOutput(output_dim=4)
|
|
144
|
+
h = tf.random.normal((8, 16))
|
|
145
|
+
targets = tf.random.uniform((8,), 0, 4, dtype=tf.int32)
|
|
146
|
+
|
|
147
|
+
with tf.GradientTape() as tape:
|
|
148
|
+
probs = layer(h)
|
|
149
|
+
logit = tf.matmul(h, layer.kernel) + layer.bias
|
|
150
|
+
loss = ordinal_loss(logit, targets, tf.squeeze(layer.interior_thresholds))
|
|
151
|
+
grads = tape.gradient(loss, layer.trainable_variables)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Running Tests
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
pip install -e ".[tf,torch]"
|
|
158
|
+
pytest -v
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Changelog
|
|
162
|
+
|
|
163
|
+
### 0.2.0
|
|
164
|
+
|
|
165
|
+
- Added `ordinal_loss` — Rennie & Srebro threshold-based ordinal loss with two constructions (all-threshold, immediate-threshold) and four penalty functions (hinge, smooth hinge, modified least squares, logistic)
|
|
166
|
+
- Added `ordistic_loss` — ordistic negative log-likelihood loss (Rennie & Srebro, Section 4)
|
|
167
|
+
- Both loss functions available in `deepordinal.torch` and `deepordinal.tf`
|
|
168
|
+
|
|
169
|
+
### 0.1.0
|
|
170
|
+
|
|
171
|
+
- Added PyTorch backend (`deepordinal.torch`) with `OrdinalOutput` module
|
|
172
|
+
- Modernized TensorFlow backend to `tf.keras` with self-contained `OrdinalOutput` layer and `SortedInitializer`
|
|
173
|
+
- Dual-backend support (TensorFlow/Keras and PyTorch) with matching APIs
|
|
174
|
+
- `pyproject.toml` build configuration with optional `[tf]` and `[torch]` extras
|
|
175
|
+
|
|
176
|
+
### Initial
|
|
177
|
+
|
|
178
|
+
- `OrdinalOutput` Keras layer for deep ordinal regression
|
|
179
|
+
- Example notebook with synthetic ordinal data
|
|
180
|
+
|
|
181
|
+
## Examples
|
|
182
|
+
|
|
183
|
+
- `examples/example_tf.ipynb` — TensorFlow/Keras with `ordinal_loss` and `GradientTape` training loop
|
|
184
|
+
- `examples/example_torch.ipynb` — PyTorch with `ordinal_loss` and standard training loop
|
|
185
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
deepordinal/__init__.py,sha256=J_V0erDdvUobpcKnwcFY6Uvb_a5qF4J0dcE14znJWX4,48
|
|
2
|
+
deepordinal/tf.py,sha256=FRZ-LuAjEvrOfAVXI_B-wGxxZiU4RlOvZc4sEpl4Z2o,5782
|
|
3
|
+
deepordinal/torch.py,sha256=PLtkdhRKGfsuPg9Q52pPiu2Q2v5S2g0hoEzmPikadhE,5040
|
|
4
|
+
deepordinal-0.2.0.dist-info/licenses/LICENSE,sha256=AWIoKzqfTVmlyfwzvydQoYGGiA6IKHnAdQzaaAGGQRw,1072
|
|
5
|
+
deepordinal-0.2.0.dist-info/METADATA,sha256=NB-xBvi6v0oDVdrDd2SP8aCEWC1HB-zpueFXUhwQz68,6182
|
|
6
|
+
deepordinal-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
deepordinal-0.2.0.dist-info/top_level.txt,sha256=fa0kRbTv8EIePUchQkt-TfBTcUfXnMSIXDNBiAZ8BGw,12
|
|
8
|
+
deepordinal-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nicholas Hirons
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
deepordinal
|