pytorch-fairloss 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ publish:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.11"
20
+
21
+ - name: Install build tools
22
+ run: pip install build twine
23
+
24
+ - name: Build package
25
+ run: python -m build
26
+
27
+ - name: Create GitHub Release
28
+ uses: softprops/action-gh-release@v2
29
+ with:
30
+ generate_release_notes: true
31
+ files: dist/*
32
+
33
+ - name: Publish to PyPI
34
+ env:
35
+ TWINE_USERNAME: __token__
36
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
37
+ run: twine upload dist/* --verbose
@@ -0,0 +1,12 @@
1
+ data/
2
+ *.pth
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+ .DS_Store
7
+ .vscode/
8
+
9
+ dist/
10
+ build/
11
+ *.egg-info/
12
+ .eggs/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Aref
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.4
2
+ Name: pytorch-fairloss
3
+ Version: 0.1.1
4
+ Summary: PyTorch classification loss with a SmoothMax-based calibration penalty
5
+ Project-URL: Homepage, https://github.com/stormaref/FairLoss
6
+ Project-URL: Repository, https://github.com/stormaref/FairLoss
7
+ Project-URL: Issues, https://github.com/stormaref/FairLoss/issues
8
+ Author-email: Aref <storm.aref@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: calibration,classification,loss,pytorch
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.9
23
+ Requires-Dist: torch>=2.0
24
+ Provides-Extra: dev
25
+ Requires-Dist: build; extra == 'dev'
26
+ Requires-Dist: pytest; extra == 'dev'
27
+ Requires-Dist: twine; extra == 'dev'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # FairLoss
31
+
32
+ PyTorch classification loss that combines cross-entropy with a smooth argmax penalty. Inspired by **Penalized Logarithmic Loss (PLL)** from Ahmadian et al. (2025).
33
+
34
+ ## The problem
35
+
36
+ In multi-class classification, we often want losses to rank **correct** predictions above **incorrect** ones. Strictly proper scores such as cross-entropy encourage calibrated probabilities, but they do not always preserve that ranking: an incorrect prediction can receive a lower loss than a correct one.
37
+
38
+ Consider two logit vectors, both with true class `0`:
39
+
40
+ | Sample | Logits | True class | Argmax correct? |
41
+ | ------ | -------------------- | ---------- | --------------- |
42
+ | 1 | `[0.34, 0.33, 0.33]` | 0 | Yes |
43
+ | 2 | `[0.49, 0.51, 0.00]` | 0 | No |
44
+
45
+ Sample 2 assigns a **higher logit to the true class** but picks the **wrong label**. Cross-entropy prefers sample 2; intuitively, sample 1 should win.
46
+
47
+ Ahmadian et al. call this the **Superior** property: a scoring rule should always rank correct predictions above incorrect ones. PLL adds a hard argmax penalty while staying strictly proper. FairLoss applies the same idea with a differentiable penalty suitable for gradient-based training.
48
+
49
+ Running [`examples/difference.py`](examples/difference.py) on these logits:
50
+
51
+ ```
52
+ Cross-entropy: tensor([1.0920, 0.9681]) # sample 2 wins
53
+ FairLoss: tensor([1.0920, 1.3014]) # sample 1 wins
54
+ ```
55
+
56
+ Cross-entropy gives sample 2 the lower loss because it is more confident on the true class. FairLoss adds a penalty when the argmax is wrong, so sample 1 ends up with the lower loss. Sample 1 keeps the same loss under both criteria because its prediction is already correct.
57
+
58
+ ## How it works
59
+
60
+ FairLoss keeps standard cross-entropy and adds a SmoothMax-based penalty:
61
+
62
+ ```
63
+ total_loss = cross_entropy + penalty
64
+ ```
65
+
66
+ The penalty is built from the gap between the true-class logit and a smooth approximation of the largest logit:
67
+
68
+ 1. **SmoothMax** approximates `max(logits)` — the logit of the predicted class.
69
+ 2. **`logit_true - smooth_max`** is near zero when the argmax is correct, negative when it is wrong.
70
+ 3. A **sigmoid transform** maps that gap into a smooth penalty: near zero for correct predictions, positive for incorrect ones.
71
+
72
+ Full derivation: [`formulation.pdf`](formulation.pdf)
73
+
74
+ ## Install
75
+
76
+ ```bash
77
+ pip install pytorch-fairloss
78
+ ```
79
+
80
+ Import as `from fairloss import FairLoss` (the PyPI name differs from the import path).
81
+
82
+ Or install from source:
83
+
84
+ ```bash
85
+ pip install git+https://github.com/stormaref/FairLoss.git
86
+ ```
87
+
88
+ ## Quick start
89
+
90
+ ```python
91
+ import torch
92
+ import torch.nn as nn
93
+ import torch.optim as optim
94
+ from fairloss import FairLoss
95
+
96
+ model = nn.Linear(784, 10)
97
+ criterion = FairLoss()
98
+ optimizer = optim.Adam(model.parameters(), lr=1e-3)
99
+
100
+ x = torch.randn(64, 784)
101
+ y = torch.randint(0, 10, (64,))
102
+
103
+ logits = model(x)
104
+ loss = criterion(logits, y)
105
+
106
+ optimizer.zero_grad()
107
+ loss.backward()
108
+ optimizer.step()
109
+ ```
110
+
111
+ See [`examples/basic_usage.py`](examples/basic_usage.py) for a training loop and [`examples/difference.py`](examples/difference.py) for the cross-entropy comparison above.
112
+
113
+ ## API
114
+
115
+ ### `FairLoss(cross_entropy=None, smooth_max_beta=1000.0, scaling_factor=1000.0, reduction="mean")`
116
+
117
+ Wraps a `nn.CrossEntropyLoss` and adds the FairLoss penalty on top.
118
+
119
+ | Parameter | Description |
120
+ | ----------------- | --------------------------------------------------------------------------- |
121
+ | `cross_entropy` | Optional `nn.CrossEntropyLoss` with `reduction="none"`. Defaults to `nn.CrossEntropyLoss(reduction="none")`. Pass a custom instance to set `weight`, `label_smoothing`, `ignore_index`, etc. |
122
+ | `smooth_max_beta` | Sharpness (inverse temperature) of the smooth maximum over logits; larger values track `max(logits)` more closely. |
123
+ | `scaling_factor` | Scale applied before the sigmoid penalty transform. |
124
+ | `reduction` | Reduction applied to the combined loss: `"mean"`, `"sum"`, or `"none"`. Default: `"mean"`. |
125
+
126
+ **Forward pass**
127
+
128
+ - `output`: Logits tensor of shape `(N, C)` where `N` is batch size and `C` is number of classes.
129
+ - `target`: Integer class labels of shape `(N,)`.
130
+ - **Returns**: Scalar loss reduced over the batch (mean by default, or sum when `reduction="sum"`), or per-sample losses of shape `(N,)` when `reduction="none"`.
131
+
132
+ ## Requirements
133
+
134
+ - Python 3.9+
135
+ - PyTorch 2.0+
136
+
137
+ ## License
138
+
139
+ MIT — see [LICENSE](LICENSE).
140
+
141
+ ## Reference
142
+
143
+ Ahmadian, R., Ghatee, M., & Wahlström, J. (2025). Superior scoring rules for probabilistic evaluation of single-label multi-class classification tasks. _International Journal of Approximate Reasoning_, 182, 109421. [https://doi.org/10.1016/j.ijar.2025.109421](https://doi.org/10.1016/j.ijar.2025.109421)
144
+
145
+ Preprint: [arXiv:2407.17697](https://arxiv.org/abs/2407.17697)
146
+
147
+ GitHub: https://github.com/stormaref/FairLoss
@@ -0,0 +1,118 @@
1
+ # FairLoss
2
+
3
+ PyTorch classification loss that combines cross-entropy with a smooth argmax penalty. Inspired by **Penalized Logarithmic Loss (PLL)** from Ahmadian et al. (2025).
4
+
5
+ ## The problem
6
+
7
+ In multi-class classification, we often want losses to rank **correct** predictions above **incorrect** ones. Strictly proper scores such as cross-entropy encourage calibrated probabilities, but they do not always preserve that ranking: an incorrect prediction can receive a lower loss than a correct one.
8
+
9
+ Consider two logit vectors, both with true class `0`:
10
+
11
+ | Sample | Logits | True class | Argmax correct? |
12
+ | ------ | -------------------- | ---------- | --------------- |
13
+ | 1 | `[0.34, 0.33, 0.33]` | 0 | Yes |
14
+ | 2 | `[0.49, 0.51, 0.00]` | 0 | No |
15
+
16
+ Sample 2 assigns a **higher logit to the true class** but picks the **wrong label**. Cross-entropy prefers sample 2; intuitively, sample 1 should win.
17
+
18
+ Ahmadian et al. call this the **Superior** property: a scoring rule should always rank correct predictions above incorrect ones. PLL adds a hard argmax penalty while staying strictly proper. FairLoss applies the same idea with a differentiable penalty suitable for gradient-based training.
19
+
20
+ Running [`examples/difference.py`](examples/difference.py) on these logits:
21
+
22
+ ```
23
+ Cross-entropy: tensor([1.0920, 0.9681]) # sample 2 wins
24
+ FairLoss: tensor([1.0920, 1.3014]) # sample 1 wins
25
+ ```
26
+
27
+ Cross-entropy gives sample 2 the lower loss because it is more confident on the true class. FairLoss adds a penalty when the argmax is wrong, so sample 1 ends up with the lower loss. Sample 1 keeps the same loss under both criteria because its prediction is already correct.
28
+
29
+ ## How it works
30
+
31
+ FairLoss keeps standard cross-entropy and adds a SmoothMax-based penalty:
32
+
33
+ ```
34
+ total_loss = cross_entropy + penalty
35
+ ```
36
+
37
+ The penalty is built from the gap between the true-class logit and a smooth approximation of the largest logit:
38
+
39
+ 1. **SmoothMax** approximates `max(logits)` — the logit of the predicted class.
40
+ 2. **`logit_true - smooth_max`** is near zero when the argmax is correct, negative when it is wrong.
41
+ 3. A **sigmoid transform** maps that gap into a smooth penalty: near zero for correct predictions, positive for incorrect ones.
42
+
43
+ Full derivation: [`formulation.pdf`](formulation.pdf)
44
+
45
+ ## Install
46
+
47
+ ```bash
48
+ pip install pytorch-fairloss
49
+ ```
50
+
51
+ Import as `from fairloss import FairLoss` (the PyPI name differs from the import path).
52
+
53
+ Or install from source:
54
+
55
+ ```bash
56
+ pip install git+https://github.com/stormaref/FairLoss.git
57
+ ```
58
+
59
+ ## Quick start
60
+
61
+ ```python
62
+ import torch
63
+ import torch.nn as nn
64
+ import torch.optim as optim
65
+ from fairloss import FairLoss
66
+
67
+ model = nn.Linear(784, 10)
68
+ criterion = FairLoss()
69
+ optimizer = optim.Adam(model.parameters(), lr=1e-3)
70
+
71
+ x = torch.randn(64, 784)
72
+ y = torch.randint(0, 10, (64,))
73
+
74
+ logits = model(x)
75
+ loss = criterion(logits, y)
76
+
77
+ optimizer.zero_grad()
78
+ loss.backward()
79
+ optimizer.step()
80
+ ```
81
+
82
+ See [`examples/basic_usage.py`](examples/basic_usage.py) for a training loop and [`examples/difference.py`](examples/difference.py) for the cross-entropy comparison above.
83
+
84
+ ## API
85
+
86
+ ### `FairLoss(cross_entropy=None, smooth_max_beta=1000.0, scaling_factor=1000.0, reduction="mean")`
87
+
88
+ Wraps a `nn.CrossEntropyLoss` and adds the FairLoss penalty on top.
89
+
90
+ | Parameter | Description |
91
+ | ----------------- | --------------------------------------------------------------------------- |
92
+ | `cross_entropy` | Optional `nn.CrossEntropyLoss` with `reduction="none"`. Defaults to `nn.CrossEntropyLoss(reduction="none")`. Pass a custom instance to set `weight`, `label_smoothing`, `ignore_index`, etc. |
93
+ | `smooth_max_beta` | Sharpness (inverse temperature) of the smooth maximum over logits; larger values track `max(logits)` more closely. |
94
+ | `scaling_factor` | Scale applied before the sigmoid penalty transform. |
95
+ | `reduction` | Reduction applied to the combined loss: `"mean"`, `"sum"`, or `"none"`. Default: `"mean"`. |
96
+
97
+ **Forward pass**
98
+
99
+ - `output`: Logits tensor of shape `(N, C)` where `N` is batch size and `C` is number of classes.
100
+ - `target`: Integer class labels of shape `(N,)`.
101
+ - **Returns**: Scalar loss reduced over the batch (mean by default, or sum when `reduction="sum"`), or per-sample losses of shape `(N,)` when `reduction="none"`.
102
+
103
+ ## Requirements
104
+
105
+ - Python 3.9+
106
+ - PyTorch 2.0+
107
+
108
+ ## License
109
+
110
+ MIT — see [LICENSE](LICENSE).
111
+
112
+ ## Reference
113
+
114
+ Ahmadian, R., Ghatee, M., & Wahlström, J. (2025). Superior scoring rules for probabilistic evaluation of single-label multi-class classification tasks. _International Journal of Approximate Reasoning_, 182, 109421. [https://doi.org/10.1016/j.ijar.2025.109421](https://doi.org/10.1016/j.ijar.2025.109421)
115
+
116
+ Preprint: [arXiv:2407.17697](https://arxiv.org/abs/2407.17697)
117
+
118
+ GitHub: https://github.com/stormaref/FairLoss
@@ -0,0 +1,30 @@
1
+ """Minimal example: train a linear classifier with FairLoss."""
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.optim as optim
6
+
7
+ from fairloss import FairLoss
8
+
9
+
10
def main():
    """Run three Adam steps of a 784->10 linear classifier on random batches.

    Each step draws a fresh random batch of 64 inputs and integer labels,
    evaluates FairLoss on the classifier's logits, back-propagates, and
    prints the 1-based step number together with the scalar loss.
    """
    classifier = nn.Linear(784, 10)
    loss_fn = FairLoss()
    adam = optim.Adam(classifier.parameters(), lr=1e-3)

    for step in range(3):
        # Inputs are sampled before labels so the RNG stream is consumed
        # in the same order on every run.
        inputs = torch.randn(64, 784)
        labels = torch.randint(0, 10, (64,))

        loss = loss_fn(classifier(inputs), labels)

        adam.zero_grad()
        loss.backward()
        adam.step()

        print(f"step={step + 1} loss={loss.item():.4f}")
27
+
28
+
29
+ if __name__ == "__main__":
30
+ main()
@@ -0,0 +1,16 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from fairloss import FairLoss
4
+
5
# Two logit rows that share true class 0:
#   row 0 -> argmax is class 0 (correct prediction)
#   row 1 -> argmax is class 1 (incorrect prediction)
output = torch.tensor(
    [
        [0.34, 0.33, 0.33],
        [0.49, 0.51, 0.0],
    ],
    dtype=torch.float32,
)
target = torch.tensor([0, 0], dtype=torch.long)

# Both criteria are left unreduced so the per-sample losses can be compared.
fair_loss_fn = FairLoss(reduction="none")
cross_entropy_fn = nn.CrossEntropyLoss(reduction="none")

cross_entropy = cross_entropy_fn(output, target)
fair_loss = fair_loss_fn(output, target)

print(f"Cross-entropy: {cross_entropy}")
print(f"FairLoss: {fair_loss}")
@@ -0,0 +1,4 @@
1
+ from fairloss.loss import FairLoss
2
+
3
+ __all__ = ["FairLoss"]
4
+ __version__ = "0.1.1"
@@ -0,0 +1,83 @@
1
+ from typing import Literal, Optional
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
class FairLoss(nn.Module):
    """Classification loss combining cross-entropy with a SmoothMax-based penalty.

    For every sample the loss is ``cross_entropy + penalty`` where::

        smooth_max = logsumexp(smooth_max_beta * logits) / smooth_max_beta
        gap        = logit_true - smooth_max
        penalty    = (2 * sigmoid(-scaling_factor * gap) - 1) / num_classes

    For a positive ``smooth_max_beta`` the smooth maximum is never below the
    largest logit, so ``gap <= 0`` and the penalty lies in ``[0, 1 / num_classes)``:
    close to zero when the true class holds the largest logit and close to
    ``1 / num_classes`` when another class clearly wins.

    Args:
        cross_entropy: Cross-entropy loss with ``reduction="none"``. Defaults to
            ``nn.CrossEntropyLoss(reduction="none")``.
        smooth_max_beta: Sharpness (inverse temperature) of the smooth maximum;
            larger values track ``max(logits)`` more closely.
        scaling_factor: Scale applied to the gap before the sigmoid transform.
        reduction: Reduction applied to the combined per-sample loss:
            ``"mean"``, ``"sum"`` or ``"none"``.

    Raises:
        TypeError: If ``cross_entropy`` is not an ``nn.CrossEntropyLoss``.
        ValueError: If ``cross_entropy`` does not use ``reduction="none"`` or
            ``reduction`` is not one of the supported values.

    Forward:
        output: Logits of shape (N, C), any floating dtype.
        target: Integer class labels of shape (N,). Entries equal to
            ``cross_entropy.ignore_index`` receive no penalty.
        Returns: A scalar for ``reduction="mean"`` / ``"sum"``, or a tensor of
            per-sample losses of shape (N,) for ``reduction="none"``. The mean
            is taken over all N rows, including ignored ones.
    """

    _VALID_REDUCTIONS = ("mean", "none", "sum")

    def __init__(
        self,
        cross_entropy: Optional[nn.CrossEntropyLoss] = None,
        smooth_max_beta: float = 1000.0,
        scaling_factor: float = 1000.0,
        reduction: Literal["mean", "none", "sum"] = "mean",
    ):
        super().__init__()
        if cross_entropy is None:
            cross_entropy = nn.CrossEntropyLoss(reduction="none")

        if not isinstance(cross_entropy, nn.CrossEntropyLoss):
            raise TypeError(
                "cross_entropy must be an instance of nn.CrossEntropyLoss, "
                f"got {type(cross_entropy).__name__}."
            )
        if cross_entropy.reduction != "none":
            raise ValueError(
                'cross_entropy must use reduction="none" so FairLoss can add the '
                f"penalty per sample before applying its own reduction, got "
                f'reduction="{cross_entropy.reduction}".'
            )
        # Reject typos early; otherwise forward() would silently return the
        # unreduced per-sample tensor for any unrecognised string.
        if reduction not in self._VALID_REDUCTIONS:
            raise ValueError(
                f"reduction must be one of {self._VALID_REDUCTIONS}, "
                f'got reduction="{reduction}".'
            )

        self.cross_entropy = cross_entropy
        self.smooth_max_beta = smooth_max_beta
        self.scaling_factor = scaling_factor
        self.reduction = reduction

    def one_hot_encode(self, target: torch.Tensor, num_classes: int) -> torch.Tensor:
        """Return ``target`` as a float one-hot tensor with ``num_classes`` columns.

        Kept as a public helper for existing callers; ``forward`` selects the
        true-class logit with ``gather`` and no longer needs it.
        """
        target = target.long()
        return F.one_hot(target, num_classes=num_classes).float()

    def forward(self, output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Compute cross-entropy plus the SmoothMax penalty for a batch of logits."""
        num_classes = output.shape[1]
        base_loss = self.cross_entropy(output, target)

        labels = target.long()
        # Ignored labels (e.g. -100) are not valid column indices; point them
        # at column 0 for the lookup and zero their penalty afterwards.
        ignored = labels == self.cross_entropy.ignore_index
        safe_labels = labels.masked_fill(ignored, 0)

        smooth_max = (1.0 / self.smooth_max_beta) * torch.logsumexp(
            self.smooth_max_beta * output, dim=1
        )

        # gather keeps the logits' dtype and device, so float64 / half inputs
        # work without a separately allocated float32 helper tensor.
        true_logit = output.gather(1, safe_labels.unsqueeze(1)).squeeze(1)
        gap = true_logit - smooth_max

        # Map the non-positive gap to [0, 1): 0 when the true class wins,
        # approaching 1 as another class dominates.
        scaled = (torch.sigmoid(-self.scaling_factor * gap) - 0.5) * 2
        penalty = (scaled / num_classes).masked_fill(ignored, 0.0)

        total_loss = base_loss + penalty

        if self.reduction == "mean":
            return total_loss.mean()
        if self.reduction == "sum":
            return total_loss.sum()

        return total_loss
Binary file
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "pytorch-fairloss"
7
+ version = "0.1.1"
8
+ description = "PyTorch classification loss with a SmoothMax-based calibration penalty"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ keywords = ["classification", "loss", "pytorch", "calibration"]
13
+ authors = [
14
+ { name = "Aref", email = "storm.aref@gmail.com" },
15
+ ]
16
+ dependencies = [
17
+ "torch>=2.0",
18
+ ]
19
+ classifiers = [
20
+ "Development Status :: 4 - Beta",
21
+ "Intended Audience :: Developers",
22
+ "Intended Audience :: Science/Research",
23
+ "License :: OSI Approved :: MIT License",
24
+ "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.9",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
30
+ ]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/stormaref/FairLoss"
34
+ Repository = "https://github.com/stormaref/FairLoss"
35
+ Issues = "https://github.com/stormaref/FairLoss/issues"
36
+
37
+ [project.optional-dependencies]
38
+ dev = ["build", "twine", "pytest"]
39
+
40
+ [tool.hatch.build.targets.wheel]
41
+ packages = ["fairloss"]