floydnet 0.1.2__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {floydnet-0.1.2 → floydnet-1.1.0}/.gitignore +2 -1
- {floydnet-0.1.2 → floydnet-1.1.0}/CHANGELOG.md +4 -0
- floydnet-1.1.0/CITATION.cff +17 -0
- {floydnet-0.1.2 → floydnet-1.1.0}/PKG-INFO +16 -13
- {floydnet-0.1.2 → floydnet-1.1.0}/README.md +15 -12
- {floydnet-0.1.2 → floydnet-1.1.0}/example/README.md +20 -2
- {floydnet-0.1.2 → floydnet-1.1.0}/pyproject.toml +1 -1
- {floydnet-0.1.2 → floydnet-1.1.0}/src/floydnet/functional.py +15 -1
- floydnet-0.1.2/CITATION.cff +0 -10
- {floydnet-0.1.2 → floydnet-1.1.0}/LICENSE +0 -0
- {floydnet-0.1.2 → floydnet-1.1.0}/src/floydnet/__init__.py +0 -0
- {floydnet-0.1.2 → floydnet-1.1.0}/src/floydnet/transformer.py +0 -0
{floydnet-0.1.2 → floydnet-1.1.0}/CHANGELOG.md

@@ -2,6 +2,10 @@
 
 All notable changes to this project will be documented in this file.
 
+## [1.1.0] - 2026-02-05
+- Added `softmax_cap` parameter to `pivotal_attention3` for improved numerical stability.
+- Added LRGB example script.
+
 ## [1.0.0] - 2026-01-25
 - Full release with training and evaluation scripts for Graph Count, BREC, and TSP.
 - Added `pivotal_attention3` functional API for 3-Floyd attention.
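The `softmax_cap` entry above refers to a tanh-based logit cap; the exact transform appears in the `src/floydnet/functional.py` diff further down. As a quick illustration (a standalone sketch, not part of the package; `soft_cap` is a name introduced only for this example), the cap bounds attention logits to (-cap, cap) while leaving small logits almost unchanged:

```python
import torch

def soft_cap(scores: torch.Tensor, cap: float) -> torch.Tensor:
    """Tanh-based logit cap: output bounded to (-cap, cap), ~identity near zero."""
    return cap * torch.tanh(scores / cap)

scores = torch.tensor([0.5, 5.0, 50.0, 5000.0])
print(soft_cap(scores, cap=30.0))
# Small logits pass through nearly unchanged; large ones saturate near the cap (~30),
# which keeps the subsequent softmax numerically well-behaved.
```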
floydnet-1.1.0/CITATION.cff ADDED

@@ -0,0 +1,17 @@
+cff-version: 1.2.0
+title: "FloydNet"
+message: "If you use FloydNet in your research, please cite the associated paper."
+type: software
+authors:
+  - family-names: Yu
+    given-names: Jingcheng
+  - family-names: Zeng
+    given-names: Mingliang
+  - family-names: Ye
+    given-names: Qiwei
+version: "1.0.0"
+license: Apache-2.0
+repository-code: "https://github.com/ocx-lab/FloydNet"
+doi: "10.48550/arXiv.2601.19094"
+url: "https://arxiv.org/abs/2601.19094"
+date-released: 2026-01-27
{floydnet-0.1.2 → floydnet-1.1.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: floydnet
-Version: 0.1.2
+Version: 1.1.0
 Summary: Floyd Multi-Head Attention: a drop-in variant of PyTorch MHA with module and function APIs
 Project-URL: Homepage, https://github.com/ocx-lab/FloydNet
 Project-URL: Repository, https://github.com/ocx-lab/FloydNet
@@ -235,7 +235,7 @@ Description-Content-Type: text/markdown
 [](https://www.python.org/)
 [](https://pytorch.org/)
 
-Official implementation of
+Official implementation of [FloydNet](https://arxiv.org/pdf/2601.19094).
 
 
 
@@ -247,13 +247,13 @@ This repository serves two audiences:
 
 ## Introduction
 
-FloydNet is the official PyTorch implementation
+FloydNet is the official PyTorch implementation.
 The repository provides:
 
 1. **Reusable components**: a drop-in attention/Transformer-block interface intended for integration into existing projects.
 2. **Reproduction code**: end-to-end training/evaluation pipelines to reproduce the benchmarks reported in the paper.
 
-For algorithmic details, hyperparameter choices, and analysis, please refer to the paper
+For algorithmic details, hyperparameter choices, and analysis, please refer to the [paper](https://arxiv.org/pdf/2601.19094).
 
 ---
 
@@ -360,9 +360,9 @@ uv pip install -e .
 
 ## Changelog (latest)
 
--
-- Added
-
+- Added `softmax_cap` parameter to `pivotal_attention3` for improved numerical stability.
+- Added LRGB example script.
+
 
 The full changelog is in [CHANGELOG.md](CHANGELOG.md).
 
@@ -371,12 +371,15 @@ The full changelog is in [CHANGELOG.md](CHANGELOG.md).
 If you use this code in your research, please cite the paper:
 
 ```bibtex
-@
-
-
-
-
-
+@misc{yu2026floydnetlearningparadigmglobal,
+      title={FloydNet: A Learning Paradigm for Global Relational Reasoning},
+      author={Jingcheng Yu and Mingliang Zeng and Qiwei Ye},
+      year={2026},
+      eprint={2601.19094},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG},
+      url={https://arxiv.org/abs/2601.19094},
+
 }
 ```
 
{floydnet-0.1.2 → floydnet-1.1.0}/README.md

@@ -3,7 +3,7 @@
 [](https://www.python.org/)
 [](https://pytorch.org/)
 
-Official implementation of
+Official implementation of [FloydNet](https://arxiv.org/pdf/2601.19094).
 
 
 
@@ -15,13 +15,13 @@ This repository serves two audiences:
 
 ## Introduction
 
-FloydNet is the official PyTorch implementation
+FloydNet is the official PyTorch implementation.
 The repository provides:
 
 1. **Reusable components**: a drop-in attention/Transformer-block interface intended for integration into existing projects.
 2. **Reproduction code**: end-to-end training/evaluation pipelines to reproduce the benchmarks reported in the paper.
 
-For algorithmic details, hyperparameter choices, and analysis, please refer to the paper
+For algorithmic details, hyperparameter choices, and analysis, please refer to the [paper](https://arxiv.org/pdf/2601.19094).
 
 ---
 
@@ -128,9 +128,9 @@ uv pip install -e .
 
 ## Changelog (latest)
 
--
-- Added
-
+- Added `softmax_cap` parameter to `pivotal_attention3` for improved numerical stability.
+- Added LRGB example script.
+
 
 The full changelog is in [CHANGELOG.md](CHANGELOG.md).
 
@@ -139,12 +139,15 @@ The full changelog is in [CHANGELOG.md](CHANGELOG.md).
 If you use this code in your research, please cite the paper:
 
 ```bibtex
-@
-
-
-
-
-
+@misc{yu2026floydnetlearningparadigmglobal,
+      title={FloydNet: A Learning Paradigm for Global Relational Reasoning},
+      author={Jingcheng Yu and Mingliang Zeng and Qiwei Ye},
+      year={2026},
+      eprint={2601.19094},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG},
+      url={https://arxiv.org/abs/2601.19094},
+
 }
 ```
 
{floydnet-0.1.2 → floydnet-1.1.0}/example/README.md

@@ -1,10 +1,11 @@
 ### Benchmarks
 
-The paper reports results on **
+The paper reports results on **four benchmarks**:
 
 - Graph Count
 - BREC
 - TSP
+- LRGB
 
 ## 🚀 Key Results
 
@@ -134,4 +135,21 @@ torchrun \
     --wandb_name TSP_exp
 ```
 
----
+---
+
+### LRGB
+
+The LRGB benchmark and dataset construction follow:
+https://github.com/vijaydwivedi75/lrgb
+
+#### PCQM-Contact
+
+```bash
+source .venv/bin/activate
+cd example
+torchrun \
+    --nproc_per_node=8 \
+    -m LRGB.run \
+    --name pcqm-contact \
+    --wandb_name LRGB_pcqm-contact
+```
{floydnet-0.1.2 → floydnet-1.1.0}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "floydnet"
-version = "0.1.2"
+version = "1.1.0"
 description = "Floyd Multi-Head Attention: a drop-in variant of PyTorch MHA with module and function APIs"
 readme = "README.md"
 requires-python = ">=3.9"
{floydnet-0.1.2 → floydnet-1.1.0}/src/floydnet/functional.py

@@ -31,6 +31,7 @@ def pivotal_attention(
     dropout: float = 0.0,
     scale: Optional[float] = None,
     inf: float = 1e9,
+    softmax_cap: float = -1,
 ) -> torch.Tensor:
     """Pivotal attention as described in "FLOYDNET: A LEARNING PARADIGM FOR GLOBAL RELATIONAL REASONING".
 
@@ -47,6 +48,9 @@ def pivotal_attention(
         dropout: Dropout probability applied to attention weights (only effective if > 0).
         scale: Optional custom scaling factor. If None, defaults to 1/sqrt(2*D).
         inf: Value to use for -infinity in masks.
+        softmax_cap: If > 0, applies a tanh-based logit cap before softmax.
+            Note: when using a non-boolean (additive) attn_mask, ensure its magnitude/semantics remain compatible
+            with capping (e.g., very large negative values used to approximate -inf can interact with logit shaping).
 
     Returns:
         Tensor of shape (B, H, L_i, L_k, D)
@@ -65,6 +69,9 @@ def pivotal_attention(
     attn_scores = torch.einsum("bhikd,bhijd->bhikj", q_ik, k_ij) \
         + torch.einsum("bhikd,bhjkd->bhikj", q_ik, k_jk)
 
+    if softmax_cap > 0:
+        attn_scores = softmax_cap * torch.tanh(attn_scores / softmax_cap)
+
     if attn_mask is not None:
         if attn_mask.dtype == torch.bool:
             attn_scores = attn_scores.masked_fill(attn_mask, -inf)
@@ -93,6 +100,7 @@ def pivotal_attention3(
     dropout: float = 0.0,
     scale: Optional[float] = None,
     inf: float = 1e9,
+    softmax_cap: float = -1,
 ) -> torch.Tensor:
     """3-Pivotal attention as described in "FLOYDNET: A LEARNING PARADIGM FOR GLOBAL RELATIONAL REASONING".
 
@@ -111,9 +119,12 @@ def pivotal_attention3(
         dropout: Dropout probability applied to attention weights (only effective if > 0).
         scale: Optional custom scaling factor. If None, defaults to 1/sqrt(3*D).
        inf: Value to use for -infinity in masks.
+        softmax_cap: If > 0, applies a tanh-based logit cap before softmax.
+            Note: when using a non-boolean (additive) attn_mask, ensure its magnitude/semantics remain compatible
+            with capping (e.g., very large negative values used to approximate -inf can interact with logit shaping).
 
     Returns:
-        Tensor of shape (B, H, L_i,
+        Tensor of shape (B, H, L_i, L_j, L_k, D)
     """
     assert all([t.dim() == 6 for t in [q_ijk, k_pjk, k_ipk, k_ijp, v_pjk, v_ipk, v_ijp]]), "All inputs must be 6D tensors"
     B, H, L_i, L_j, L_k, D = q_ijk.shape
@@ -130,6 +141,9 @@ def pivotal_attention3(
     attn_scores = torch.einsum("bhijkd,bhpjkd->bhijkp", q_ijk, k_pjk) \
         + torch.einsum("bhijkd,bhipkd->bhijkp", q_ijk, k_ipk) \
        + torch.einsum("bhijkd,bhijpd->bhijkp", q_ijk, k_ijp)
+
+    if softmax_cap > 0:
+        attn_scores = softmax_cap * torch.tanh(attn_scores / softmax_cap)
 
     if attn_mask is not None:
         if attn_mask.dtype == torch.bool:
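The docstring note added above cautions that additive (non-boolean) attention masks can interact with the logit cap. Below is a minimal standalone sketch of that interaction, assuming nothing beyond the ordering visible in the diff (cap the raw scores first, then apply the mask); the variable names are illustrative, not part of the package API:

```python
import torch

cap = 5.0
scores = torch.tensor([2.0, 40.0, -3.0])
additive_mask = torch.tensor([0.0, -1e9, 0.0])   # second position should be masked out

# Ordering shown in the diff: cap the raw scores, then apply the mask.
cap_then_mask = cap * torch.tanh(scores / cap) + additive_mask
print(cap_then_mask)  # second entry stays ~ -1e9, i.e. still effectively -inf

# If the large negative "mask" were instead folded into the scores before capping,
# tanh would flatten it to roughly -cap and it would no longer act as -inf.
mask_then_cap = cap * torch.tanh((scores + additive_mask) / cap)
print(mask_then_cap)  # second entry becomes ~ -5.0
```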
floydnet-0.1.2/CITATION.cff DELETED

@@ -1,10 +0,0 @@
-cff-version: 1.2.0
-title: "Floyd Multi-Head Attention"
-authors:
-  - family-names: YourSurname
-    given-names: YourName
-    orcid: "0000-0000-0000-0000"
-version: "0.1.0"
-license: MIT
-repository-code: "https://github.com/yourname/floyd-net"
-message: "If you use this software, please cite it as below."
{floydnet-0.1.2 → floydnet-1.1.0}/LICENSE: File without changes
{floydnet-0.1.2 → floydnet-1.1.0}/src/floydnet/__init__.py: File without changes
{floydnet-0.1.2 → floydnet-1.1.0}/src/floydnet/transformer.py: File without changes