floydnet 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- floydnet/functional.py +15 -1
- {floydnet-1.0.0.dist-info → floydnet-1.1.0.dist-info}/METADATA +15 -12
- floydnet-1.1.0.dist-info/RECORD +7 -0
- floydnet-1.0.0.dist-info/RECORD +0 -7
- {floydnet-1.0.0.dist-info → floydnet-1.1.0.dist-info}/WHEEL +0 -0
- {floydnet-1.0.0.dist-info → floydnet-1.1.0.dist-info}/licenses/LICENSE +0 -0
floydnet/functional.py
CHANGED
|
@@ -31,6 +31,7 @@ def pivotal_attention(
|
|
|
31
31
|
dropout: float = 0.0,
|
|
32
32
|
scale: Optional[float] = None,
|
|
33
33
|
inf: float = 1e9,
|
|
34
|
+
softmax_cap: float = -1,
|
|
34
35
|
) -> torch.Tensor:
|
|
35
36
|
"""Pivotal attention as described in "FLOYDNET: A LEARNING PARADIGM FOR GLOBAL RELATIONAL REASONING".
|
|
36
37
|
|
|
@@ -47,6 +48,9 @@ def pivotal_attention(
|
|
|
47
48
|
dropout: Dropout probability applied to attention weights (only effective if > 0).
|
|
48
49
|
scale: Optional custom scaling factor. If None, defaults to 1/sqrt(2*D).
|
|
49
50
|
inf: Value to use for -infinity in masks.
|
|
51
|
+
softmax_cap: If > 0, applies a tanh-based logit cap before softmax.
|
|
52
|
+
Note: when using a non-boolean (additive) attn_mask, ensure its magnitude/semantics remain compatible
|
|
53
|
+
with capping (e.g., very large negative values used to approximate -inf can interact with logit shaping).
|
|
50
54
|
|
|
51
55
|
Returns:
|
|
52
56
|
Tensor of shape (B, H, L_i, L_k, D)
|
|
@@ -65,6 +69,9 @@ def pivotal_attention(
|
|
|
65
69
|
attn_scores = torch.einsum("bhikd,bhijd->bhikj", q_ik, k_ij) \
|
|
66
70
|
+ torch.einsum("bhikd,bhjkd->bhikj", q_ik, k_jk)
|
|
67
71
|
|
|
72
|
+
if softmax_cap > 0:
|
|
73
|
+
attn_scores = softmax_cap * torch.tanh(attn_scores / softmax_cap)
|
|
74
|
+
|
|
68
75
|
if attn_mask is not None:
|
|
69
76
|
if attn_mask.dtype == torch.bool:
|
|
70
77
|
attn_scores = attn_scores.masked_fill(attn_mask, -inf)
|
|
@@ -93,6 +100,7 @@ def pivotal_attention3(
|
|
|
93
100
|
dropout: float = 0.0,
|
|
94
101
|
scale: Optional[float] = None,
|
|
95
102
|
inf: float = 1e9,
|
|
103
|
+
softmax_cap: float = -1,
|
|
96
104
|
) -> torch.Tensor:
|
|
97
105
|
"""3-Pivotal attention as described in "FLOYDNET: A LEARNING PARADIGM FOR GLOBAL RELATIONAL REASONING".
|
|
98
106
|
|
|
@@ -111,9 +119,12 @@ def pivotal_attention3(
|
|
|
111
119
|
dropout: Dropout probability applied to attention weights (only effective if > 0).
|
|
112
120
|
scale: Optional custom scaling factor. If None, defaults to 1/sqrt(3*D).
|
|
113
121
|
inf: Value to use for -infinity in masks.
|
|
122
|
+
softmax_cap: If > 0, applies a tanh-based logit cap before softmax.
|
|
123
|
+
Note: when using a non-boolean (additive) attn_mask, ensure its magnitude/semantics remain compatible
|
|
124
|
+
with capping (e.g., very large negative values used to approximate -inf can interact with logit shaping).
|
|
114
125
|
|
|
115
126
|
Returns:
|
|
116
|
-
Tensor of shape (B, H, L_i,
|
|
127
|
+
Tensor of shape (B, H, L_i, L_j, L_k, D)
|
|
117
128
|
"""
|
|
118
129
|
assert all([t.dim() == 6 for t in [q_ijk, k_pjk, k_ipk, k_ijp, v_pjk, v_ipk, v_ijp]]), "All inputs must be 6D tensors"
|
|
119
130
|
B, H, L_i, L_j, L_k, D = q_ijk.shape
|
|
@@ -130,6 +141,9 @@ def pivotal_attention3(
|
|
|
130
141
|
attn_scores = torch.einsum("bhijkd,bhpjkd->bhijkp", q_ijk, k_pjk) \
|
|
131
142
|
+ torch.einsum("bhijkd,bhipkd->bhijkp", q_ijk, k_ipk) \
|
|
132
143
|
+ torch.einsum("bhijkd,bhijpd->bhijkp", q_ijk, k_ijp)
|
|
144
|
+
|
|
145
|
+
if softmax_cap > 0:
|
|
146
|
+
attn_scores = softmax_cap * torch.tanh(attn_scores / softmax_cap)
|
|
133
147
|
|
|
134
148
|
if attn_mask is not None:
|
|
135
149
|
if attn_mask.dtype == torch.bool:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: floydnet
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Floyd Multi-Head Attention: a drop-in variant of PyTorch MHA with module and function APIs
|
|
5
5
|
Project-URL: Homepage, https://github.com/ocx-lab/FloydNet
|
|
6
6
|
Project-URL: Repository, https://github.com/ocx-lab/FloydNet
|
|
@@ -235,7 +235,7 @@ Description-Content-Type: text/markdown
|
|
|
235
235
|
[](https://www.python.org/)
|
|
236
236
|
[](https://pytorch.org/)
|
|
237
237
|
|
|
238
|
-
Official implementation of [FloydNet](https://
|
|
238
|
+
Official implementation of [FloydNet](https://arxiv.org/pdf/2601.19094).
|
|
239
239
|
|
|
240
240
|

|
|
241
241
|
|
|
@@ -253,7 +253,7 @@ The repository provides:
|
|
|
253
253
|
1. **Reusable components**: a drop-in attention/Transformer-block interface intended for integration into existing projects.
|
|
254
254
|
2. **Reproduction code**: end-to-end training/evaluation pipelines to reproduce the benchmarks reported in the paper.
|
|
255
255
|
|
|
256
|
-
For algorithmic details, hyperparameter choices, and analysis, please refer to the paper
|
|
256
|
+
For algorithmic details, hyperparameter choices, and analysis, please refer to the [paper](https://arxiv.org/pdf/2601.19094).
|
|
257
257
|
|
|
258
258
|
---
|
|
259
259
|
|
|
@@ -360,9 +360,9 @@ uv pip install -e .
|
|
|
360
360
|
|
|
361
361
|
## Changelog (latest)
|
|
362
362
|
|
|
363
|
-
-
|
|
364
|
-
- Added
|
|
365
|
-
|
|
363
|
+
- Added `softmax_cap` parameter to `pivotal_attention` and `pivotal_attention3` for improved numerical stability.
|
|
364
|
+
- Added LRGB example script.
|
|
365
|
+
|
|
366
366
|
|
|
367
367
|
The full changelog is in [CHANGELOG.md](CHANGELOG.md).
|
|
368
368
|
|
|
@@ -371,12 +371,15 @@ The full changelog is in [CHANGELOG.md](CHANGELOG.md).
|
|
|
371
371
|
If you use this code in your research, please cite the paper:
|
|
372
372
|
|
|
373
373
|
```bibtex
|
|
374
|
-
@
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
374
|
+
@misc{yu2026floydnetlearningparadigmglobal,
|
|
375
|
+
title={FloydNet: A Learning Paradigm for Global Relational Reasoning},
|
|
376
|
+
author={Jingcheng Yu and Mingliang Zeng and Qiwei Ye},
|
|
377
|
+
year={2026},
|
|
378
|
+
eprint={2601.19094},
|
|
379
|
+
archivePrefix={arXiv},
|
|
380
|
+
primaryClass={cs.LG},
|
|
381
|
+
url={https://arxiv.org/abs/2601.19094},
|
|
382
|
+
|
|
380
383
|
}
|
|
381
384
|
```
|
|
382
385
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
floydnet/__init__.py,sha256=0AnWvQ3EQu8NhEsNWIffH6ZEf3jKpJ26zTQWuV0Ersg,204
|
|
2
|
+
floydnet/functional.py,sha256=Usy_9AmJxzkx-HT8qJZaB0ufzVZn8fSEfcj6mgIi1Lk,6600
|
|
3
|
+
floydnet/transformer.py,sha256=jMTEKGkO64rlJqtB5-LFpsInkjyv8_Zo3Tvzd6ZZtPs,8678
|
|
4
|
+
floydnet-1.1.0.dist-info/METADATA,sha256=sExDcksEtcw0pPDJO5w9y4Oj9S_4hGfUIa1upzqedtw,18727
|
|
5
|
+
floydnet-1.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
+
floydnet-1.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
7
|
+
floydnet-1.1.0.dist-info/RECORD,,
|
floydnet-1.0.0.dist-info/RECORD
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
floydnet/__init__.py,sha256=0AnWvQ3EQu8NhEsNWIffH6ZEf3jKpJ26zTQWuV0Ersg,204
|
|
2
|
-
floydnet/functional.py,sha256=96TcAbb_YmsT0H2Hqt4EuKy1v5Xkdv0ZKNtCfRx6ab0,5716
|
|
3
|
-
floydnet/transformer.py,sha256=jMTEKGkO64rlJqtB5-LFpsInkjyv8_Zo3Tvzd6ZZtPs,8678
|
|
4
|
-
floydnet-1.0.0.dist-info/METADATA,sha256=h3UXHjbQPZ59gdQ5UXkpYOB0AHZ_YBc4zli-iMmixn4,18608
|
|
5
|
-
floydnet-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
-
floydnet-1.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
7
|
-
floydnet-1.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|