minmaxrnc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minmaxrnc-0.1.0/LICENSE +143 -0
- minmaxrnc-0.1.0/NOTICE +10 -0
- minmaxrnc-0.1.0/PKG-INFO +329 -0
- minmaxrnc-0.1.0/README.md +160 -0
- minmaxrnc-0.1.0/pyproject.toml +43 -0
- minmaxrnc-0.1.0/setup.cfg +4 -0
- minmaxrnc-0.1.0/src/minmaxrnc/__init__.py +12 -0
- minmaxrnc-0.1.0/src/minmaxrnc/minmax_layer.py +134 -0
- minmaxrnc-0.1.0/src/minmaxrnc/minmax_neuron.py +148 -0
- minmaxrnc-0.1.0/src/minmaxrnc/minmax_operator.py +39 -0
- minmaxrnc-0.1.0/src/minmaxrnc/minmax_rnc.py +281 -0
- minmaxrnc-0.1.0/src/minmaxrnc/minmax_rnc_lm.py +88 -0
- minmaxrnc-0.1.0/src/minmaxrnc/minmax_scan.py +60 -0
- minmaxrnc-0.1.0/src/minmaxrnc/modules/basic_conv.py +77 -0
- minmaxrnc-0.1.0/src/minmaxrnc/modules/feedforward.py +167 -0
- minmaxrnc-0.1.0/src/minmaxrnc/modules/gated_conv.py +78 -0
- minmaxrnc-0.1.0/src/minmaxrnc/modules/initialisers.py +60 -0
- minmaxrnc-0.1.0/src/minmaxrnc.egg-info/PKG-INFO +329 -0
- minmaxrnc-0.1.0/src/minmaxrnc.egg-info/SOURCES.txt +21 -0
- minmaxrnc-0.1.0/src/minmaxrnc.egg-info/dependency_links.txt +1 -0
- minmaxrnc-0.1.0/src/minmaxrnc.egg-info/requires.txt +1 -0
- minmaxrnc-0.1.0/src/minmaxrnc.egg-info/top_level.txt +1 -0
- minmaxrnc-0.1.0/tests/test_minmax.py +482 -0
minmaxrnc-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
This project is licensed under the PolyForm Noncommercial License 1.0.0.
|
|
2
|
+
|
|
3
|
+
Commercial use is not permitted under this public license.
|
|
4
|
+
For commercial licensing, contact:
|
|
5
|
+
|
|
6
|
+
Alessandro Ronca
|
|
7
|
+
alessandro.ronca@iris-ai.org
|
|
8
|
+
|
|
9
|
+
The full license text follows.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# PolyForm Noncommercial License 1.0.0
|
|
13
|
+
|
|
14
|
+
<https://polyformproject.org/licenses/noncommercial/1.0.0>
|
|
15
|
+
|
|
16
|
+
## Acceptance
|
|
17
|
+
|
|
18
|
+
In order to get any license under these terms, you must agree
|
|
19
|
+
to them as both strict obligations and conditions to all
|
|
20
|
+
your licenses.
|
|
21
|
+
|
|
22
|
+
## Copyright License
|
|
23
|
+
|
|
24
|
+
The licensor grants you a copyright license for the
|
|
25
|
+
software to do everything you might do with the software
|
|
26
|
+
that would otherwise infringe the licensor's copyright
|
|
27
|
+
in it for any permitted purpose. However, you may
|
|
28
|
+
only distribute the software according to [Distribution
|
|
29
|
+
License](#distribution-license) and make changes or new works
|
|
30
|
+
based on the software according to [Changes and New Works
|
|
31
|
+
License](#changes-and-new-works-license).
|
|
32
|
+
|
|
33
|
+
## Distribution License
|
|
34
|
+
|
|
35
|
+
The licensor grants you an additional copyright license
|
|
36
|
+
to distribute copies of the software. Your license
|
|
37
|
+
to distribute covers distributing the software with
|
|
38
|
+
changes and new works permitted by [Changes and New Works
|
|
39
|
+
License](#changes-and-new-works-license).
|
|
40
|
+
|
|
41
|
+
## Notices
|
|
42
|
+
|
|
43
|
+
You must ensure that anyone who gets a copy of any part of
|
|
44
|
+
the software from you also gets a copy of these terms or the
|
|
45
|
+
URL for them above, as well as copies of any plain-text lines
|
|
46
|
+
beginning with `Required Notice:` that the licensor provided
|
|
47
|
+
with the software. For example:
|
|
48
|
+
|
|
49
|
+
> Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
|
|
50
|
+
|
|
51
|
+
## Changes and New Works License
|
|
52
|
+
|
|
53
|
+
The licensor grants you an additional copyright license to
|
|
54
|
+
make changes and new works based on the software for any
|
|
55
|
+
permitted purpose.
|
|
56
|
+
|
|
57
|
+
## Patent License
|
|
58
|
+
|
|
59
|
+
The licensor grants you a patent license for the software that
|
|
60
|
+
covers patent claims the licensor can license, or becomes able
|
|
61
|
+
to license, that you would infringe by using the software.
|
|
62
|
+
|
|
63
|
+
## Noncommercial Purposes
|
|
64
|
+
|
|
65
|
+
Any noncommercial purpose is a permitted purpose.
|
|
66
|
+
|
|
67
|
+
## Personal Uses
|
|
68
|
+
|
|
69
|
+
Personal use for research, experiment, and testing for
|
|
70
|
+
the benefit of public knowledge, personal study, private
|
|
71
|
+
entertainment, hobby projects, amateur pursuits, or religious
|
|
72
|
+
observance, without any anticipated commercial application,
|
|
73
|
+
is use for a permitted purpose.
|
|
74
|
+
|
|
75
|
+
## Noncommercial Organizations
|
|
76
|
+
|
|
77
|
+
Use by any charitable organization, educational institution,
|
|
78
|
+
public research organization, public safety or health
|
|
79
|
+
organization, environmental protection organization,
|
|
80
|
+
or government institution is use for a permitted purpose
|
|
81
|
+
regardless of the source of funding or obligations resulting
|
|
82
|
+
from the funding.
|
|
83
|
+
|
|
84
|
+
## Fair Use
|
|
85
|
+
|
|
86
|
+
You may have "fair use" rights for the software under the
|
|
87
|
+
law. These terms do not limit them.
|
|
88
|
+
|
|
89
|
+
## No Other Rights
|
|
90
|
+
|
|
91
|
+
These terms do not allow you to sublicense or transfer any of
|
|
92
|
+
your licenses to anyone else, or prevent the licensor from
|
|
93
|
+
granting licenses to anyone else. These terms do not imply
|
|
94
|
+
any other licenses.
|
|
95
|
+
|
|
96
|
+
## Patent Defense
|
|
97
|
+
|
|
98
|
+
If you make any written claim that the software infringes or
|
|
99
|
+
contributes to infringement of any patent, your patent license
|
|
100
|
+
for the software granted under these terms ends immediately. If
|
|
101
|
+
your company makes such a claim, your patent license ends
|
|
102
|
+
immediately for work on behalf of your company.
|
|
103
|
+
|
|
104
|
+
## Violations
|
|
105
|
+
|
|
106
|
+
The first time you are notified in writing that you have
|
|
107
|
+
violated any of these terms, or done anything with the software
|
|
108
|
+
not covered by your licenses, your licenses can nonetheless
|
|
109
|
+
continue if you come into full compliance with these terms,
|
|
110
|
+
and take practical steps to correct past violations, within
|
|
111
|
+
32 days of receiving notice. Otherwise, all your licenses
|
|
112
|
+
end immediately.
|
|
113
|
+
|
|
114
|
+
## No Liability
|
|
115
|
+
|
|
116
|
+
***As far as the law allows, the software comes as is, without
|
|
117
|
+
any warranty or condition, and the licensor will not be liable
|
|
118
|
+
to you for any damages arising out of these terms or the use
|
|
119
|
+
or nature of the software, under any kind of legal claim.***
|
|
120
|
+
|
|
121
|
+
## Definitions
|
|
122
|
+
|
|
123
|
+
The **licensor** is the individual or entity offering these
|
|
124
|
+
terms, and the **software** is the software the licensor makes
|
|
125
|
+
available under these terms.
|
|
126
|
+
|
|
127
|
+
**You** refers to the individual or entity agreeing to these
|
|
128
|
+
terms.
|
|
129
|
+
|
|
130
|
+
**Your company** is any legal entity, sole proprietorship,
|
|
131
|
+
or other kind of organization that you work for, plus all
|
|
132
|
+
organizations that have control over, are under the control of,
|
|
133
|
+
or are under common control with that organization. **Control**
|
|
134
|
+
means ownership of substantially all the assets of an entity,
|
|
135
|
+
or the power to direct its management and policies by vote,
|
|
136
|
+
contract, or otherwise. Control can be direct or indirect.
|
|
137
|
+
|
|
138
|
+
**Your licenses** are all the licenses granted to you for the
|
|
139
|
+
software under these terms.
|
|
140
|
+
|
|
141
|
+
**Use** means anything you do with the software requiring one
|
|
142
|
+
of your licenses.
|
|
143
|
+
|
minmaxrnc-0.1.0/NOTICE
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
minmaxrnc
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alessandro Ronca.
|
|
4
|
+
|
|
5
|
+
This project is licensed under the PolyForm Noncommercial License 1.0.0.
|
|
6
|
+
|
|
7
|
+
Commercial use requires a separate written license agreement from the copyright holder.
|
|
8
|
+
|
|
9
|
+
This project uses third-party dependencies that are licensed separately.
|
|
10
|
+
See THIRD_PARTY_NOTICES.md for details.
|
minmaxrnc-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: minmaxrnc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MinMax Recurrent Neural Cascade
|
|
5
|
+
Author-email: Alessandro Ronca <alessandro.ronca@iris-ai.org>
|
|
6
|
+
License: This project is licensed under the PolyForm Noncommercial License 1.0.0.
|
|
7
|
+
|
|
8
|
+
Commercial use is not permitted under this public license.
|
|
9
|
+
For commercial licensing, contact:
|
|
10
|
+
|
|
11
|
+
Alessandro Ronca
|
|
12
|
+
alessandro.ronca@iris-ai.org
|
|
13
|
+
|
|
14
|
+
The full license text follows.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# PolyForm Noncommercial License 1.0.0
|
|
18
|
+
|
|
19
|
+
<https://polyformproject.org/licenses/noncommercial/1.0.0>
|
|
20
|
+
|
|
21
|
+
## Acceptance
|
|
22
|
+
|
|
23
|
+
In order to get any license under these terms, you must agree
|
|
24
|
+
to them as both strict obligations and conditions to all
|
|
25
|
+
your licenses.
|
|
26
|
+
|
|
27
|
+
## Copyright License
|
|
28
|
+
|
|
29
|
+
The licensor grants you a copyright license for the
|
|
30
|
+
software to do everything you might do with the software
|
|
31
|
+
that would otherwise infringe the licensor's copyright
|
|
32
|
+
in it for any permitted purpose. However, you may
|
|
33
|
+
only distribute the software according to [Distribution
|
|
34
|
+
License](#distribution-license) and make changes or new works
|
|
35
|
+
based on the software according to [Changes and New Works
|
|
36
|
+
License](#changes-and-new-works-license).
|
|
37
|
+
|
|
38
|
+
## Distribution License
|
|
39
|
+
|
|
40
|
+
The licensor grants you an additional copyright license
|
|
41
|
+
to distribute copies of the software. Your license
|
|
42
|
+
to distribute covers distributing the software with
|
|
43
|
+
changes and new works permitted by [Changes and New Works
|
|
44
|
+
License](#changes-and-new-works-license).
|
|
45
|
+
|
|
46
|
+
## Notices
|
|
47
|
+
|
|
48
|
+
You must ensure that anyone who gets a copy of any part of
|
|
49
|
+
the software from you also gets a copy of these terms or the
|
|
50
|
+
URL for them above, as well as copies of any plain-text lines
|
|
51
|
+
beginning with `Required Notice:` that the licensor provided
|
|
52
|
+
with the software. For example:
|
|
53
|
+
|
|
54
|
+
> Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
|
|
55
|
+
|
|
56
|
+
## Changes and New Works License
|
|
57
|
+
|
|
58
|
+
The licensor grants you an additional copyright license to
|
|
59
|
+
make changes and new works based on the software for any
|
|
60
|
+
permitted purpose.
|
|
61
|
+
|
|
62
|
+
## Patent License
|
|
63
|
+
|
|
64
|
+
The licensor grants you a patent license for the software that
|
|
65
|
+
covers patent claims the licensor can license, or becomes able
|
|
66
|
+
to license, that you would infringe by using the software.
|
|
67
|
+
|
|
68
|
+
## Noncommercial Purposes
|
|
69
|
+
|
|
70
|
+
Any noncommercial purpose is a permitted purpose.
|
|
71
|
+
|
|
72
|
+
## Personal Uses
|
|
73
|
+
|
|
74
|
+
Personal use for research, experiment, and testing for
|
|
75
|
+
the benefit of public knowledge, personal study, private
|
|
76
|
+
entertainment, hobby projects, amateur pursuits, or religious
|
|
77
|
+
observance, without any anticipated commercial application,
|
|
78
|
+
is use for a permitted purpose.
|
|
79
|
+
|
|
80
|
+
## Noncommercial Organizations
|
|
81
|
+
|
|
82
|
+
Use by any charitable organization, educational institution,
|
|
83
|
+
public research organization, public safety or health
|
|
84
|
+
organization, environmental protection organization,
|
|
85
|
+
or government institution is use for a permitted purpose
|
|
86
|
+
regardless of the source of funding or obligations resulting
|
|
87
|
+
from the funding.
|
|
88
|
+
|
|
89
|
+
## Fair Use
|
|
90
|
+
|
|
91
|
+
You may have "fair use" rights for the software under the
|
|
92
|
+
law. These terms do not limit them.
|
|
93
|
+
|
|
94
|
+
## No Other Rights
|
|
95
|
+
|
|
96
|
+
These terms do not allow you to sublicense or transfer any of
|
|
97
|
+
your licenses to anyone else, or prevent the licensor from
|
|
98
|
+
granting licenses to anyone else. These terms do not imply
|
|
99
|
+
any other licenses.
|
|
100
|
+
|
|
101
|
+
## Patent Defense
|
|
102
|
+
|
|
103
|
+
If you make any written claim that the software infringes or
|
|
104
|
+
contributes to infringement of any patent, your patent license
|
|
105
|
+
for the software granted under these terms ends immediately. If
|
|
106
|
+
your company makes such a claim, your patent license ends
|
|
107
|
+
immediately for work on behalf of your company.
|
|
108
|
+
|
|
109
|
+
## Violations
|
|
110
|
+
|
|
111
|
+
The first time you are notified in writing that you have
|
|
112
|
+
violated any of these terms, or done anything with the software
|
|
113
|
+
not covered by your licenses, your licenses can nonetheless
|
|
114
|
+
continue if you come into full compliance with these terms,
|
|
115
|
+
and take practical steps to correct past violations, within
|
|
116
|
+
32 days of receiving notice. Otherwise, all your licenses
|
|
117
|
+
end immediately.
|
|
118
|
+
|
|
119
|
+
## No Liability
|
|
120
|
+
|
|
121
|
+
***As far as the law allows, the software comes as is, without
|
|
122
|
+
any warranty or condition, and the licensor will not be liable
|
|
123
|
+
to you for any damages arising out of these terms or the use
|
|
124
|
+
or nature of the software, under any kind of legal claim.***
|
|
125
|
+
|
|
126
|
+
## Definitions
|
|
127
|
+
|
|
128
|
+
The **licensor** is the individual or entity offering these
|
|
129
|
+
terms, and the **software** is the software the licensor makes
|
|
130
|
+
available under these terms.
|
|
131
|
+
|
|
132
|
+
**You** refers to the individual or entity agreeing to these
|
|
133
|
+
terms.
|
|
134
|
+
|
|
135
|
+
**Your company** is any legal entity, sole proprietorship,
|
|
136
|
+
or other kind of organization that you work for, plus all
|
|
137
|
+
organizations that have control over, are under the control of,
|
|
138
|
+
or are under common control with that organization. **Control**
|
|
139
|
+
means ownership of substantially all the assets of an entity,
|
|
140
|
+
or the power to direct its management and policies by vote,
|
|
141
|
+
contract, or otherwise. Control can be direct or indirect.
|
|
142
|
+
|
|
143
|
+
**Your licenses** are all the licenses granted to you for the
|
|
144
|
+
software under these terms.
|
|
145
|
+
|
|
146
|
+
**Use** means anything you do with the software requiring one
|
|
147
|
+
of your licenses.
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
Project-URL: Homepage, https://github.com/minmaxrnc/model
|
|
151
|
+
Project-URL: Repository, https://github.com/minmaxrnc/model
|
|
152
|
+
Project-URL: Issues, https://github.com/minmaxrnc/model/issues
|
|
153
|
+
Project-URL: Documentation, https://github.com/minmaxrnc/model/blob/main/docs/model.md
|
|
154
|
+
Keywords: deep learning,sequence model,recurrent neural network,language model
|
|
155
|
+
Classifier: Development Status :: 3 - Alpha
|
|
156
|
+
Classifier: Intended Audience :: Science/Research
|
|
157
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
158
|
+
Classifier: Programming Language :: Python :: 3
|
|
159
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
160
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
161
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
162
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
163
|
+
Requires-Python: >=3.10
|
|
164
|
+
Description-Content-Type: text/markdown
|
|
165
|
+
License-File: LICENSE
|
|
166
|
+
License-File: NOTICE
|
|
167
|
+
Requires-Dist: torch>=2.0
|
|
168
|
+
Dynamic: license-file
|
|
169
|
+
|
|
170
|
+
# MinMax Recurrent Neural Cascades
|
|
171
|
+
|
|
172
|
+
A parallelisable recurrent sequence model built on the **MinMax operator** —
|
|
173
|
+
expressively powerful, efficiently implementable, and provably not affected by
|
|
174
|
+
vanishing or exploding gradients.
|
|
175
|
+
|
|
176
|
+
## Key properties
|
|
177
|
+
|
|
178
|
+
- **Perfect memory.** MinMax neurons can store and retain information arbitrarily long (formal
|
|
179
|
+
expressivity: all group-free functions).
|
|
180
|
+
|
|
181
|
+
- **Parallel training.** All hidden states across a sequence of length T are
|
|
182
|
+
computed simultaneously in O(log T) depth, with no sequential bottleneck.
|
|
183
|
+
- **Efficient inference.** Runs as a true RNN: O(1) compute and O(D) memory
|
|
184
|
+
per token, making it practical for long-context streaming generation.
|
|
185
|
+
- **Stable recurrence.** The MinMax operator is bounded and its
|
|
186
|
+
gradients cannot vanish or explode through the state path.
|
|
187
|
+
|
|
188
|
+
## The model
|
|
189
|
+
|
|
190
|
+
Each layer contains three sub-modules applied with pre-norm and residual
|
|
191
|
+
connections:
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
1. **MinMax Neuron** — the recurrent cell, updating a hidden state
|
|
195
|
+
`x_{t+1} = max(min(r_t, x_t), s_t)` element-wise in parallel via a prefix scan.
|
|
196
|
+
2. **Convolution** — one-step causal mixing.
|
|
197
|
+
3. **Feed-forward network** — feature mixing (gated or standard MLP).
|
|
198
|
+
|
|
199
|
+
See [arxiv.org/abs/2605.06384](https://arxiv.org/abs/2605.06384) for the formal description and analyses.
|
|
200
|
+
See [`docs/model.md`](docs/model.md) for the architecture reference.
|
|
201
|
+
|
|
202
|
+
## Installation
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
pip install minmaxrnc
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
PyTorch (≥ 2.0) is required. For GPU support, follow the
|
|
209
|
+
[PyTorch installation guide](https://pytorch.org/get-started/locally/) before
|
|
210
|
+
installing this package.
|
|
211
|
+
|
|
212
|
+
## Quick start
|
|
213
|
+
|
|
214
|
+
### Sequence backbone
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
import torch
|
|
218
|
+
from minmaxrnc import MinMaxRNC, MinMaxRNCConfig
|
|
219
|
+
|
|
220
|
+
model = MinMaxRNC(MinMaxRNCConfig.medium()) # d_model=512
|
|
221
|
+
|
|
222
|
+
u = torch.randn(batch_size, seq_len, 512)
|
|
223
|
+
|
|
224
|
+
# Parallel over the full sequence (training)
|
|
225
|
+
y = model(u, unroll_steps=seq_len) # (B, T, 512)
|
|
226
|
+
|
|
227
|
+
# Carry state across calls (streaming inference)
|
|
228
|
+
y, state = model(u, return_state=True)
|
|
229
|
+
y_next = model(u_next, state=state)
|
|
230
|
+
|
|
231
|
+
# Carry state across multi-step calls (streaming inference, 64 steps in parallel)
|
|
232
|
+
y, state = model(u, return_state=True, unroll_steps=64)
|
|
233
|
+
y_next = model(u_next, state=state)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### Language model
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
import torch
|
|
240
|
+
from minmaxrnc import MinMaxRNC_LM, MinMaxRNCLMConfig, MinMaxRNCConfig
|
|
241
|
+
|
|
242
|
+
model = MinMaxRNC_LM(
|
|
243
|
+
vocab_size = 50257,
|
|
244
|
+
cfg = MinMaxRNCLMConfig(backbone=MinMaxRNCConfig.medium()),
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
tokens = torch.randint(0, 50257, (batch_size, seq_len))
|
|
248
|
+
logits = model(tokens) # (B, T, vocab_size)
|
|
249
|
+
|
|
250
|
+
# Autoregressive generation
|
|
251
|
+
logits, state = model(tokens[:, :1], return_state=True)
|
|
252
|
+
for _ in range(max_new_tokens):
|
|
253
|
+
next_tok = logits[:, -1].argmax(-1, keepdim=True)
|
|
254
|
+
logits, state = model(next_tok, state=state, return_state=True)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Custom configuration
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
from minmaxrnc import MinMaxRNC, MinMaxRNCConfig
|
|
261
|
+
|
|
262
|
+
cfg = MinMaxRNCConfig(
|
|
263
|
+
d_model = 768,
|
|
264
|
+
n_layers = 12,
|
|
265
|
+
d_state = 192, # hidden-state dimension per neuron
|
|
266
|
+
norm = 'rmsnorm',
|
|
267
|
+
ffn_type = 'gated',
|
|
268
|
+
ffn_act_fn = 'swish', # → SwiGLU
|
|
269
|
+
output_gate = True,
|
|
270
|
+
use_postlayers_ffn = True,
|
|
271
|
+
)
|
|
272
|
+
model = MinMaxRNC(cfg)
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
#### Preset sizes
|
|
276
|
+
|
|
277
|
+
| Preset | `d_model` | `n_layers` | `d_state` | Parameters (backbone) | Parameters (LM, GPT-2 vocab) |
|
|
278
|
+
|----------|-----------|------------|-----------|-----------------------|------------------------------|
|
|
279
|
+
| `small` | 90 | 2 | 40 | ~0.1 M | ~4.6 M |
|
|
280
|
+
| `medium` | 512 | 8 | 512 | ~16.6 M | ~42.4 M |
|
|
281
|
+
| `large` | 728 | 12 | 1456 | ~75.9 M | ~112.5 M |
|
|
282
|
+
|
|
283
|
+
## Running the tests
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
python -m unittest discover -s tests -v
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
## How to cite
|
|
290
|
+
|
|
291
|
+
```bibtex
|
|
292
|
+
@misc{ronca2026minmaxpaper,
|
|
293
|
+
title={{MinMax} Recurrent Neural Cascades},
|
|
294
|
+
author={Alessandro Ronca},
|
|
295
|
+
year={2026},
|
|
296
|
+
eprint={2605.06384},
|
|
297
|
+
archivePrefix={arXiv},
|
|
298
|
+
primaryClass={cs.LG},
|
|
299
|
+
url={https://arxiv.org/abs/2605.06384},
|
|
300
|
+
}
|
|
301
|
+
@software{ronca2026minmaxcode,
|
|
302
|
+
author = {Alessandro Ronca},
|
|
303
|
+
title = {{MinMax} Recurrent Neural Cascades},
|
|
304
|
+
year = {2026},
|
|
305
|
+
url = {https://github.com/minmaxrnc/model},
|
|
306
|
+
version = {0.1.0},
|
|
307
|
+
}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
## License
|
|
312
|
+
|
|
313
|
+
This project is source-available under the PolyForm Noncommercial License 1.0.0.
|
|
314
|
+
|
|
315
|
+
You may use, copy, modify, and distribute this software only for non-commercial purposes under the terms of that license.
|
|
316
|
+
|
|
317
|
+
Commercial use is not permitted without a separate commercial license from the copyright holder.
|
|
318
|
+
|
|
319
|
+
For commercial licensing, contact:
|
|
320
|
+
|
|
321
|
+
**Alessandro Ronca**
|
|
322
|
+
alessandro.ronca@iris-ai.org
|
|
323
|
+
|
|
324
|
+
## Third-party dependencies
|
|
325
|
+
|
|
326
|
+
This project depends on third-party software, including Python and PyTorch.
|
|
327
|
+
These dependencies are licensed separately by their respective copyright holders.
|
|
328
|
+
|
|
329
|
+
See `THIRD_PARTY_NOTICES.md` for details.
|
minmaxrnc-0.1.0/README.md
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# MinMax Recurrent Neural Cascades
|
|
2
|
+
|
|
3
|
+
A parallelisable recurrent sequence model built on the **MinMax operator** —
|
|
4
|
+
expressively powerful, efficiently implementable, and provably not affected by
|
|
5
|
+
vanishing or exploding gradients.
|
|
6
|
+
|
|
7
|
+
## Key properties
|
|
8
|
+
|
|
9
|
+
- **Perfect memory.** MinMax neurons can store and retain information arbitrarily long (formal
|
|
10
|
+
expressivity: all group-free functions).
|
|
11
|
+
|
|
12
|
+
- **Parallel training.** All hidden states across a sequence of length T are
|
|
13
|
+
computed simultaneously in O(log T) depth, with no sequential bottleneck.
|
|
14
|
+
- **Efficient inference.** Runs as a true RNN: O(1) compute and O(D) memory
|
|
15
|
+
per token, making it practical for long-context streaming generation.
|
|
16
|
+
- **Stable recurrence.** The MinMax operator is bounded and its
|
|
17
|
+
gradients cannot vanish or explode through the state path.
|
|
18
|
+
|
|
19
|
+
## The model
|
|
20
|
+
|
|
21
|
+
Each layer contains three sub-modules applied with pre-norm and residual
|
|
22
|
+
connections:
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
1. **MinMax Neuron** — the recurrent cell, updating a hidden state
|
|
26
|
+
`x_{t+1} = max(min(r_t, x_t), s_t)` element-wise in parallel via a prefix scan.
|
|
27
|
+
2. **Convolution** — one-step causal mixing.
|
|
28
|
+
3. **Feed-forward network** — feature mixing (gated or standard MLP).
|
|
29
|
+
|
|
30
|
+
See [arxiv.org/abs/2605.06384](https://arxiv.org/abs/2605.06384) for the formal description and analyses.
|
|
31
|
+
See [`docs/model.md`](docs/model.md) for the architecture reference.
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install minmaxrnc
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
PyTorch (≥ 2.0) is required. For GPU support, follow the
|
|
40
|
+
[PyTorch installation guide](https://pytorch.org/get-started/locally/) before
|
|
41
|
+
installing this package.
|
|
42
|
+
|
|
43
|
+
## Quick start
|
|
44
|
+
|
|
45
|
+
### Sequence backbone
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import torch
|
|
49
|
+
from minmaxrnc import MinMaxRNC, MinMaxRNCConfig
|
|
50
|
+
|
|
51
|
+
model = MinMaxRNC(MinMaxRNCConfig.medium()) # d_model=512
|
|
52
|
+
|
|
53
|
+
u = torch.randn(batch_size, seq_len, 512)
|
|
54
|
+
|
|
55
|
+
# Parallel over the full sequence (training)
|
|
56
|
+
y = model(u, unroll_steps=seq_len) # (B, T, 512)
|
|
57
|
+
|
|
58
|
+
# Carry state across calls (streaming inference)
|
|
59
|
+
y, state = model(u, return_state=True)
|
|
60
|
+
y_next = model(u_next, state=state)
|
|
61
|
+
|
|
62
|
+
# Carry state across multi-step calls (streaming inference, 64 steps in parallel)
|
|
63
|
+
y, state = model(u, return_state=True, unroll_steps=64)
|
|
64
|
+
y_next = model(u_next, state=state)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Language model
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
import torch
|
|
71
|
+
from minmaxrnc import MinMaxRNC_LM, MinMaxRNCLMConfig, MinMaxRNCConfig
|
|
72
|
+
|
|
73
|
+
model = MinMaxRNC_LM(
|
|
74
|
+
vocab_size = 50257,
|
|
75
|
+
cfg = MinMaxRNCLMConfig(backbone=MinMaxRNCConfig.medium()),
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
tokens = torch.randint(0, 50257, (batch_size, seq_len))
|
|
79
|
+
logits = model(tokens) # (B, T, vocab_size)
|
|
80
|
+
|
|
81
|
+
# Autoregressive generation
|
|
82
|
+
logits, state = model(tokens[:, :1], return_state=True)
|
|
83
|
+
for _ in range(max_new_tokens):
|
|
84
|
+
next_tok = logits[:, -1].argmax(-1, keepdim=True)
|
|
85
|
+
logits, state = model(next_tok, state=state, return_state=True)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Custom configuration
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from minmaxrnc import MinMaxRNC, MinMaxRNCConfig
|
|
92
|
+
|
|
93
|
+
cfg = MinMaxRNCConfig(
|
|
94
|
+
d_model = 768,
|
|
95
|
+
n_layers = 12,
|
|
96
|
+
d_state = 192, # hidden-state dimension per neuron
|
|
97
|
+
norm = 'rmsnorm',
|
|
98
|
+
ffn_type = 'gated',
|
|
99
|
+
ffn_act_fn = 'swish', # → SwiGLU
|
|
100
|
+
output_gate = True,
|
|
101
|
+
use_postlayers_ffn = True,
|
|
102
|
+
)
|
|
103
|
+
model = MinMaxRNC(cfg)
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
#### Preset sizes
|
|
107
|
+
|
|
108
|
+
| Preset | `d_model` | `n_layers` | `d_state` | Parameters (backbone) | Parameters (LM, GPT-2 vocab) |
|
|
109
|
+
|----------|-----------|------------|-----------|-----------------------|------------------------------|
|
|
110
|
+
| `small` | 90 | 2 | 40 | ~0.1 M | ~4.6 M |
|
|
111
|
+
| `medium` | 512 | 8 | 512 | ~16.6 M | ~42.4 M |
|
|
112
|
+
| `large` | 728 | 12 | 1456 | ~75.9 M | ~112.5 M |
|
|
113
|
+
|
|
114
|
+
## Running the tests
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
python -m unittest discover -s tests -v
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## How to cite
|
|
121
|
+
|
|
122
|
+
```bibtex
|
|
123
|
+
@misc{ronca2026minmaxpaper,
|
|
124
|
+
title={{MinMax} Recurrent Neural Cascades},
|
|
125
|
+
author={Alessandro Ronca},
|
|
126
|
+
year={2026},
|
|
127
|
+
eprint={2605.06384},
|
|
128
|
+
archivePrefix={arXiv},
|
|
129
|
+
primaryClass={cs.LG},
|
|
130
|
+
url={https://arxiv.org/abs/2605.06384},
|
|
131
|
+
}
|
|
132
|
+
@software{ronca2026minmaxcode,
|
|
133
|
+
author = {Alessandro Ronca},
|
|
134
|
+
title = {{MinMax} Recurrent Neural Cascades},
|
|
135
|
+
year = {2026},
|
|
136
|
+
url = {https://github.com/minmaxrnc/model},
|
|
137
|
+
version = {0.1.0},
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
This project is source-available under the PolyForm Noncommercial License 1.0.0.
|
|
145
|
+
|
|
146
|
+
You may use, copy, modify, and distribute this software only for non-commercial purposes under the terms of that license.
|
|
147
|
+
|
|
148
|
+
Commercial use is not permitted without a separate commercial license from the copyright holder.
|
|
149
|
+
|
|
150
|
+
For commercial licensing, contact:
|
|
151
|
+
|
|
152
|
+
**Alessandro Ronca**
|
|
153
|
+
alessandro.ronca@iris-ai.org
|
|
154
|
+
|
|
155
|
+
## Third-party dependencies
|
|
156
|
+
|
|
157
|
+
This project depends on third-party software, including Python and PyTorch.
|
|
158
|
+
These dependencies are licensed separately by their respective copyright holders.
|
|
159
|
+
|
|
160
|
+
See `THIRD_PARTY_NOTICES.md` for details.
|
minmaxrnc-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Alessandro Ronca
|
|
2
|
+
# SPDX-License-Identifier: PolyForm-Noncommercial-1.0.0
|
|
3
|
+
|
|
4
|
+
[build-system]
|
|
5
|
+
requires = ["setuptools>=77.0.3"]
|
|
6
|
+
build-backend = "setuptools.build_meta"
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "minmaxrnc"
|
|
10
|
+
version = "0.1.0"
|
|
11
|
+
description = "MinMax Recurrent Neural Cascade"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.10"
|
|
14
|
+
license = {file = "LICENSE"}
|
|
15
|
+
authors = [
|
|
16
|
+
{ name = "Alessandro Ronca", email = "alessandro.ronca@iris-ai.org" }
|
|
17
|
+
]
|
|
18
|
+
keywords = ["deep learning", "sequence model", "recurrent neural network", "language model"]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 3 - Alpha",
|
|
21
|
+
"Intended Audience :: Science/Research",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
23
|
+
"Programming Language :: Python :: 3",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Programming Language :: Python :: 3.13",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"torch>=2.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/minmaxrnc/model"
|
|
35
|
+
Repository = "https://github.com/minmaxrnc/model"
|
|
36
|
+
Issues = "https://github.com/minmaxrnc/model/issues"
|
|
37
|
+
Documentation = "https://github.com/minmaxrnc/model/blob/main/docs/model.md"
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.packages.find]
|
|
40
|
+
where = ["src"]
|
|
41
|
+
|
|
42
|
+
[tool.pytest.ini_options]
|
|
43
|
+
testpaths = ["tests"]
|