qorva 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qorva-0.1.0/LICENSE +17 -0
- qorva-0.1.0/PKG-INFO +113 -0
- qorva-0.1.0/README.md +83 -0
- qorva-0.1.0/pyproject.toml +48 -0
- qorva-0.1.0/qorva/__init__.py +32 -0
- qorva-0.1.0/qorva/cli.py +48 -0
- qorva-0.1.0/qorva/model/__init__.py +1 -0
- qorva-0.1.0/qorva/model/architecture.py +576 -0
- qorva-0.1.0/qorva/model/config.py +139 -0
- qorva-0.1.0/qorva/utils/__init__.py +1 -0
- qorva-0.1.0/qorva/utils/generation.py +152 -0
- qorva-0.1.0/qorva/utils/loading.py +230 -0
- qorva-0.1.0/qorva.egg-info/PKG-INFO +113 -0
- qorva-0.1.0/qorva.egg-info/SOURCES.txt +18 -0
- qorva-0.1.0/qorva.egg-info/dependency_links.txt +1 -0
- qorva-0.1.0/qorva.egg-info/entry_points.txt +2 -0
- qorva-0.1.0/qorva.egg-info/requires.txt +13 -0
- qorva-0.1.0/qorva.egg-info/top_level.txt +1 -0
- qorva-0.1.0/setup.cfg +4 -0
- qorva-0.1.0/tests/test_basic.py +87 -0
qorva-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
Copyright 2026 Qorva Contributors
|
|
6
|
+
|
|
7
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
|
+
you may not use this file except in compliance with the License.
|
|
9
|
+
You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
See the License for the specific language governing permissions and
|
|
17
|
+
limitations under the License.
|
qorva-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: qorva
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Hybrid Mamba-2 + Attention + MoE language models, easy to load and run.
|
|
5
|
+
Author-email: Your Name <you@example.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/yourusername/qorva
|
|
8
|
+
Project-URL: Repository, https://github.com/yourusername/qorva
|
|
9
|
+
Project-URL: Issues, https://github.com/yourusername/qorva/issues
|
|
10
|
+
Keywords: language-model,mamba,attention,mixture-of-experts,transformer,pytorch
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Python: >=3.9
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: torch>=2.1.0
|
|
19
|
+
Requires-Dist: tiktoken>=0.5.0
|
|
20
|
+
Requires-Dist: huggingface_hub>=0.20.0
|
|
21
|
+
Requires-Dist: pyyaml>=6.0
|
|
22
|
+
Requires-Dist: numpy>=1.24.0
|
|
23
|
+
Provides-Extra: train
|
|
24
|
+
Requires-Dist: datasets>=2.16.0; extra == "train"
|
|
25
|
+
Requires-Dist: tqdm>=4.66.0; extra == "train"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
28
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# qorva
|
|
32
|
+
|
|
33
|
+
Hybrid **Mamba-2 + Attention + Mixture-of-Experts** language models — easy to load, run, and fine-tune.
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install qorva
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from qorva import load_pretrained, generate
|
|
43
|
+
|
|
44
|
+
model = load_pretrained("your_username/qorva-model-fast")
|
|
45
|
+
print(generate(model, "Once upon a time in a small village,", max_new_tokens=100))
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or, using the bound method directly:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from qorva import load_pretrained
|
|
52
|
+
|
|
53
|
+
model = load_pretrained("your_username/qorva-model-fast")
|
|
54
|
+
print(model.generate("Once upon a time", max_new_tokens=100, temperature=0.8))
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Command line
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
qorva-generate --model your_username/qorva-model-fast --prompt "Once upon a time"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Build from scratch
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from qorva import QorvaModel, QorvaConfig
|
|
67
|
+
|
|
68
|
+
cfg = QorvaConfig.nano_full() # ~370M, Mamba + Attention + MoE
|
|
69
|
+
model = QorvaModel(cfg)
|
|
70
|
+
print(f"{model.num_parameters()/1e6:.1f}M parameters")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Available presets:
|
|
74
|
+
- `QorvaConfig.micro()` — ~32M, for fast experimentation
|
|
75
|
+
- `QorvaConfig.nano_full()` — ~370M, Mamba + Attention + MoE
|
|
76
|
+
- `QorvaConfig.nano_baseline()` — ~370M, Attention-only baseline
|
|
77
|
+
|
|
78
|
+
## Push your trained model to HuggingFace
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from qorva import push_to_hub
|
|
82
|
+
|
|
83
|
+
push_to_hub(model, "your_username/qorva-nano-370m", token="hf_...")
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Perplexity evaluation
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from qorva.utils.generation import compute_perplexity
|
|
90
|
+
|
|
91
|
+
ppl = compute_perplexity(model, "Some evaluation text here...")
|
|
92
|
+
print(f"PPL: {ppl:.2f}")
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Architecture
|
|
96
|
+
|
|
97
|
+
Each layer mixes two branches, Mamba-2 and attention, using the learnable weights `alpha` and `beta`:
|
|
98
|
+
|
|
99
|
+
```
|
|
100
|
+
x' = x + alpha * Mamba2(LN(x)) + beta * GQAttention(LN(x))
|
|
101
|
+
x = x' + LatentMoE(LN(x')) # stage="full" only
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
| Stage | Branches |
|
|
105
|
+
|---|---|
|
|
106
|
+
| `attn_only` | Attention + FFN |
|
|
107
|
+
| `mamba_only` | Mamba + FFN |
|
|
108
|
+
| `hybrid` | Mamba + Attention + FFN |
|
|
109
|
+
| `full` | Mamba + Attention + LatentMoE |
|
|
110
|
+
|
|
111
|
+
## License
|
|
112
|
+
|
|
113
|
+
Apache-2.0
|
qorva-0.1.0/README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# qorva
|
|
2
|
+
|
|
3
|
+
Hybrid **Mamba-2 + Attention + Mixture-of-Experts** language models — easy to load, run, and fine-tune.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install qorva
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Quick start
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from qorva import load_pretrained, generate
|
|
13
|
+
|
|
14
|
+
model = load_pretrained("your_username/qorva-model-fast")
|
|
15
|
+
print(generate(model, "Once upon a time in a small village,", max_new_tokens=100))
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Or, using the bound method directly:
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from qorva import load_pretrained
|
|
22
|
+
|
|
23
|
+
model = load_pretrained("your_username/qorva-model-fast")
|
|
24
|
+
print(model.generate("Once upon a time", max_new_tokens=100, temperature=0.8))
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Command line
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
qorva-generate --model your_username/qorva-model-fast --prompt "Once upon a time"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Build from scratch
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
from qorva import QorvaModel, QorvaConfig
|
|
37
|
+
|
|
38
|
+
cfg = QorvaConfig.nano_full() # ~370M, Mamba + Attention + MoE
|
|
39
|
+
model = QorvaModel(cfg)
|
|
40
|
+
print(f"{model.num_parameters()/1e6:.1f}M parameters")
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Available presets:
|
|
44
|
+
- `QorvaConfig.micro()` — ~32M, for fast experimentation
|
|
45
|
+
- `QorvaConfig.nano_full()` — ~370M, Mamba + Attention + MoE
|
|
46
|
+
- `QorvaConfig.nano_baseline()` — ~370M, Attention-only baseline
|
|
47
|
+
|
|
48
|
+
## Push your trained model to HuggingFace
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from qorva import push_to_hub
|
|
52
|
+
|
|
53
|
+
push_to_hub(model, "your_username/qorva-nano-370m", token="hf_...")
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Perplexity evaluation
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from qorva.utils.generation import compute_perplexity
|
|
60
|
+
|
|
61
|
+
ppl = compute_perplexity(model, "Some evaluation text here...")
|
|
62
|
+
print(f"PPL: {ppl:.2f}")
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Architecture
|
|
66
|
+
|
|
67
|
+
Each layer mixes two branches, Mamba-2 and attention, using the learnable weights `alpha` and `beta`:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
x' = x + alpha * Mamba2(LN(x)) + beta * GQAttention(LN(x))
|
|
71
|
+
x = x' + LatentMoE(LN(x')) # stage="full" only
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
| Stage | Branches |
|
|
75
|
+
|---|---|
|
|
76
|
+
| `attn_only` | Attention + FFN |
|
|
77
|
+
| `mamba_only` | Mamba + FFN |
|
|
78
|
+
| `hybrid` | Mamba + Attention + FFN |
|
|
79
|
+
| `full` | Mamba + Attention + LatentMoE |
|
|
80
|
+
|
|
81
|
+
## License
|
|
82
|
+
|
|
83
|
+
Apache-2.0
|
|
qorva-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "qorva"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Hybrid Mamba-2 + Attention + MoE language models, easy to load and run."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "Apache-2.0" }
|
|
11
|
+
authors = [{ name = "Your Name", email = "you@example.com" }]
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
|
+
keywords = ["language-model", "mamba", "attention", "mixture-of-experts", "transformer", "pytorch"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: Apache Software License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
dependencies = [
|
|
22
|
+
"torch>=2.1.0",
|
|
23
|
+
"tiktoken>=0.5.0",
|
|
24
|
+
"huggingface_hub>=0.20.0",
|
|
25
|
+
"pyyaml>=6.0",
|
|
26
|
+
"numpy>=1.24.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
train = [
|
|
31
|
+
"datasets>=2.16.0",
|
|
32
|
+
"tqdm>=4.66.0",
|
|
33
|
+
]
|
|
34
|
+
dev = [
|
|
35
|
+
"pytest>=7.0.0",
|
|
36
|
+
"black>=23.0.0",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/yourusername/qorva"
|
|
41
|
+
Repository = "https://github.com/yourusername/qorva"
|
|
42
|
+
Issues = "https://github.com/yourusername/qorva/issues"
|
|
43
|
+
|
|
44
|
+
[project.scripts]
|
|
45
|
+
qorva-generate = "qorva.cli:main"
|
|
46
|
+
|
|
47
|
+
[tool.setuptools.packages.find]
|
|
48
|
+
include = ["qorva*"]
|
|
qorva-0.1.0/qorva/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
qorva — Hybrid Mamba + Attention + MoE Language Models
|
|
3
|
+
=========================================================
|
|
4
|
+
|
|
5
|
+
Quick start:
|
|
6
|
+
|
|
7
|
+
from qorva import QorvaModel, QorvaConfig
|
|
8
|
+
from qorva import load_pretrained
|
|
9
|
+
|
|
10
|
+
model = load_pretrained("your_username/qorva-model-fast")
|
|
11
|
+
text = model.generate("Once upon a time", max_new_tokens=100)
|
|
12
|
+
print(text)
|
|
13
|
+
|
|
14
|
+
Or build from scratch:
|
|
15
|
+
|
|
16
|
+
cfg = QorvaConfig.nano_full()
|
|
17
|
+
model = QorvaModel(cfg)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from qorva.model.config import QorvaConfig
|
|
21
|
+
from qorva.model.architecture import QorvaModel
|
|
22
|
+
from qorva.utils.loading import load_pretrained, push_to_hub
|
|
23
|
+
from qorva.utils.generation import generate
|
|
24
|
+
|
|
25
|
+
__version__ = "0.1.0"
|
|
26
|
+
__all__ = [
|
|
27
|
+
"QorvaModel",
|
|
28
|
+
"QorvaConfig",
|
|
29
|
+
"load_pretrained",
|
|
30
|
+
"push_to_hub",
|
|
31
|
+
"generate",
|
|
32
|
+
]
|
qorva-0.1.0/qorva/cli.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
qorva/cli.py
|
|
3
|
+
============
|
|
4
|
+
Command-line interface for the qorva package.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
qorva-generate --model username/qorva-model --prompt "Once upon a time"
|
|
8
|
+
qorva-generate --model ./checkpoints/qorva_final.pt --prompt "Hello" --max_new_tokens 200
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def main(argv=None):
    """Run the ``qorva-generate`` command-line interface.

    Parses the command-line options, loads the requested model with
    ``load_pretrained``, generates a continuation of the prompt with
    ``generate`` and prints the result to standard output.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default, and what the console-script entry point uses),
            argparse reads the process arguments from ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse for ``--help`` (exit code 0) and for
            invalid or missing arguments (exit code 2).
    """
    parser = argparse.ArgumentParser(
        prog="qorva-generate",
        description="Generate text using a Qorva model (local or HuggingFace Hub).",
    )
    parser.add_argument("--model", required=True,
                        help="HF repo id or local path to checkpoint")
    parser.add_argument("--prompt", required=True, help="Text prompt")
    parser.add_argument("--max_new_tokens", type=int, default=150)
    parser.add_argument("--temperature", type=float, default=0.8)
    parser.add_argument("--top_k", type=int, default=50)
    parser.add_argument("--top_p", type=float, default=1.0)
    parser.add_argument("--device", default="auto")
    parser.add_argument("--token", default=None, help="HF token for private repos")
    # Passing argv through lets callers and tests drive the CLI without
    # mutating sys.argv; None keeps the original sys.argv behavior.
    args = parser.parse_args(argv)

    # Imported after parsing: --help and argument errors exit inside
    # parse_args(), before this import statement is reached.
    from qorva import load_pretrained, generate

    print(f"Loading {args.model}...")
    model = load_pretrained(args.model, device=args.device, token=args.token)
    print(f"Loaded {model.num_parameters()/1e6:.1f}M parameter model\n")

    output = generate(
        model,
        args.prompt,
        max_new_tokens=args.max_new_tokens,
        temperature=args.temperature,
        top_k=args.top_k,
        top_p=args.top_p,
    )
    print(output)


if __name__ == "__main__":
    main()
|
|
qorva-0.1.0/qorva/model/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|