qorva 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
qorva-0.1.0/LICENSE ADDED
@@ -0,0 +1,17 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ Copyright 2026 Qorva Contributors
6
+
7
+ Licensed under the Apache License, Version 2.0 (the "License");
8
+ you may not use this file except in compliance with the License.
9
+ You may obtain a copy of the License at
10
+
11
+ http://www.apache.org/licenses/LICENSE-2.0
12
+
13
+ Unless required by applicable law or agreed to in writing, software
14
+ distributed under the License is distributed on an "AS IS" BASIS,
15
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ See the License for the specific language governing permissions and
17
+ limitations under the License.
qorva-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.4
2
+ Name: qorva
3
+ Version: 0.1.0
4
+ Summary: Hybrid Mamba-2 + Attention + MoE language models, easy to load and run.
5
+ Author-email: Your Name <you@example.com>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/yourusername/qorva
8
+ Project-URL: Repository, https://github.com/yourusername/qorva
9
+ Project-URL: Issues, https://github.com/yourusername/qorva/issues
10
+ Keywords: language-model,mamba,attention,mixture-of-experts,transformer,pytorch
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: torch>=2.1.0
19
+ Requires-Dist: tiktoken>=0.5.0
20
+ Requires-Dist: huggingface_hub>=0.20.0
21
+ Requires-Dist: pyyaml>=6.0
22
+ Requires-Dist: numpy>=1.24.0
23
+ Provides-Extra: train
24
+ Requires-Dist: datasets>=2.16.0; extra == "train"
25
+ Requires-Dist: tqdm>=4.66.0; extra == "train"
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
28
+ Requires-Dist: black>=23.0.0; extra == "dev"
29
+ Dynamic: license-file
30
+
31
+ # qorva
32
+
33
+ Hybrid **Mamba-2 + Attention + Mixture-of-Experts** language models — easy to load, run, and fine-tune.
34
+
35
+ ```bash
36
+ pip install qorva
37
+ ```
38
+
39
+ ## Quick start
40
+
41
+ ```python
42
+ from qorva import load_pretrained, generate
43
+
44
+ model = load_pretrained("your_username/qorva-model-fast")
45
+ print(generate(model, "Once upon a time in a small village,", max_new_tokens=100))
46
+ ```
47
+
48
+ Or, using the bound method directly:
49
+
50
+ ```python
51
+ from qorva import load_pretrained
52
+
53
+ model = load_pretrained("your_username/qorva-model-fast")
54
+ print(model.generate("Once upon a time", max_new_tokens=100, temperature=0.8))
55
+ ```
56
+
57
+ ## Command line
58
+
59
+ ```bash
60
+ qorva-generate --model your_username/qorva-model-fast --prompt "Once upon a time"
61
+ ```
62
+
63
+ ## Build from scratch
64
+
65
+ ```python
66
+ from qorva import QorvaModel, QorvaConfig
67
+
68
+ cfg = QorvaConfig.nano_full() # ~370M, Mamba + Attention + MoE
69
+ model = QorvaModel(cfg)
70
+ print(f"{model.num_parameters()/1e6:.1f}M parameters")
71
+ ```
72
+
73
+ Available presets:
74
+ - `QorvaConfig.micro()` — ~32M, for fast experimentation
75
+ - `QorvaConfig.nano_full()` — ~370M, Mamba + Attention + MoE
76
+ - `QorvaConfig.nano_baseline()` — ~370M, Attention-only baseline
77
+
78
+ ## Push your trained model to HuggingFace
79
+
80
+ ```python
81
+ from qorva import push_to_hub
82
+
83
+ push_to_hub(model, "your_username/qorva-nano-370m", token="hf_...")
84
+ ```
85
+
86
+ ## Perplexity evaluation
87
+
88
+ ```python
89
+ from qorva.utils.generation import compute_perplexity
90
+
91
+ ppl = compute_perplexity(model, "Some evaluation text here...")
92
+ print(f"PPL: {ppl:.2f}")
93
+ ```
94
+
95
+ ## Architecture
96
+
97
+ In the `hybrid` and `full` stages, each layer mixes two branches (Mamba and attention) with learnable weights `alpha` and `beta`:
98
+
99
+ ```
100
+ x' = x + alpha * Mamba2(LN(x)) + beta * GQAttention(LN(x))
101
+ x = x' + LatentMoE(LN(x')) # LatentMoE when stage="full"; the other stages use an FFN instead (see table below)
102
+ ```
103
+
104
+ | Stage | Branches |
105
+ |---|---|
106
+ | `attn_only` | Attention + FFN |
107
+ | `mamba_only` | Mamba + FFN |
108
+ | `hybrid` | Mamba + Attention + FFN |
109
+ | `full` | Mamba + Attention + LatentMoE |
110
+
111
+ ## License
112
+
113
+ Apache-2.0
qorva-0.1.0/README.md ADDED
@@ -0,0 +1,83 @@
1
+ # qorva
2
+
3
+ Hybrid **Mamba-2 + Attention + Mixture-of-Experts** language models — easy to load, run, and fine-tune.
4
+
5
+ ```bash
6
+ pip install qorva
7
+ ```
8
+
9
+ ## Quick start
10
+
11
+ ```python
12
+ from qorva import load_pretrained, generate
13
+
14
+ model = load_pretrained("your_username/qorva-model-fast")
15
+ print(generate(model, "Once upon a time in a small village,", max_new_tokens=100))
16
+ ```
17
+
18
+ Or, using the bound method directly:
19
+
20
+ ```python
21
+ from qorva import load_pretrained
22
+
23
+ model = load_pretrained("your_username/qorva-model-fast")
24
+ print(model.generate("Once upon a time", max_new_tokens=100, temperature=0.8))
25
+ ```
26
+
27
+ ## Command line
28
+
29
+ ```bash
30
+ qorva-generate --model your_username/qorva-model-fast --prompt "Once upon a time"
31
+ ```
32
+
33
+ ## Build from scratch
34
+
35
+ ```python
36
+ from qorva import QorvaModel, QorvaConfig
37
+
38
+ cfg = QorvaConfig.nano_full() # ~370M, Mamba + Attention + MoE
39
+ model = QorvaModel(cfg)
40
+ print(f"{model.num_parameters()/1e6:.1f}M parameters")
41
+ ```
42
+
43
+ Available presets:
44
+ - `QorvaConfig.micro()` — ~32M, for fast experimentation
45
+ - `QorvaConfig.nano_full()` — ~370M, Mamba + Attention + MoE
46
+ - `QorvaConfig.nano_baseline()` — ~370M, Attention-only baseline
47
+
48
+ ## Push your trained model to HuggingFace
49
+
50
+ ```python
51
+ from qorva import push_to_hub
52
+
53
+ push_to_hub(model, "your_username/qorva-nano-370m", token="hf_...")
54
+ ```
55
+
56
+ ## Perplexity evaluation
57
+
58
+ ```python
59
+ from qorva.utils.generation import compute_perplexity
60
+
61
+ ppl = compute_perplexity(model, "Some evaluation text here...")
62
+ print(f"PPL: {ppl:.2f}")
63
+ ```
64
+
65
+ ## Architecture
66
+
67
+ In the `hybrid` and `full` stages, each layer mixes two branches (Mamba and attention) with learnable weights `alpha` and `beta`:
68
+
69
+ ```
70
+ x' = x + alpha * Mamba2(LN(x)) + beta * GQAttention(LN(x))
71
+ x = x' + LatentMoE(LN(x')) # LatentMoE when stage="full"; the other stages use an FFN instead (see table below)
72
+ ```
73
+
74
+ | Stage | Branches |
75
+ |---|---|
76
+ | `attn_only` | Attention + FFN |
77
+ | `mamba_only` | Mamba + FFN |
78
+ | `hybrid` | Mamba + Attention + FFN |
79
+ | `full` | Mamba + Attention + LatentMoE |
80
+
81
+ ## License
82
+
83
+ Apache-2.0
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "qorva"
7
+ version = "0.1.0"
8
+ description = "Hybrid Mamba-2 + Attention + MoE language models, easy to load and run."
9
+ readme = "README.md"
10
+ license = { text = "Apache-2.0" }
11
+ authors = [{ name = "Your Name", email = "you@example.com" }]
12
+ requires-python = ">=3.9"
13
+ keywords = ["language-model", "mamba", "attention", "mixture-of-experts", "transformer", "pytorch"]
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: Apache Software License",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
19
+ ]
20
+
21
+ dependencies = [
22
+ "torch>=2.1.0",
23
+ "tiktoken>=0.5.0",
24
+ "huggingface_hub>=0.20.0",
25
+ "pyyaml>=6.0",
26
+ "numpy>=1.24.0",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ train = [
31
+ "datasets>=2.16.0",
32
+ "tqdm>=4.66.0",
33
+ ]
34
+ dev = [
35
+ "pytest>=7.0.0",
36
+ "black>=23.0.0",
37
+ ]
38
+
39
+ [project.urls]
40
+ Homepage = "https://github.com/yourusername/qorva"
41
+ Repository = "https://github.com/yourusername/qorva"
42
+ Issues = "https://github.com/yourusername/qorva/issues"
43
+
44
+ [project.scripts]
45
+ qorva-generate = "qorva.cli:main"
46
+
47
+ [tool.setuptools.packages.find]
48
+ include = ["qorva*"]
@@ -0,0 +1,32 @@
1
+ """
2
+ qorva — Hybrid Mamba + Attention + MoE Language Models
3
+ =========================================================
4
+
5
+ Quick start:
6
+
7
+ from qorva import QorvaModel, QorvaConfig
8
+ from qorva import load_pretrained
9
+
10
+ model = load_pretrained("your_username/qorva-model-fast")
11
+ text = model.generate("Once upon a time", max_new_tokens=100)
12
+ print(text)
13
+
14
+ Or build from scratch:
15
+
16
+ cfg = QorvaConfig.nano_full()
17
+ model = QorvaModel(cfg)
18
+ """
19
+
20
+ from qorva.model.config import QorvaConfig
21
+ from qorva.model.architecture import QorvaModel
22
+ from qorva.utils.loading import load_pretrained, push_to_hub
23
+ from qorva.utils.generation import generate
24
+
25
+ __version__ = "0.1.0"
26
+ __all__ = [
27
+ "QorvaModel",
28
+ "QorvaConfig",
29
+ "load_pretrained",
30
+ "push_to_hub",
31
+ "generate",
32
+ ]
@@ -0,0 +1,48 @@
1
+ """
2
+ qorva/cli.py
3
+ ============
4
+ Command-line interface for the qorva package.
5
+
6
+ Usage:
7
+ qorva-generate --model username/qorva-model --prompt "Once upon a time"
8
+ qorva-generate --model ./checkpoints/qorva_final.pt --prompt "Hello" --max_new_tokens 200
9
+ """
10
+
11
+ import argparse
12
+
13
+
14
def main(argv=None):
    """Run the ``qorva-generate`` command-line interface.

    Parses the command-line options, loads the requested model with
    ``load_pretrained``, generates a continuation of the prompt with
    ``generate`` and prints the result to stdout.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process arguments. The default ``None`` makes argparse read
            ``sys.argv[1:]``, so the ``qorva-generate`` console script
            (which calls ``main()`` with no arguments) behaves as before.

    Raises:
        SystemExit: Raised by argparse on ``--help`` (exit code 0) or on
            invalid / missing arguments (exit code 2).
    """
    parser = argparse.ArgumentParser(
        prog="qorva-generate",
        description="Generate text using a Qorva model (local or HuggingFace Hub).",
    )
    parser.add_argument("--model", required=True,
                        help="HF repo id or local path to checkpoint")
    parser.add_argument("--prompt", required=True, help="Text prompt")
    parser.add_argument("--max_new_tokens", type=int, default=150)
    parser.add_argument("--temperature", type=float, default=0.8)
    parser.add_argument("--top_k", type=int, default=50)
    parser.add_argument("--top_p", type=float, default=1.0)
    parser.add_argument("--device", default="auto")
    parser.add_argument("--token", default=None, help="HF token for private repos")
    args = parser.parse_args(argv)

    # Imported after argument parsing, so ``--help`` and usage errors exit
    # before the qorva package is loaded.
    from qorva import load_pretrained, generate

    print(f"Loading {args.model}...")
    model = load_pretrained(args.model, device=args.device, token=args.token)
    print(f"Loaded {model.num_parameters()/1e6:.1f}M parameter model\n")

    output = generate(
        model,
        args.prompt,
        max_new_tokens=args.max_new_tokens,
        temperature=args.temperature,
        top_k=args.top_k,
        top_p=args.top_p,
    )
    print(output)
45
+
46
+
47
+ if __name__ == "__main__":
48
+ main()
@@ -0,0 +1 @@
1
+