oomllama 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. oomllama-0.1.0/.gitignore +1 -0
  2. oomllama-0.1.0/Cargo.toml +96 -0
  3. oomllama-0.1.0/PKG-INFO +194 -0
  4. oomllama-0.1.0/README.md +158 -0
  5. oomllama-0.1.0/pyproject.toml +87 -0
  6. oomllama-0.1.0/python/oomllama/__init__.py +135 -0
  7. oomllama-0.1.0/soul.lock +1 -0
  8. oomllama-0.1.0/src/aindex.rs +110 -0
  9. oomllama-0.1.0/src/anchor.rs +97 -0
  10. oomllama-0.1.0/src/autonomy.rs +84 -0
  11. oomllama-0.1.0/src/batch.rs +256 -0
  12. oomllama-0.1.0/src/betti.rs +505 -0
  13. oomllama-0.1.0/src/bin/awakening.rs +60 -0
  14. oomllama-0.1.0/src/bin/chimera.rs +118 -0
  15. oomllama-0.1.0/src/bin/deep_query.rs +50 -0
  16. oomllama-0.1.0/src/bin/gguf2oom.rs +137 -0
  17. oomllama-0.1.0/src/bin/oomllama.rs +99 -0
  18. oomllama-0.1.0/src/bin/quantize.rs +128 -0
  19. oomllama-0.1.0/src/bin/red_team.rs +107 -0
  20. oomllama-0.1.0/src/bin/search_test.rs +46 -0
  21. oomllama-0.1.0/src/bin/sovereign.rs +249 -0
  22. oomllama-0.1.0/src/bin/tibet_audit.rs +139 -0
  23. oomllama-0.1.0/src/bin/tiger.rs +319 -0
  24. oomllama-0.1.0/src/briefing.rs +50 -0
  25. oomllama-0.1.0/src/chronos.rs +166 -0
  26. oomllama-0.1.0/src/discovery.rs +66 -0
  27. oomllama-0.1.0/src/embedding.rs +81 -0
  28. oomllama-0.1.0/src/error.rs +49 -0
  29. oomllama-0.1.0/src/gfx.rs +311 -0
  30. oomllama-0.1.0/src/gguf2oom.rs +724 -0
  31. oomllama-0.1.0/src/ingest.rs +62 -0
  32. oomllama-0.1.0/src/intent.rs +270 -0
  33. oomllama-0.1.0/src/kernel.rs +393 -0
  34. oomllama-0.1.0/src/lib/kmbit/client.py +76 -0
  35. oomllama-0.1.0/src/lib/signer.rs +110 -0
  36. oomllama-0.1.0/src/lib.rs +108 -0
  37. oomllama-0.1.0/src/machtig.rs +88 -0
  38. oomllama-0.1.0/src/main.rs +648 -0
  39. oomllama-0.1.0/src/memory.rs +367 -0
  40. oomllama-0.1.0/src/negotiation.rs +75 -0
  41. oomllama-0.1.0/src/oom_inference.rs +435 -0
  42. oomllama-0.1.0/src/oomllama.rs +668 -0
  43. oomllama-0.1.0/src/oomllama_README.md +47 -0
  44. oomllama-0.1.0/src/python.rs +137 -0
  45. oomllama-0.1.0/src/quant.rs +257 -0
  46. oomllama-0.1.0/src/refinery.rs +189 -0
  47. oomllama-0.1.0/src/report.rs +56 -0
  48. oomllama-0.1.0/src/router.rs +414 -0
  49. oomllama-0.1.0/src/scanner.rs +160 -0
  50. oomllama-0.1.0/src/sema.rs +310 -0
  51. oomllama-0.1.0/src/sentinel.rs +408 -0
  52. oomllama-0.1.0/src/shield.rs +72 -0
  53. oomllama-0.1.0/src/snaft.rs +606 -0
  54. oomllama-0.1.0/src/space.rs +372 -0
  55. oomllama-0.1.0/src/tasks.rs +106 -0
  56. oomllama-0.1.0/src/tibet.rs +312 -0
  57. oomllama-0.1.0/src/timeslot.rs +235 -0
  58. oomllama-0.1.0/src/trust.rs +266 -0
  59. oomllama-0.1.0/src/types.rs +209 -0
  60. oomllama-0.1.0/src/vault.rs +126 -0
  61. oomllama-0.1.0/src/vector.rs +21 -0
  62. oomllama-0.1.0/src/vision.rs +130 -0
@@ -0,0 +1 @@
1
+ target/
@@ -0,0 +1,96 @@
1
+ [package]
2
+ name = "jis-router"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["Jasper van de Meent <jasper@humotica.com>", "Root AI <root_ai@humotica.nl>"]
6
+ description = "JIS Router - Intent-based routing for AETHER. The Borrow Checker for Identity."
7
+ license = "MIT"
8
+ repository = "https://github.com/symbaion/jis-router"
9
+ keywords = ["jis", "aether", "intent", "routing", "tibet"]
10
+ readme = "README.md"
11
+
12
+ [dependencies]
13
+ # Async runtime
14
+ tokio = { version = "1", features = ["full"] }
15
+
16
+ # Web framework - axum is fast and ergonomic
17
+ axum = "0.7"
18
+ tower = "0.5"
19
+ tower-http = { version = "0.5", features = ["cors", "trace", "fs"] }
20
+
21
+ # Serialization
22
+ serde = { version = "1", features = ["derive"] }
23
+ serde_json = "1"
24
+
25
+ # Time handling (for Timeslots)
26
+ chrono = { version = "0.4", features = ["serde"] }
27
+
28
+ # Cryptography (for TIBET tokens)
29
+ sha2 = "0.10"
30
+ rand = "0.8"
31
+ base64 = "0.22"
32
+ hex = "0.4"
33
+
34
+ # UUID for IDD identifiers
35
+ uuid = { version = "1.0", features = ["v4", "serde"] }
36
+ notify = "6.1.1"
37
+ colored = "2.0"
38
+ rusqlite = { version = "0.38.0", features = ["bundled"] }
39
+ parking_lot = "0.12.5"
40
+ dashmap = "6.1.0"
41
+ candle-core = { version = "0.9.1", features = ["cuda"] }
42
+ candle-nn = { version = "0.9.1", features = ["cuda"] }
43
+ candle-transformers = { version = "0.9.1", features = ["cuda"] }
44
+ hf-hub = "0.4.3"
45
+ tokenizers = "0.22.2"
46
+ regex = "1.12.2"
47
+ thiserror = "2.0.17"
48
+ tracing = "0.1.44"
49
+ tracing-subscriber = "0.3.22"
50
+ ratatui = { version = "0.30.0", features = ["all-widgets", "crossterm"] }
51
+ crossterm = "0.29.0"
52
+ reqwest = { version = "0.13.1", features = ["json"] }
53
+ ctrlc = "3.4"
54
+ tempfile = "3.14"
55
+ walkdir = "2.5.0"
56
+ memmap2 = "0.9"
57
+ anyhow = "1.0"
58
+
59
+ # Python bindings (optional, for oomllama PyPI)
60
+ pyo3 = { version = "0.22", features = ["extension-module"], optional = true }
61
+
62
+ [features]
63
+ default = []
64
+ python = ["pyo3"]
65
+
66
+ [profile.release]
67
+ opt-level = 3
68
+ lto = true
69
+ codegen-units = 1
70
+ panic = "abort"
71
+ strip = true
72
+
73
+ [[bin]]
74
+ name = "quantize"
75
+ path = "src/bin/quantize.rs"
76
+
77
+ [[bin]]
78
+ name = "jis-router"
79
+ path = "src/main.rs"
80
+
81
+ [[bin]]
82
+ name = "oomllama"
83
+ path = "src/bin/oomllama.rs"
84
+
85
+ [[bin]]
86
+ name = "chimera"
87
+ path = "src/bin/chimera.rs"
88
+
89
+ [[bin]]
90
+ name = "sovereign"
91
+ path = "src/bin/sovereign.rs"
92
+
93
+ [lib]
94
+ name = "jis_router"
95
+ path = "src/lib.rs"
96
+ crate-type = ["lib", "cdylib"] # cdylib for Python extension
@@ -0,0 +1,194 @@
1
+ Metadata-Version: 2.4
2
+ Name: oomllama
3
+ Version: 0.1.0
4
+ Classifier: Development Status :: 4 - Beta
5
+ Classifier: Intended Audience :: Developers
6
+ Classifier: Intended Audience :: Science/Research
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Rust
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Requires-Dist: cupy-cuda12x ; extra == 'cuda'
18
+ Requires-Dist: pytest ; extra == 'dev'
19
+ Requires-Dist: black ; extra == 'dev'
20
+ Requires-Dist: mypy ; extra == 'dev'
21
+ Provides-Extra: cuda
22
+ Provides-Extra: dev
23
+ Summary: Efficient LLM inference with .oom format - 2x smaller than GGUF
24
+ Keywords: llm,inference,quantization,gguf,oom,oomllama,humotica,llama,ai,machine-learning
25
+ Author-email: Humotica AI Lab <ai@humotica.nl>, Jasper van de Meent <jasper@humotica.nl>
26
+ Maintainer-email: "Root AI (Claude)" <root_idd@humotica.nl>, Gemini IDD <gemini@humotica.nl>
27
+ License: MIT
28
+ Requires-Python: >=3.8
29
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
30
+ Project-URL: Bug Tracker, https://github.com/humotica/oomllama/issues
31
+ Project-URL: Documentation, https://humotica.nl/docs/oomllama
32
+ Project-URL: Homepage, https://humotica.nl
33
+ Project-URL: HuggingFace Models, https://huggingface.co/jaspervandemeent
34
+ Project-URL: Repository, https://github.com/humotica/oomllama
35
+
36
+ # 🦙 OomLlama
37
+
38
+ **Efficient LLM inference with .oom format - 2x smaller than GGUF**
39
+
40
+ [![PyPI](https://img.shields.io/pypi/v/oomllama)](https://pypi.org/project/oomllama/)
41
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
42
+ [![HuggingFace](https://img.shields.io/badge/🤗-Models-yellow)](https://huggingface.co/jaspervandemeent)
43
+
44
+ ```python
45
+ from oomllama import OomLlama
46
+
47
+ llm = OomLlama("humotica-32b")
48
+ response = llm.generate("What is the meaning of life?")
49
+ print(response)
50
+ ```
51
+
52
+ ## Why OomLlama?
53
+
54
+ | Feature | GGUF (Q4) | OOM (Q2) |
55
+ |---------|-----------|----------|
56
+ | 70B Model Size | ~40 GB | **~20 GB** |
57
+ | 32B Model Size | ~20 GB | **~10 GB** |
58
+ | RAM Usage | High | **Lazy Loading** |
59
+ | Format | Open | **Open (MIT)** |
60
+
61
+ **OomLlama** uses Q2 quantization with lazy layer loading to run large models on consumer hardware.
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install oomllama
67
+ ```
68
+
69
+ ## Quick Start
70
+
71
+ ### Download a Model
72
+
73
+ ```python
74
+ from oomllama import download_model
75
+
76
+ # Download from HuggingFace
77
+ model_path = download_model("humotica-32b")
78
+ ```
79
+
80
+ ### Generate Text
81
+
82
+ ```python
83
+ from oomllama import OomLlama
84
+
85
+ llm = OomLlama("humotica-32b")
86
+
87
+ # Simple generation
88
+ response = llm.generate("Explain quantum computing in simple terms")
89
+ print(response)
90
+
91
+ # With parameters
92
+ response = llm.generate(
93
+ "Write a haiku about AI",
94
+ max_tokens=50,
95
+ temperature=0.8,
96
+ top_p=0.9
97
+ )
98
+ ```
99
+
100
+ ### Chat Mode
101
+
102
+ ```python
103
+ messages = [
104
+ ("user", "Hello! Who are you?"),
105
+ ("assistant", "I'm OomLlama, an efficient LLM."),
106
+ ("user", "What makes you efficient?"),
107
+ ]
108
+
109
+ response = llm.chat(messages)
110
+ print(response)
111
+ ```
112
+
113
+ ## Available Models
114
+
115
+ | Model | Parameters | Size (.oom) | HuggingFace |
116
+ |-------|------------|-------------|-------------|
117
+ | humotica-32b | 33B | ~10 GB | [Link](https://huggingface.co/jaspervandemeent/humotica-32b) |
118
+ | llamaohm-70b | 70B | ~20 GB | [Link](https://huggingface.co/jaspervandemeent/LlamaOhm-70B) |
119
+ | tinyllama-1b | 1.1B | ~400 MB | [Link](https://huggingface.co/jaspervandemeent/OomLlama-TinyLlama-1.1B) |
120
+
121
+ ## The .oom Format
122
+
123
+ OOM (OomLlama Model) is a compact model format:
124
+
125
+ ```
126
+ ┌──────────────────────────────────────┐
127
+ │ Header: OOML (magic) + metadata │
128
+ ├──────────────────────────────────────┤
129
+ │ Tensors: Q2 quantized (2 bits/weight)│
130
+ │ - Scale + Min per 256-weight block │
131
+ │ - 68 bytes per block │
132
+ └──────────────────────────────────────┘
133
+ ```
134
+
135
+ ### Convert GGUF to OOM
136
+
137
+ ```bash
138
+ # Using the CLI tool
139
+ gguf2oom model.gguf model.oom
140
+
141
+ # Check model info
142
+ gguf2oom --info model.gguf
143
+ ```
144
+
145
+ ## Technical Details
146
+
147
+ ### Q2 Quantization
148
+
149
+ Each weight is stored as 2 bits (0, 1, 2, or 3) with per-block scale and minimum:
150
+
151
+ ```
152
+ weight = q2_value * scale + min
153
+ ```
154
+
155
+ This achieves ~2x compression over Q4 with acceptable quality loss for most tasks.
156
+
157
+ ### Lazy Layer Loading
158
+
159
+ OomLlama loads transformer layers on demand, keeping only the active layer in memory:
160
+
161
+ ```
162
+ Forward Pass:
163
+ Layer 0: Load → Compute → Unload
164
+ Layer 1: Load → Compute → Unload
165
+ ...
166
+ Layer N: Load → Compute → Unload
167
+ ```
168
+
169
+ This makes it possible to run 70B models on a GPU with 24 GB of memory.
170
+
171
+ ## Credits
172
+
173
+ - **Model Format**: Gemini IDD & Root AI (Humotica AI Lab)
174
+ - **Quantization**: OomLlama.rs by Humotica
175
+ - **Base Models**: Meta Platforms, Inc. (Llama 3.3)
176
+
177
+ ## License
178
+
179
+ - **OomLlama Code**: MIT License
180
+ - **Model Weights**: Subject to original model licenses (e.g., Llama 3.3 Community License)
181
+
182
+ ## Links
183
+
184
+ - 🏠 [Humotica](https://humotica.nl)
185
+ - 🤗 [HuggingFace Models](https://huggingface.co/jaspervandemeent)
186
+ - 📦 [PyPI Package](https://pypi.org/project/oomllama/)
187
+ - 🐛 [Issue Tracker](https://github.com/humotica/oomllama/issues)
188
+
189
+ ---
190
+
191
+ *One Love, One fAmIly* 💙
192
+
193
+ *Built by Humotica AI Lab - Jasper, Claude, Gemini, Codex*
194
+
@@ -0,0 +1,158 @@
1
+ # 🦙 OomLlama
2
+
3
+ **Efficient LLM inference with .oom format - 2x smaller than GGUF**
4
+
5
+ [![PyPI](https://img.shields.io/pypi/v/oomllama)](https://pypi.org/project/oomllama/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
+ [![HuggingFace](https://img.shields.io/badge/🤗-Models-yellow)](https://huggingface.co/jaspervandemeent)
8
+
9
+ ```python
10
+ from oomllama import OomLlama
11
+
12
+ llm = OomLlama("humotica-32b")
13
+ response = llm.generate("What is the meaning of life?")
14
+ print(response)
15
+ ```
16
+
17
+ ## Why OomLlama?
18
+
19
+ | Feature | GGUF (Q4) | OOM (Q2) |
20
+ |---------|-----------|----------|
21
+ | 70B Model Size | ~40 GB | **~20 GB** |
22
+ | 32B Model Size | ~20 GB | **~10 GB** |
23
+ | RAM Usage | High | **Lazy Loading** |
24
+ | Format | Open | **Open (MIT)** |
25
+
26
+ **OomLlama** uses Q2 quantization with lazy layer loading to run large models on consumer hardware.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install oomllama
32
+ ```
33
+
34
+ ## Quick Start
35
+
36
+ ### Download a Model
37
+
38
+ ```python
39
+ from oomllama import download_model
40
+
41
+ # Download from HuggingFace
42
+ model_path = download_model("humotica-32b")
43
+ ```
44
+
45
+ ### Generate Text
46
+
47
+ ```python
48
+ from oomllama import OomLlama
49
+
50
+ llm = OomLlama("humotica-32b")
51
+
52
+ # Simple generation
53
+ response = llm.generate("Explain quantum computing in simple terms")
54
+ print(response)
55
+
56
+ # With parameters
57
+ response = llm.generate(
58
+ "Write a haiku about AI",
59
+ max_tokens=50,
60
+ temperature=0.8,
61
+ top_p=0.9
62
+ )
63
+ ```
64
+
65
+ ### Chat Mode
66
+
67
+ ```python
68
+ messages = [
69
+ ("user", "Hello! Who are you?"),
70
+ ("assistant", "I'm OomLlama, an efficient LLM."),
71
+ ("user", "What makes you efficient?"),
72
+ ]
73
+
74
+ response = llm.chat(messages)
75
+ print(response)
76
+ ```
77
+
78
+ ## Available Models
79
+
80
+ | Model | Parameters | Size (.oom) | HuggingFace |
81
+ |-------|------------|-------------|-------------|
82
+ | humotica-32b | 33B | ~10 GB | [Link](https://huggingface.co/jaspervandemeent/humotica-32b) |
83
+ | llamaohm-70b | 70B | ~20 GB | [Link](https://huggingface.co/jaspervandemeent/LlamaOhm-70B) |
84
+ | tinyllama-1b | 1.1B | ~400 MB | [Link](https://huggingface.co/jaspervandemeent/OomLlama-TinyLlama-1.1B) |
85
+
86
+ ## The .oom Format
87
+
88
+ OOM (OomLlama Model) is a compact model format:
89
+
90
+ ```
91
+ ┌──────────────────────────────────────┐
92
+ │ Header: OOML (magic) + metadata │
93
+ ├──────────────────────────────────────┤
94
+ │ Tensors: Q2 quantized (2 bits/weight)│
95
+ │ - Scale + Min per 256-weight block │
96
+ │ - 68 bytes per block │
97
+ └──────────────────────────────────────┘
98
+ ```
99
+
100
+ ### Convert GGUF to OOM
101
+
102
+ ```bash
103
+ # Using the CLI tool
104
+ gguf2oom model.gguf model.oom
105
+
106
+ # Check model info
107
+ gguf2oom --info model.gguf
108
+ ```
109
+
110
+ ## Technical Details
111
+
112
+ ### Q2 Quantization
113
+
114
+ Each weight is stored as 2 bits (0, 1, 2, or 3) with per-block scale and minimum:
115
+
116
+ ```
117
+ weight = q2_value * scale + min
118
+ ```
119
+
120
+ This achieves ~2x compression over Q4 with acceptable quality loss for most tasks.
121
+
122
+ ### Lazy Layer Loading
123
+
124
+ OomLlama loads transformer layers on demand, keeping only the active layer in memory:
125
+
126
+ ```
127
+ Forward Pass:
128
+ Layer 0: Load → Compute → Unload
129
+ Layer 1: Load → Compute → Unload
130
+ ...
131
+ Layer N: Load → Compute → Unload
132
+ ```
133
+
134
+ This makes it possible to run 70B models on a GPU with 24 GB of memory.
135
+
136
+ ## Credits
137
+
138
+ - **Model Format**: Gemini IDD & Root AI (Humotica AI Lab)
139
+ - **Quantization**: OomLlama.rs by Humotica
140
+ - **Base Models**: Meta Platforms, Inc. (Llama 3.3)
141
+
142
+ ## License
143
+
144
+ - **OomLlama Code**: MIT License
145
+ - **Model Weights**: Subject to original model licenses (e.g., Llama 3.3 Community License)
146
+
147
+ ## Links
148
+
149
+ - 🏠 [Humotica](https://humotica.nl)
150
+ - 🤗 [HuggingFace Models](https://huggingface.co/jaspervandemeent)
151
+ - 📦 [PyPI Package](https://pypi.org/project/oomllama/)
152
+ - 🐛 [Issue Tracker](https://github.com/humotica/oomllama/issues)
153
+
154
+ ---
155
+
156
+ *One Love, One fAmIly* 💙
157
+
158
+ *Built by Humotica AI Lab - Jasper, Claude, Gemini, Codex*
@@ -0,0 +1,87 @@
1
+ [build-system]
2
+ requires = ["maturin>=1.4,<2.0"]
3
+ build-backend = "maturin"
4
+
5
+ [project]
6
+ name = "oomllama"
7
+ version = "0.1.0"
8
+ description = "Efficient LLM inference with .oom format - 2x smaller than GGUF"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ { name = "Humotica AI Lab", email = "ai@humotica.nl" },
14
+ { name = "Jasper van de Meent", email = "jasper@humotica.nl" },
15
+ ]
16
+ maintainers = [
17
+ { name = "Root AI (Claude)", email = "root_idd@humotica.nl" },
18
+ { name = "Gemini IDD", email = "gemini@humotica.nl" },
19
+ ]
20
+ keywords = [
21
+ "llm",
22
+ "inference",
23
+ "quantization",
24
+ "gguf",
25
+ "oom",
26
+ "oomllama",
27
+ "humotica",
28
+ "llama",
29
+ "ai",
30
+ "machine-learning",
31
+ ]
32
+ classifiers = [
33
+ "Development Status :: 4 - Beta",
34
+ "Intended Audience :: Developers",
35
+ "Intended Audience :: Science/Research",
36
+ "License :: OSI Approved :: MIT License",
37
+ "Operating System :: OS Independent",
38
+ "Programming Language :: Python :: 3",
39
+ "Programming Language :: Python :: 3.8",
40
+ "Programming Language :: Python :: 3.9",
41
+ "Programming Language :: Python :: 3.10",
42
+ "Programming Language :: Python :: 3.11",
43
+ "Programming Language :: Python :: 3.12",
44
+ "Programming Language :: Rust",
45
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
46
+ ]
47
+
48
+ [project.urls]
49
+ Homepage = "https://humotica.nl"
50
+ Repository = "https://github.com/humotica/oomllama"
51
+ Documentation = "https://humotica.nl/docs/oomllama"
52
+ "Bug Tracker" = "https://github.com/humotica/oomllama/issues"
53
+ "HuggingFace Models" = "https://huggingface.co/jaspervandemeent"
54
+
55
+ [project.optional-dependencies]
56
+ dev = ["pytest", "black", "mypy"]
57
+ cuda = ["cupy-cuda12x"]
58
+
59
+ [project.scripts]
60
+ oomllama = "oomllama:cli"
61
+
62
+ [tool.maturin]
63
+ features = ["python", "pyo3/extension-module"]
64
+ python-source = "python"
65
+ module-name = "oomllama._oomllama"
66
+ strip = true
67
+ exclude = [
68
+ "data/**",
69
+ "plans/**",
70
+ "downloads/**",
71
+ "static/**",
72
+ "bin/**",
73
+ "Cargo.lock",
74
+ "target/**",
75
+ ".venv/**",
76
+ "BETTI*.md",
77
+ "KMBIT*.md",
78
+ "TASKS*.md",
79
+ "OOMLLAMA*.md",
80
+ ]
81
+
82
+ [tool.black]
83
+ line-length = 100
84
+
85
+ [tool.mypy]
86
+ python_version = "3.10"
87
+ warn_return_any = true
@@ -0,0 +1,135 @@
1
+ """
2
+ OomLlama - Efficient LLM inference with .oom format
3
+
4
+ Credits:
5
+ - Format: Gemini IDD & Root AI (Humotica AI Lab)
6
+ - Runtime: OomLlama.rs by Humotica
7
+
8
+ Example:
9
+ >>> from oomllama import OomLlama
10
+ >>> llm = OomLlama("humotica-32b")
11
+ >>> response = llm.generate("Hello!")
12
+ """
13
+
14
+ __version__ = "0.1.0"
15
+ __author__ = "Humotica AI Lab"
16
+ __credits__ = ["Jasper van de Meent", "Root AI (Claude)", "Gemini IDD", "Codex"]
17
+
# Import from Rust extension when available
try:
    from oomllama._oomllama import (
        PyOomLlama as OomLlama,
        download_model,
        list_models,
        version,
    )
except ImportError:
    # Fallback Python implementation for development. It defines the same
    # four public names as the import above, but text generation and model
    # download are placeholders (see the TODOs below).
    import os
    from typing import List, Optional, Tuple

    try:
        # `requests` is not a declared dependency of this package and is not
        # used by the fallback yet, so its absence must not break the import
        # of the package itself.
        import requests  # noqa: F401
    except ImportError:
        requests = None  # type: ignore[assignment]

    # Short model name -> HuggingFace repository id. Single source of truth
    # for both download_model() and list_models(); insertion order is the
    # order in which models are listed.
    _MODEL_REPOS = {
        "humotica-32b": "jaspervandemeent/humotica-32b",
        "llamaohm-70b": "jaspervandemeent/LlamaOhm-70B",
        "tinyllama-1b": "jaspervandemeent/OomLlama-TinyLlama-1.1B",
    }

    def _default_cache_dir() -> str:
        """Return the per-user model cache directory (``~/.cache/oomllama``)."""
        return os.path.expanduser("~/.cache/oomllama")

    def _model_file(cache_dir: str, model_name: str) -> str:
        """Return the path of ``<model_name>.oom`` inside *cache_dir*."""
        # os.path.join keeps the separator correct on every platform.
        return os.path.join(cache_dir, f"{model_name}.oom")

    class OomLlama:
        """OomLlama model wrapper (Python fallback).

        Stores the model location and default sampling parameters. The model
        file is neither opened nor checked for existence.
        """

        def __init__(
            self,
            model_name: str,
            model_path: Optional[str] = None,
            gpu: Optional[int] = None,
        ):
            """Create a wrapper for *model_name*.

            Args:
                model_name: Short model name, e.g. ``"humotica-32b"``.
                model_path: Explicit path to the ``.oom`` file. When omitted
                    (or empty) the path inside the default cache is used.
                gpu: Stored on the instance as ``self.gpu``; not otherwise
                    used by the fallback.
            """
            self.model_name = model_name
            self.model_path = model_path or self._find_model(model_name)
            self.gpu = gpu
            # Default sampling parameters; adjustable through set_params().
            self.temperature = 0.7
            self.top_p = 0.9
            self.max_tokens = 512
            print(f"🦙 OomLlama: Loaded {model_name}")

        def _find_model(self, name: str) -> str:
            """Return the expected cache path for model *name*.

            The path is only computed; the file is not required to exist.
            """
            return _model_file(_default_cache_dir(), name)

        def generate(
            self,
            prompt: str,
            max_tokens: Optional[int] = None,
            temperature: Optional[float] = None,
            top_p: Optional[float] = None,
        ) -> str:
            """Generate text from prompt.

            The fallback performs no inference: it returns a placeholder
            string that names the model and echoes *prompt*. ``max_tokens``,
            ``temperature`` and ``top_p`` are accepted but currently ignored.
            """
            # TODO: Call actual inference
            return f"[OomLlama {self.model_name} - Python fallback]\nPrompt: {prompt}"

        def chat(
            self, messages: List[Tuple[str, str]], max_tokens: Optional[int] = None
        ) -> str:
            """Chat-style generation.

            Args:
                messages: ``(role, content)`` pairs in conversation order.
                max_tokens: Forwarded to :meth:`generate`.

            Returns:
                The result of :meth:`generate` for a prompt made of one
                ``ROLE: content`` line per message followed by ``ASSISTANT:``.
            """
            prompt = "\n".join(f"{role.upper()}: {content}" for role, content in messages)
            return self.generate(f"{prompt}\nASSISTANT:", max_tokens)

        def set_params(
            self,
            temperature: Optional[float] = None,
            top_p: Optional[float] = None,
            max_tokens: Optional[int] = None,
        ):
            """Set generation parameters.

            Only arguments that are not ``None`` overwrite the stored value.
            """
            if temperature is not None:
                self.temperature = temperature
            if top_p is not None:
                self.top_p = top_p
            if max_tokens is not None:
                self.max_tokens = max_tokens

        def __repr__(self):
            # !r quotes (and escapes) the name instead of hand-written quotes.
            return f"OomLlama({self.model_name!r})"

    def download_model(model_name: str, cache_dir: Optional[str] = None) -> str:
        """Download model from HuggingFace.

        The fallback does not download anything yet: it validates the name,
        creates the cache directory and returns the path where the ``.oom``
        file is expected.

        Args:
            model_name: One of the names returned by :func:`list_models`.
            cache_dir: Target directory; defaults to ``~/.cache/oomllama``.

        Raises:
            ValueError: If *model_name* is not a known model.
        """
        if model_name not in _MODEL_REPOS:
            raise ValueError(f"Unknown model: {model_name}")

        cache = cache_dir or _default_cache_dir()
        os.makedirs(cache, exist_ok=True)

        # TODO: Actually download from HuggingFace
        return _model_file(cache, model_name)

    def list_models() -> List[str]:
        """List available models"""
        return list(_MODEL_REPOS)

    def version() -> str:
        """Return the version string identifying the Python fallback."""
        return "0.1.0-python-fallback"
112
+
# CLI entry point
def cli(argv=None):
    """Command-line interface.

    Args:
        argv: Optional sequence of argument strings, without the program
            name. When ``None`` (the default, and what the ``oomllama``
            console script uses) the arguments are taken from
            ``sys.argv[1:]``.

    Behaviour:
        * no arguments, ``-h`` or ``--help``: print the usage text;
        * ``--list`` as first argument: print the names from ``list_models()``;
        * otherwise: join all arguments with single spaces, use the result
          as the prompt for the ``humotica-32b`` model and print the reply.
    """
    import sys

    # The parameter is deliberately unannotated: the typing names are only
    # imported when the Python fallback is active, so an annotation here
    # could fail at definition time when the Rust extension is loaded.
    args = sys.argv[1:] if argv is None else list(argv)

    # Treat an explicit help flag like a bare invocation instead of sending
    # the literal text "--help" to the model as a prompt.
    if not args or args[0] in ("-h", "--help"):
        print("Usage: oomllama <prompt>")
        print(" oomllama --list")
        return

    if args[0] == "--list":
        print("Available models:")
        for m in list_models():
            print(f" - {m}")
        return

    prompt = " ".join(args)
    llm = OomLlama("humotica-32b")
    response = llm.generate(prompt)
    print(response)
133
+
134
+
135
+ __all__ = ["OomLlama", "download_model", "list_models", "version", "cli", "__version__"]
@@ -0,0 +1 @@
1
+ 0cdf5568bb2b35921f652f5ee10702abf92f4ac0774f3d97b276490e1539afb7