nvhive 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nvhive-0.1.0/LICENSE +21 -0
- nvhive-0.1.0/PKG-INFO +660 -0
- nvhive-0.1.0/README.md +606 -0
- nvhive-0.1.0/nvh/__init__.py +23 -0
- nvhive-0.1.0/nvh/api/__init__.py +0 -0
- nvhive-0.1.0/nvh/api/proxy.py +528 -0
- nvhive-0.1.0/nvh/api/server.py +2573 -0
- nvhive-0.1.0/nvh/auth/__init__.py +0 -0
- nvhive-0.1.0/nvh/auth/auth.py +190 -0
- nvhive-0.1.0/nvh/auth/models.py +57 -0
- nvhive-0.1.0/nvh/cli/__init__.py +0 -0
- nvhive-0.1.0/nvh/cli/completions.py +130 -0
- nvhive-0.1.0/nvh/cli/conversations.py +403 -0
- nvhive-0.1.0/nvh/cli/main.py +6238 -0
- nvhive-0.1.0/nvh/cli/repl.py +930 -0
- nvhive-0.1.0/nvh/config/__init__.py +0 -0
- nvhive-0.1.0/nvh/config/capabilities.yaml +1795 -0
- nvhive-0.1.0/nvh/config/nemoclaw-blueprint.yaml +97 -0
- nvhive-0.1.0/nvh/config/settings.py +465 -0
- nvhive-0.1.0/nvh/core/__init__.py +0 -0
- nvhive-0.1.0/nvh/core/action_detector.py +284 -0
- nvhive-0.1.0/nvh/core/advisor_profiles.py +908 -0
- nvhive-0.1.0/nvh/core/agent_loop.py +279 -0
- nvhive-0.1.0/nvh/core/agents.py +556 -0
- nvhive-0.1.0/nvh/core/benchmark.py +196 -0
- nvhive-0.1.0/nvh/core/context.py +128 -0
- nvhive-0.1.0/nvh/core/context_files.py +229 -0
- nvhive-0.1.0/nvh/core/council.py +762 -0
- nvhive-0.1.0/nvh/core/engine.py +863 -0
- nvhive-0.1.0/nvh/core/file_lock.py +455 -0
- nvhive-0.1.0/nvh/core/free_tier.py +251 -0
- nvhive-0.1.0/nvh/core/hooks.py +117 -0
- nvhive-0.1.0/nvh/core/image_gen.py +121 -0
- nvhive-0.1.0/nvh/core/knowledge.py +222 -0
- nvhive-0.1.0/nvh/core/memory.py +172 -0
- nvhive-0.1.0/nvh/core/notify.py +52 -0
- nvhive-0.1.0/nvh/core/orchestrator.py +398 -0
- nvhive-0.1.0/nvh/core/rate_limiter.py +222 -0
- nvhive-0.1.0/nvh/core/router.py +459 -0
- nvhive-0.1.0/nvh/core/scheduler.py +106 -0
- nvhive-0.1.0/nvh/core/system_tools.py +918 -0
- nvhive-0.1.0/nvh/core/templates.py +310 -0
- nvhive-0.1.0/nvh/core/tools.py +507 -0
- nvhive-0.1.0/nvh/core/voice.py +199 -0
- nvhive-0.1.0/nvh/core/webhooks.py +260 -0
- nvhive-0.1.0/nvh/core/workflows.py +231 -0
- nvhive-0.1.0/nvh/integrations/__init__.py +0 -0
- nvhive-0.1.0/nvh/integrations/cloud_session.py +237 -0
- nvhive-0.1.0/nvh/integrations/openclaw.py +144 -0
- nvhive-0.1.0/nvh/mcp_server.py +338 -0
- nvhive-0.1.0/nvh/plugins/__init__.py +0 -0
- nvhive-0.1.0/nvh/plugins/manager.py +153 -0
- nvhive-0.1.0/nvh/providers/__init__.py +0 -0
- nvhive-0.1.0/nvh/providers/ai21_provider.py +219 -0
- nvhive-0.1.0/nvh/providers/anthropic_provider.py +199 -0
- nvhive-0.1.0/nvh/providers/base.py +215 -0
- nvhive-0.1.0/nvh/providers/cerebras_provider.py +220 -0
- nvhive-0.1.0/nvh/providers/cohere_provider.py +226 -0
- nvhive-0.1.0/nvh/providers/deepseek_provider.py +227 -0
- nvhive-0.1.0/nvh/providers/fireworks_provider.py +219 -0
- nvhive-0.1.0/nvh/providers/github_provider.py +258 -0
- nvhive-0.1.0/nvh/providers/google_provider.py +181 -0
- nvhive-0.1.0/nvh/providers/grok_provider.py +227 -0
- nvhive-0.1.0/nvh/providers/groq_provider.py +225 -0
- nvhive-0.1.0/nvh/providers/huggingface_provider.py +220 -0
- nvhive-0.1.0/nvh/providers/llm7_provider.py +274 -0
- nvhive-0.1.0/nvh/providers/mistral_provider.py +224 -0
- nvhive-0.1.0/nvh/providers/mock_provider.py +330 -0
- nvhive-0.1.0/nvh/providers/nvidia_provider.py +270 -0
- nvhive-0.1.0/nvh/providers/ollama_provider.py +217 -0
- nvhive-0.1.0/nvh/providers/openai_provider.py +312 -0
- nvhive-0.1.0/nvh/providers/openrouter_provider.py +221 -0
- nvhive-0.1.0/nvh/providers/perplexity_provider.py +219 -0
- nvhive-0.1.0/nvh/providers/registry.py +200 -0
- nvhive-0.1.0/nvh/providers/sambanova_provider.py +219 -0
- nvhive-0.1.0/nvh/providers/siliconflow_provider.py +258 -0
- nvhive-0.1.0/nvh/providers/together_provider.py +219 -0
- nvhive-0.1.0/nvh/sandbox/__init__.py +0 -0
- nvhive-0.1.0/nvh/sandbox/executor.py +239 -0
- nvhive-0.1.0/nvh/sdk.py +197 -0
- nvhive-0.1.0/nvh/storage/__init__.py +0 -0
- nvhive-0.1.0/nvh/storage/models.py +102 -0
- nvhive-0.1.0/nvh/storage/repository.py +447 -0
- nvhive-0.1.0/nvh/utils/__init__.py +0 -0
- nvhive-0.1.0/nvh/utils/environment.py +349 -0
- nvhive-0.1.0/nvh/utils/gpu.py +792 -0
- nvhive-0.1.0/nvh/utils/logging.py +45 -0
- nvhive-0.1.0/nvh/utils/sanitize.py +106 -0
- nvhive-0.1.0/nvh/utils/streaming.py +77 -0
- nvhive-0.1.0/nvh/workflows/code_review.yaml +20 -0
- nvhive-0.1.0/nvh/workflows/debug.yaml +20 -0
- nvhive-0.1.0/nvh/workflows/research.yaml +18 -0
- nvhive-0.1.0/nvhive.egg-info/PKG-INFO +660 -0
- nvhive-0.1.0/nvhive.egg-info/SOURCES.txt +111 -0
- nvhive-0.1.0/nvhive.egg-info/dependency_links.txt +1 -0
- nvhive-0.1.0/nvhive.egg-info/entry_points.txt +4 -0
- nvhive-0.1.0/nvhive.egg-info/requires.txt +28 -0
- nvhive-0.1.0/nvhive.egg-info/top_level.txt +1 -0
- nvhive-0.1.0/pyproject.toml +91 -0
- nvhive-0.1.0/setup.cfg +4 -0
- nvhive-0.1.0/tests/test_action_detector.py +118 -0
- nvhive-0.1.0/tests/test_agents.py +122 -0
- nvhive-0.1.0/tests/test_api.py +364 -0
- nvhive-0.1.0/tests/test_council.py +32 -0
- nvhive-0.1.0/tests/test_file_lock.py +140 -0
- nvhive-0.1.0/tests/test_integration.py +541 -0
- nvhive-0.1.0/tests/test_mcp_server.py +67 -0
- nvhive-0.1.0/tests/test_nemoclaw.py +109 -0
- nvhive-0.1.0/tests/test_openclaw.py +80 -0
- nvhive-0.1.0/tests/test_orchestrator.py +313 -0
- nvhive-0.1.0/tests/test_providers.py +123 -0
- nvhive-0.1.0/tests/test_router.py +53 -0
- nvhive-0.1.0/tests/test_tools.py +53 -0
nvhive-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 NVHive Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
nvhive-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,660 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nvhive
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: NVHive — Multi-LLM orchestration platform with intelligent routing, hive consensus, and auto-agent generation
|
|
5
|
+
Author: NVHive Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/thatcooperguy/nvHive
|
|
8
|
+
Project-URL: Repository, https://github.com/thatcooperguy/nvHive
|
|
9
|
+
Project-URL: Issues, https://github.com/thatcooperguy/nvHive/issues
|
|
10
|
+
Keywords: llm,ai,nvidia,gpu,orchestration,multi-model,agents,ollama,nemotron
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Education
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
18
|
+
Classifier: Operating System :: MacOS
|
|
19
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.11
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: typer[all]>=0.12
|
|
28
|
+
Requires-Dist: litellm>=1.40
|
|
29
|
+
Requires-Dist: pydantic>=2.0
|
|
30
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
31
|
+
Requires-Dist: sqlalchemy[asyncio]>=2.0
|
|
32
|
+
Requires-Dist: aiosqlite>=0.20
|
|
33
|
+
Requires-Dist: keyring>=25.0
|
|
34
|
+
Requires-Dist: rich>=13.0
|
|
35
|
+
Requires-Dist: httpx>=0.27
|
|
36
|
+
Requires-Dist: pyyaml>=6.0
|
|
37
|
+
Requires-Dist: tiktoken>=0.7
|
|
38
|
+
Requires-Dist: anyio>=4.0
|
|
39
|
+
Requires-Dist: fastapi>=0.115
|
|
40
|
+
Requires-Dist: uvicorn[standard]>=0.30
|
|
41
|
+
Requires-Dist: passlib[bcrypt]>=1.7
|
|
42
|
+
Requires-Dist: nvidia-ml-py3>=7.352.0
|
|
43
|
+
Provides-Extra: mcp
|
|
44
|
+
Requires-Dist: mcp[cli]>=1.0; extra == "mcp"
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
47
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
49
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
50
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
51
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
52
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
53
|
+
Dynamic: license-file
|
|
54
|
+
|
|
55
|
+
# nvHive
|
|
56
|
+
|
|
57
|
+
**Multi-LLM orchestration platform for NVIDIA GPUs and the cloud.**
|
|
58
|
+
|
|
59
|
+
     
|
|
60
|
+
|
|
61
|
+
## What is nvHive?
|
|
62
|
+
|
|
63
|
+
nvHive routes your questions to the right AI model automatically. It manages 22 providers and 63 models behind a single `nvh` command, picking the best advisor based on task type, cost, and privacy requirements. Simple questions stay local on your GPU (free, private). Complex questions route to the best cloud model. You can also convene a council of AI-generated expert personas to debate a decision, or poll every provider at once to compare answers. Twenty-five models are free with no credit card required.
|
|
64
|
+
|
|
65
|
+
## Platform Support
|
|
66
|
+
|
|
67
|
+
| Platform | GPU Support | Install |
|
|
68
|
+
|----------|-------------|---------|
|
|
69
|
+
| Linux (NVIDIA GPU) | Full (CUDA, pynvml) | `install.sh` or pip |
|
|
70
|
+
| macOS (Apple Silicon) | Metal via Ollama | `install-mac.sh` or pip |
|
|
71
|
+
| macOS (Intel) | CPU only | `pip install nvhive` |
|
|
72
|
+
| Windows (NVIDIA GPU) | Full (CUDA, pynvml) | `install.ps1` or pip |
|
|
73
|
+
| Windows (no GPU) | CPU only | `pip install nvhive` |
|
|
74
|
+
| Linux Desktop | Full (auto-detected) | `install.sh` |
|
|
75
|
+
|
|
76
|
+
## Quick Start
|
|
77
|
+
|
|
78
|
+
**Linux with GPU:**
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
curl -fsSL https://raw.githubusercontent.com/thatcooperguy/nvHive/main/install.sh | bash
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**macOS:**
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
curl -fsSL https://raw.githubusercontent.com/thatcooperguy/nvHive/main/install-mac.sh | bash
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Windows (PowerShell):**
|
|
91
|
+
|
|
92
|
+
```powershell
|
|
93
|
+
iwr -useb https://raw.githubusercontent.com/thatcooperguy/nvHive/main/install.ps1 | iex
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Any platform (pip):**
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
python3 -m pip install nvhive
|
|
100
|
+
nvh setup # configure your first provider
|
|
101
|
+
nvh "What is machine learning?"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**From source:**
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
git clone https://github.com/thatcooperguy/nvHive.git
|
|
108
|
+
cd nvHive
|
|
109
|
+
pip install -e ".[dev]"
|
|
110
|
+
nvh doctor # verify everything works
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## What Happens Automatically
|
|
114
|
+
|
|
115
|
+
When you install nvHive, everything configures itself:
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
Install runs:
|
|
119
|
+
1. Detects your GPU (NVIDIA via pynvml, Apple Silicon via sysctl)
|
|
120
|
+
2. Reads available VRAM / unified memory
|
|
121
|
+
3. Downloads the right NVIDIA Nemotron model for your hardware:
|
|
122
|
+
|
|
123
|
+
GPU Memory Model Auto-Downloaded Size Speed
|
|
124
|
+
─────────────────────────────────────────────────────────────
|
|
125
|
+
< 4 GB or CPU nemotron-mini (4B) ~2 GB ~30 tok/s
|
|
126
|
+
4–6 GB nemotron-mini (GPU accel.) ~2 GB ~50 tok/s
|
|
127
|
+
6–12 GB nemotron-small (recommended) ~5 GB ~75 tok/s
|
|
128
|
+
12–24 GB nemotron-small + codellama ~9 GB ~110 tok/s
|
|
129
|
+
24–48 GB nemotron 70B (quantized) ~40 GB ~40 tok/s
|
|
130
|
+
48–80 GB nemotron 70B (full quality) ~40 GB ~120 tok/s
|
|
131
|
+
80+ GB nemotron 120B (flagship) ~70 GB ~180 tok/s
|
|
132
|
+
|
|
133
|
+
4. Installs Ollama (local model server) — no root needed
|
|
134
|
+
5. Creates config with Ollama + LLM7 (anonymous, free) enabled
|
|
135
|
+
6. Pulls model in background — you can start chatting immediately
|
|
136
|
+
7. Adds 'nvh' to your PATH
|
|
137
|
+
|
|
138
|
+
First time: ~60 seconds. Reconnect (new VM): ~3 seconds.
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**You never pick a model.** The platform reads your hardware and downloads the best one. On Apple Silicon, it uses Metal via Ollama with unified memory. On NVIDIA, it uses CUDA. On CPU-only systems, it uses free cloud providers.
|
|
142
|
+
|
|
143
|
+
## Your First 60 Seconds
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
$ nvh "Explain Python decorators in 3 sentences"
|
|
147
|
+
╭─ nemotron-small (local, free) ──────────────────────────────────────╮
|
|
148
|
+
│ A decorator is a function that takes another function as input and │
|
|
149
|
+
│ returns a modified version of it. You apply one with @decorator │
|
|
150
|
+
│ syntax above a function definition. They're used for cross-cutting │
|
|
151
|
+
│ concerns like logging, caching, and access control without │
|
|
152
|
+
│ modifying the original function's code. │
|
|
153
|
+
╰─────────────────────────────────────── 0.4s · 52 tokens · $0.00 ───╯
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
No API keys needed for your first query -- nvHive defaults to free local or anonymous providers. Run `nvh setup` to add more providers when you are ready.
|
|
157
|
+
|
|
158
|
+
## Core Commands
|
|
159
|
+
|
|
160
|
+
### Essentials
|
|
161
|
+
|
|
162
|
+
| Command | Description |
|
|
163
|
+
|---------|-------------|
|
|
164
|
+
| `nvh "question"` | Smart default -- routes to the best available advisor |
|
|
165
|
+
| `nvh ask "question"` | Ask a specific advisor (use `-a provider`) |
|
|
166
|
+
| `nvh convene "question"` | Convene a council of AI-generated expert agents |
|
|
167
|
+
| `nvh poll "question"` | Ask every configured advisor, compare answers |
|
|
168
|
+
| `nvh throwdown "question"` | Two-pass deep analysis across all providers |
|
|
169
|
+
| `nvh quick "question"` | Fastest available model, minimal latency |
|
|
170
|
+
| `nvh safe "question"` | Local models only -- nothing leaves your machine |
|
|
171
|
+
| `nvh do "task"` | Detect action intent and execute (install, open, find) |
|
|
172
|
+
|
|
173
|
+
### Focus Modes
|
|
174
|
+
|
|
175
|
+
| Command | Description |
|
|
176
|
+
|---------|-------------|
|
|
177
|
+
| `nvh code "question"` | Code-optimized routing and prompts |
|
|
178
|
+
| `nvh write "question"` | Writing-optimized with style guidance |
|
|
179
|
+
| `nvh research "question"` | Multi-source research with citations |
|
|
180
|
+
| `nvh math "question"` | Math and reasoning, step-by-step |
|
|
181
|
+
|
|
182
|
+
### Tools
|
|
183
|
+
|
|
184
|
+
| Command | Description |
|
|
185
|
+
|---------|-------------|
|
|
186
|
+
| `nvh bench` | GPU benchmark -- measure tokens/second |
|
|
187
|
+
| `nvh scan` | Scan and index project files |
|
|
188
|
+
| `nvh learn "topic"` | Interactive learning sessions |
|
|
189
|
+
| `nvh clip` | Clipboard integration |
|
|
190
|
+
| `nvh voice` | Voice input/output |
|
|
191
|
+
| `nvh imagine "prompt"` | Image generation |
|
|
192
|
+
| `nvh screenshot` | Capture and analyze screenshots |
|
|
193
|
+
| `nvh git` | Git-aware operations |
|
|
194
|
+
|
|
195
|
+
### System
|
|
196
|
+
|
|
197
|
+
| Command | Description |
|
|
198
|
+
|---------|-------------|
|
|
199
|
+
| `nvh status` | Show configured providers, GPU, active model |
|
|
200
|
+
| `nvh savings` | Track how much you have saved with free/local models |
|
|
201
|
+
| `nvh debug` | Debug mode with verbose output |
|
|
202
|
+
| `nvh doctor` | Diagnose configuration and connectivity |
|
|
203
|
+
| `nvh setup` | Interactive provider setup wizard |
|
|
204
|
+
| `nvh keys` | Show all free API key signup links in one table |
|
|
205
|
+
| `nvh keys --open` | Open all free provider signup pages in browser |
|
|
206
|
+
| `nvh webui` | Install and launch the web UI (optional) |
|
|
207
|
+
| `nvh update` | Check for and install updates |
|
|
208
|
+
| `nvh version` | Print version |
|
|
209
|
+
| `nvh mcp` | Start MCP server (Claude Code, Cursor, OpenClaw) |
|
|
210
|
+
| `nvh openclaw` | Generate OpenClaw/NemoClaw tool config |
|
|
211
|
+
| `nvh nemoclaw` | NemoClaw integration setup guide |
|
|
212
|
+
| `nvh nemoclaw --test` | Test NemoClaw proxy connectivity |
|
|
213
|
+
| `nvh nemoclaw --start` | Start proxy server for NemoClaw |
|
|
214
|
+
|
|
215
|
+
### Management
|
|
216
|
+
|
|
217
|
+
| Command | Description |
|
|
218
|
+
|---------|-------------|
|
|
219
|
+
| `nvh advisor` | Manage advisor profiles and routing weights |
|
|
220
|
+
| `nvh agent` | Manage auto-generated expert agents and cabinets |
|
|
221
|
+
| `nvh config` | View and edit configuration |
|
|
222
|
+
| `nvh conversation` | List, export, or resume conversations |
|
|
223
|
+
| `nvh budget` | Set and monitor spending limits |
|
|
224
|
+
| `nvh model` | List, pull, or remove models |
|
|
225
|
+
| `nvh template` | Manage prompt templates |
|
|
226
|
+
| `nvh workflow` | Run multi-step YAML pipelines |
|
|
227
|
+
| `nvh knowledge` | Manage knowledge base entries |
|
|
228
|
+
| `nvh schedule` | Schedule recurring queries |
|
|
229
|
+
| `nvh webhook` | Configure webhook integrations |
|
|
230
|
+
| `nvh auth` | Manage API keys and authentication |
|
|
231
|
+
| `nvh plugins` | Install and manage plugins |
|
|
232
|
+
| `nvh serve` | Start the OpenAI-compatible API server |
|
|
233
|
+
| `nvh repl` | Launch interactive REPL |
|
|
234
|
+
| `nvh completions` | Generate shell completions |
|
|
235
|
+
|
|
236
|
+
### Direct Advisor Access
|
|
237
|
+
|
|
238
|
+
Skip the router and talk directly to a provider:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
nvh openai "question" # Route to OpenAI
|
|
242
|
+
nvh groq "question" # Route to Groq
|
|
243
|
+
nvh google "question" # Route to Gemini
|
|
244
|
+
nvh ollama "question" # Route to local Ollama
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Works for all 22 providers. Run `nvh <provider>` with no question to launch that provider's setup.
|
|
248
|
+
|
|
249
|
+
## How It Works
|
|
250
|
+
|
|
251
|
+
1. You type a question: `nvh "Should I use Redis or Postgres for sessions?"`
|
|
252
|
+
2. The **action detector** checks if this is a system action (install, open, find). If so, it executes directly -- no LLM needed.
|
|
253
|
+
3. If it is a question, the **router** classifies the task type, scores all configured advisors on relevance, cost, and speed, and picks the best one.
|
|
254
|
+
4. **Local-first**: simple queries stay on Nemotron via Ollama (free, private, no network).
|
|
255
|
+
5. **Cloud when needed**: complex or specialized queries route to the best cloud advisor.
|
|
256
|
+
|
|
257
|
+
Every response shows which advisor answered, how long it took, and what it cost.
|
|
258
|
+
|
|
259
|
+
## Local LLM Orchestration
|
|
260
|
+
|
|
261
|
+
The local Nemotron model doesn't just answer questions — it acts as an intelligent brain that orchestrates every cloud LLM call. All orchestration runs on your GPU for free.
|
|
262
|
+
|
|
263
|
+
### The Orchestrator's Role
|
|
264
|
+
|
|
265
|
+
When you ask a question, before any cloud API is called, the local model:
|
|
266
|
+
|
|
267
|
+
1. **Analyzes your query** — detects task type, complexity, privacy needs, and whether web access or code execution is required.
|
|
268
|
+
2. **Picks the best advisor** — goes beyond keyword matching to understand intent and route to the right cloud model.
|
|
269
|
+
3. **Rewrites your prompt** — optimizes wording for the target advisor's known strengths, reducing tokens and improving answer quality.
|
|
270
|
+
4. **Evaluates the response** — checks if the answer is complete and correct, and flags it for retry if not.
|
|
271
|
+
5. **Synthesizes locally** — when multiple advisors respond, merges their answers on your GPU instead of paying a cloud model to do it.
|
|
272
|
+
6. **Compresses conversation history** — summarizes long chats before sending context to cloud APIs, cutting token costs.
|
|
273
|
+
|
|
274
|
+
### Tiers
|
|
275
|
+
|
|
276
|
+
Orchestration scales automatically based on your GPU's available VRAM:
|
|
277
|
+
|
|
278
|
+
| Tier | VRAM Required | Features |
|
|
279
|
+
|------|--------------|----------|
|
|
280
|
+
| `off` | Any | Keyword routing, template agents (fallback mode) |
|
|
281
|
+
| `light` | 6 GB+ | Smart routing + prompt optimization |
|
|
282
|
+
| `full` | 20 GB+ | All features: routing, agents, eval, synthesis, compression |
|
|
283
|
+
| `auto` | — | Detects tier from available VRAM (default) |
|
|
284
|
+
|
|
285
|
+
With `auto` (the default), nvHive reads your GPU VRAM at startup and enables the highest tier your hardware supports. If no local model is available, the engine falls back gracefully to keyword-based routing — no errors, no configuration needed.
|
|
286
|
+
|
|
287
|
+
### Enabling and Disabling
|
|
288
|
+
|
|
289
|
+
```bash
|
|
290
|
+
# Show current orchestration mode
|
|
291
|
+
nvh config get defaults.orchestration_mode
|
|
292
|
+
|
|
293
|
+
# Disable orchestration (keyword routing only)
|
|
294
|
+
nvh config set defaults.orchestration_mode off
|
|
295
|
+
|
|
296
|
+
# Enable light mode (smart routing + prompt optimization)
|
|
297
|
+
nvh config set defaults.orchestration_mode light
|
|
298
|
+
|
|
299
|
+
# Enable full mode (all features)
|
|
300
|
+
nvh config set defaults.orchestration_mode full
|
|
301
|
+
|
|
302
|
+
# Auto-detect from VRAM (default)
|
|
303
|
+
nvh config set defaults.orchestration_mode auto
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
### Cost Impact
|
|
307
|
+
|
|
308
|
+
Every orchestration call runs on your local GPU — it costs nothing. The savings come indirectly:
|
|
309
|
+
|
|
310
|
+
- **Better routing** reduces expensive cloud calls by sending more queries to cheaper or local models.
|
|
311
|
+
- **Prompt optimization** sends fewer tokens to cloud APIs, directly reducing per-query cost.
|
|
312
|
+
- **Response evaluation** catches bad answers before you need to re-ask, avoiding retry costs.
|
|
313
|
+
- **Local synthesis** replaces cloud synthesis calls (the most expensive part of council mode) with free local inference.
|
|
314
|
+
|
|
315
|
+
## Supported AI Providers
|
|
316
|
+
|
|
317
|
+
| Provider | Free Tier | Best For | Models |
|
|
318
|
+
|----------|-----------|----------|--------|
|
|
319
|
+
| Ollama (Local) | Unlimited | Privacy, offline | nemotron, codellama, llama3 |
|
|
320
|
+
| LLM7 | 30 RPM, no signup | Anonymous, instant start | Multiple |
|
|
321
|
+
| Groq | 30 RPM free | Ultra-fast inference | llama3, mixtral, gemma |
|
|
322
|
+
| GitHub Models | 50-150 req/day | Free frontier models | GPT-4o, Llama, Mistral |
|
|
323
|
+
| Google Gemini | 15 RPM free | Long context, multimodal | Gemini 1.5 Pro/Flash |
|
|
324
|
+
| NVIDIA NIM | 1000 free credits | NVIDIA-optimized | Nemotron, Llama |
|
|
325
|
+
| Cerebras | 30 RPM free | Fast inference | Llama3 |
|
|
326
|
+
| SambaNova | Free tier | Llama models | Llama3 |
|
|
327
|
+
| Fireworks AI | Free tier | Fast open-source | Multiple |
|
|
328
|
+
| SiliconFlow | 1000 RPM free | High-throughput | Multiple |
|
|
329
|
+
| Hugging Face | Free API | Open-source models | Thousands |
|
|
330
|
+
| AI21 Labs | Free tier | Jamba models | Jamba |
|
|
331
|
+
| Mistral | 2 RPM free | Code | Mistral, Mixtral |
|
|
332
|
+
| Cohere | Trial key | RAG, embeddings | Command R+ |
|
|
333
|
+
| OpenAI | Paid | GPT-4o, reasoning | GPT-4o, o1, o3 |
|
|
334
|
+
| Anthropic | Paid | Analysis, coding | Claude 3.5/4 |
|
|
335
|
+
| DeepSeek | Very cheap | Code, reasoning | DeepSeek V3/R1 |
|
|
336
|
+
| Grok (xAI) | Paid | Real-time knowledge | Grok |
|
|
337
|
+
| Perplexity | Paid | Search-augmented | pplx-online |
|
|
338
|
+
| Together AI | Paid | Open-source models | Multiple |
|
|
339
|
+
| OpenRouter | Paid | Meta-router, fallback | All models |
|
|
340
|
+
| Mock | N/A | Unit tests | N/A |
|
|
341
|
+
|
|
342
|
+
**25 models are free** across 14 providers. Run `nvh setup` to configure any of them.
|
|
343
|
+
|
|
344
|
+
## GPU-Adaptive Model Selection
|
|
345
|
+
|
|
346
|
+
nvHive detects your GPU and automatically selects the best local model:
|
|
347
|
+
|
|
348
|
+
| GPU | VRAM | Best Local Model | Performance |
|
|
349
|
+
|-----|------|-------------------|-------------|
|
|
350
|
+
| No GPU | -- | Cloud only | Free tiers: LLM7, Groq, GitHub Models |
|
|
351
|
+
| GTX 1660 / RTX 2060 | 6 GB | nemotron-mini (4B) | ~30 tok/s |
|
|
352
|
+
| RTX 3060 | 12 GB | nemotron-small | ~55 tok/s |
|
|
353
|
+
| RTX 3070 / 3080 | 8-10 GB | nemotron-small | ~75 tok/s |
|
|
354
|
+
| RTX 3090 | 24 GB | nemotron-small + codellama | ~100 tok/s |
|
|
355
|
+
| RTX 4060 | 8 GB | nemotron-small | ~70 tok/s |
|
|
356
|
+
| RTX 4070 | 12 GB | nemotron-small | ~90 tok/s |
|
|
357
|
+
| RTX 4080 | 16 GB | nemotron-small + models | ~130 tok/s |
|
|
358
|
+
| RTX 4090 | 24 GB | nemotron 70B (Q4) | ~40 tok/s (70B) |
|
|
359
|
+
| RTX 5090 | 32 GB | nemotron 70B (Q4) | ~60 tok/s (70B) |
|
|
360
|
+
| A100 / H100 | 80 GB | nemotron 70B (full) | ~120-180 tok/s |
|
|
361
|
+
|
|
362
|
+
Models unload after inactivity to free VRAM for gaming. Run `nvh bench` to measure your actual throughput.
|
|
363
|
+
|
|
364
|
+
## Auto-Agent Council System
|
|
365
|
+
|
|
366
|
+
When you run `nvh convene`, nvHive analyzes your question and generates a panel of expert personas to debate it. Each agent has a defined role, expertise area, and analytical perspective.
|
|
367
|
+
|
|
368
|
+
**12 cabinets** with pre-configured expert panels:
|
|
369
|
+
|
|
370
|
+
| Cabinet | Experts |
|
|
371
|
+
|---------|---------|
|
|
372
|
+
| `executive` | CEO, CFO, CTO, Product Manager |
|
|
373
|
+
| `engineering` | Architect, Backend Engineer, DevOps/SRE, Security, QA |
|
|
374
|
+
| `security_review` | Security Engineer, DevOps/SRE, Architect, Legal/Compliance |
|
|
375
|
+
| `code_review` | Architect, Backend Engineer, QA, Performance Engineer |
|
|
376
|
+
| `product` | Product Manager, UX Designer, Engineering Manager, CEO |
|
|
377
|
+
| `data` | Data Engineer, DBA, ML/AI Engineer, Architect |
|
|
378
|
+
| `full_board` | CEO, CFO, CTO, Architect, Backend, DevOps, Security |
|
|
379
|
+
| `homework_help` | Patient Tutor, Devil's Advocate, Study Coach |
|
|
380
|
+
| `code_tutor` | Code Mentor, Bug Hunter, Best Practices Reviewer |
|
|
381
|
+
| `essay_review` | Writing Coach, Logic Checker, Style Editor |
|
|
382
|
+
| `study_group` | Socratic Questioner, ELI5 Explainer, Practice Problem Generator |
|
|
383
|
+
| `exam_prep` | Exam Coach, Flashcard Creator, Weak Spot Finder |
|
|
384
|
+
|
|
385
|
+
```bash
|
|
386
|
+
nvh convene "Should we migrate to microservices?" --cabinet engineering
|
|
387
|
+
nvh convene "Review my essay on climate policy" --cabinet essay_review
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
## Tool System
|
|
391
|
+
|
|
392
|
+
27 tools across seven categories. 18 safe tools run automatically; 9 that modify state require confirmation.
|
|
393
|
+
|
|
394
|
+
| Category | Tools |
|
|
395
|
+
|----------|-------|
|
|
396
|
+
| **Files** | `read_file`, `write_file`, `list_files`, `search_files` |
|
|
397
|
+
| **Code** | `run_code`, `shell` |
|
|
398
|
+
| **System** | `list_processes`, `system_info`, `disk_usage`, `open_app`, `open_url` |
|
|
399
|
+
| **Packages** | `pip_install`, `pip_list`, `npm_install` |
|
|
400
|
+
| **Web** | `download`, `web_search` |
|
|
401
|
+
| **Clipboard** | `get_clipboard`, `set_clipboard` |
|
|
402
|
+
| **Notifications** | `notify` |
|
|
403
|
+
|
|
404
|
+
Enable tools per query with `--tools` or globally in the REPL with `/tools on`.
|
|
405
|
+
|
|
406
|
+
## Privacy and Safe Mode
|
|
407
|
+
|
|
408
|
+
Three privacy tiers:
|
|
409
|
+
|
|
410
|
+
- **Safe mode** (`nvh safe`): Local models only. Nothing leaves your machine. Use for sensitive data, salary info, proprietary code.
|
|
411
|
+
- **Local default**: Simple queries use local Ollama. Complex queries route to cloud with your consent.
|
|
412
|
+
- **Cloud**: Full access to all configured providers for maximum capability.
|
|
413
|
+
|
|
414
|
+
```bash
|
|
415
|
+
nvh safe "Analyze this salary spreadsheet" # stays 100% local
|
|
416
|
+
nvh "Explain quantum computing" # may route to cloud
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
## HIVE.md Context Injection
|
|
420
|
+
|
|
421
|
+
Create a `HIVE.md` file in any project directory. nvHive automatically injects it into the system prompt for every query made from that directory.
|
|
422
|
+
|
|
423
|
+
```markdown
|
|
424
|
+
# HIVE.md
|
|
425
|
+
This is a Python 3.12 FastAPI project using SQLAlchemy and PostgreSQL.
|
|
426
|
+
Follow Google Python Style Guide. Prefer async/await patterns.
|
|
427
|
+
Test with pytest. Deploy target: Ubuntu 22.04 on GKE.
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
Every advisor sees your project context automatically.
|
|
431
|
+
|
|
432
|
+
## Python SDK
|
|
433
|
+
|
|
434
|
+
```python
|
|
435
|
+
from nvh import ask, convene, poll, safe, quick
|
|
436
|
+
|
|
437
|
+
# Simple query
|
|
438
|
+
response = await ask("What is machine learning?")
|
|
439
|
+
|
|
440
|
+
# Specific advisor
|
|
441
|
+
response = await ask("Debug this code", advisor="anthropic")
|
|
442
|
+
|
|
443
|
+
# Council of experts
|
|
444
|
+
result = await convene("Should we use Rust?", cabinet="engineering")
|
|
445
|
+
|
|
446
|
+
# Poll all advisors
|
|
447
|
+
results = await poll("Write a sort function")
|
|
448
|
+
|
|
449
|
+
# Local only
|
|
450
|
+
response = await safe("Analyze my salary data")
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
Synchronous versions available: `ask_sync`, `convene_sync`.
|
|
454
|
+
|
|
455
|
+
## OpenAI-Compatible Proxy
|
|
456
|
+
|
|
457
|
+
Run nvHive as a drop-in backend for any tool that speaks the OpenAI API:
|
|
458
|
+
|
|
459
|
+
```bash
|
|
460
|
+
nvh serve --port 8000
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
Then point any OpenAI SDK client at `http://localhost:8000`:
|
|
464
|
+
|
|
465
|
+
```python
|
|
466
|
+
from openai import OpenAI
|
|
467
|
+
client = OpenAI(base_url="http://localhost:8000/v1", api_key="nvhive")
|
|
468
|
+
response = client.chat.completions.create(
|
|
469
|
+
model="auto", # nvHive picks the best model
|
|
470
|
+
messages=[{"role": "user", "content": "Hello"}]
|
|
471
|
+
)
|
|
472
|
+
```
|
|
473
|
+
|
|
474
|
+
## MCP Server (Claude Code, Cursor, OpenClaw)
|
|
475
|
+
|
|
476
|
+
nvHive exposes its tools via the [Model Context Protocol](https://modelcontextprotocol.io/), making them available to Claude Code, Cursor, OpenClaw, and any MCP-compatible client.
|
|
477
|
+
|
|
478
|
+
```bash
|
|
479
|
+
# Install MCP support
|
|
480
|
+
pip install "nvhive[mcp]"
|
|
481
|
+
|
|
482
|
+
# Register with Claude Code
|
|
483
|
+
claude mcp add nvhive nvh mcp
|
|
484
|
+
|
|
485
|
+
# Or start as HTTP server for remote clients
|
|
486
|
+
nvh mcp -t streamable-http --port 8080
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
Tools available via MCP: `ask`, `ask_safe`, `council`, `throwdown`, `status`, `list_advisors`, `list_cabinets`.
|
|
490
|
+
|
|
491
|
+
For OpenClaw agents, generate the config:
|
|
492
|
+
|
|
493
|
+
```bash
|
|
494
|
+
nvh openclaw # creates openclaw.json with nvHive MCP config
|
|
495
|
+
nvh openclaw --agent # generates NemoClaw agent config
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
## NemoClaw Integration
|
|
499
|
+
|
|
500
|
+
nvHive works as an inference provider inside [NVIDIA NemoClaw](https://github.com/NVIDIA/NemoClaw), giving NemoClaw agents access to multi-model smart routing, council consensus, and throwdown analysis.
|
|
501
|
+
|
|
502
|
+
```bash
|
|
503
|
+
# Setup in three commands:
|
|
504
|
+
nvh nemoclaw --start # 1. Start nvHive proxy
|
|
505
|
+
openshell provider create \ # 2. Register with NemoClaw
|
|
506
|
+
--name nvhive --type openai \
|
|
507
|
+
--credential OPENAI_API_KEY=nvhive \
|
|
508
|
+
--config OPENAI_BASE_URL=http://host.openshell.internal:8000/v1/proxy
|
|
509
|
+
openshell inference set \ # 3. Set as default
|
|
510
|
+
--provider nvhive --model auto
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
NemoClaw agents can request any virtual model:
|
|
514
|
+
|
|
515
|
+
| Model | What It Does |
|
|
516
|
+
|-------|-------------|
|
|
517
|
+
| `auto` | Smart routing — best provider for the query |
|
|
518
|
+
| `safe` | Local only — nothing leaves your machine |
|
|
519
|
+
| `council` | 3-model consensus with synthesis |
|
|
520
|
+
| `council:N` | N-model council (2-10 members) |
|
|
521
|
+
| `throwdown` | Two-pass deep analysis with critique |
|
|
522
|
+
|
|
523
|
+
Privacy-aware routing: set the `x-nvhive-privacy: local-only` header to force all inference through local Ollama, integrating with NemoClaw's content-sensitivity routing.
|
|
524
|
+
|
|
525
|
+
```
|
|
526
|
+
NemoClaw Sandbox → OpenShell Gateway → nvHive Proxy → 22 providers
|
|
527
|
+
↓
|
|
528
|
+
Smart Router / Council / Throwdown
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
Run `nvh nemoclaw` for the full setup guide, or `nvh nemoclaw --test` to verify connectivity.
|
|
532
|
+
|
|
533
|
+
## Configuration
|
|
534
|
+
|
|
535
|
+
Configuration lives at `~/.config/nvhive/config.yaml`. Manage it with:
|
|
536
|
+
|
|
537
|
+
```bash
|
|
538
|
+
nvh config # view current config
|
|
539
|
+
nvh config set default_advisor groq
|
|
540
|
+
nvh config set safe_mode true
|
|
541
|
+
nvh budget set --daily 1.00 # daily spending cap
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
## Workflows
|
|
545
|
+
|
|
546
|
+
Define multi-step pipelines in YAML:
|
|
547
|
+
|
|
548
|
+
```yaml
|
|
549
|
+
name: Code Review Pipeline
|
|
550
|
+
steps:
|
|
551
|
+
- name: security_scan
|
|
552
|
+
action: ask
|
|
553
|
+
prompt: "Analyze for security vulnerabilities:\n\n{{input}}"
|
|
554
|
+
advisor: anthropic
|
|
555
|
+
save_as: security
|
|
556
|
+
|
|
557
|
+
- name: quality_review
|
|
558
|
+
action: ask
|
|
559
|
+
prompt: "Review for quality and best practices:\n\n{{input}}"
|
|
560
|
+
advisor: openai
|
|
561
|
+
save_as: quality
|
|
562
|
+
|
|
563
|
+
- name: synthesis
|
|
564
|
+
action: convene
|
|
565
|
+
prompt: "Synthesize findings:\n\nSecurity: {{security}}\nQuality: {{quality}}"
|
|
566
|
+
cabinet: code_review
|
|
567
|
+
save_as: summary
|
|
568
|
+
```
|
|
569
|
+
|
|
570
|
+
```bash
|
|
571
|
+
nvh workflow run code_review.yaml --input "$(cat main.py)"
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
## For Students
|
|
575
|
+
|
|
576
|
+
nvHive was built with students in mind. Five dedicated cabinets teach rather than just answer:
|
|
577
|
+
|
|
578
|
+
- **homework_help** -- Patient Tutor, Devil's Advocate, and Study Coach guide you to understanding
|
|
579
|
+
- **code_tutor** -- Code Mentor, Bug Hunter, and Best Practices Reviewer teach programming
|
|
580
|
+
- **essay_review** -- Writing Coach, Logic Checker, and Style Editor improve your writing
|
|
581
|
+
- **study_group** -- Socratic Questioner, ELI5 Explainer, and Practice Problem Generator
|
|
582
|
+
- **exam_prep** -- Exam Coach, Flashcard Creator, and Weak Spot Finder
|
|
583
|
+
|
|
584
|
+
All work with free models. Track your savings with `nvh savings`.
|
|
585
|
+
|
|
586
|
+
```bash
|
|
587
|
+
nvh convene "Explain recursion step by step" --cabinet code_tutor
|
|
588
|
+
nvh convene "Help me prepare for my calculus final" --cabinet exam_prep
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
## For Linux Desktop
|
|
592
|
+
|
|
593
|
+
nvHive is designed for deployment on Linux Desktop instances:
|
|
594
|
+
|
|
595
|
+
- Auto-detects cloud sessions and adapts to the available GPU tier
|
|
596
|
+
- All tools operate at user level -- no root, no sudo
|
|
597
|
+
- Session-aware: handles ephemeral environments with mounted home directories
|
|
598
|
+
- Auto-healing: reconnects to Ollama if the instance restarts
|
|
599
|
+
- GPU VRAM management: models unload after inactivity so games can reclaim VRAM
|
|
600
|
+
|
|
601
|
+
## Architecture
|
|
602
|
+
|
|
603
|
+
```
|
|
604
|
+
nvh CLI
|
|
605
|
+
|
|
|
606
|
+
+-- Action Detector -----> Direct execution (install, open, find)
|
|
607
|
+
|
|
|
608
|
+
+-- Router
|
|
609
|
+
| |-- Task classifier (code, writing, research, math, general)
|
|
610
|
+
| |-- Advisor scorer (relevance, cost, speed, privacy)
|
|
611
|
+
| +-- Model selector (GPU VRAM, provider availability)
|
|
612
|
+
|
|
|
613
|
+
+-- Providers (22)
|
|
614
|
+
| |-- Local: Ollama (Nemotron, CodeLlama, Llama3)
|
|
615
|
+
| |-- Cloud: OpenAI, Anthropic, Google, Groq, ...
|
|
616
|
+
| +-- Free: LLM7, GitHub Models, NVIDIA NIM, ...
|
|
617
|
+
|
|
|
618
|
+
+-- Agent System
|
|
619
|
+
| |-- Auto-generation from query analysis
|
|
620
|
+
| +-- 12 pre-built cabinets (22 expert personas)
|
|
621
|
+
|
|
|
622
|
+
+-- Tool System (27 tools, 18 safe / 9 confirm)
|
|
623
|
+
|
|
|
624
|
+
+-- SDK + OpenAI-compatible API server
|
|
625
|
+
```
|
|
626
|
+
|
|
627
|
+
## Project Stats
|
|
628
|
+
|
|
629
|
+
| Metric | Value |
|
|
630
|
+
|--------|-------|
|
|
631
|
+
| Python files | 81 |
|
|
632
|
+
| Lines of code | 27,518 |
|
|
633
|
+
| Functions | 810 |
|
|
634
|
+
| Tests | 181 |
|
|
635
|
+
| Providers | 22 |
|
|
636
|
+
| Models | 63 (25 free) |
|
|
637
|
+
| Tools | 27 (18 safe, 9 confirm) |
|
|
638
|
+
| Cabinets | 12 |
|
|
639
|
+
| Expert personas | 22 |
|
|
640
|
+
| Wheel size | 276 KB |
|
|
641
|
+
| Commits | 42 |
|
|
642
|
+
|
|
643
|
+
## Documentation
|
|
644
|
+
|
|
645
|
+
| Document | Description |
|
|
646
|
+
|----------|-------------|
|
|
647
|
+
| [Getting Started](docs/GETTING_STARTED.md) | First-time setup and usage guide |
|
|
648
|
+
| [Hardware Requirements](docs/HARDWARE.md) | GPU tiers, VRAM mapping, performance |
|
|
649
|
+
| [Testing Guide](docs/TESTING_GUIDE.md) | Running and writing tests |
|
|
650
|
+
| [EULA](EULA.md) | End User License Agreement |
|
|
651
|
+
| [Privacy Policy](PRIVACY.md) | Data handling and privacy |
|
|
652
|
+
| [Changelog](CHANGELOG.md) | Version history |
|
|
653
|
+
|
|
654
|
+
## Contributing
|
|
655
|
+
|
|
656
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, coding standards, and pull request guidelines.
|
|
657
|
+
|
|
658
|
+
## License
|
|
659
|
+
|
|
660
|
+
MIT License. See [LICENSE](LICENSE) for details.
|