nvhive 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. nvhive-0.1.0/LICENSE +21 -0
  2. nvhive-0.1.0/PKG-INFO +660 -0
  3. nvhive-0.1.0/README.md +606 -0
  4. nvhive-0.1.0/nvh/__init__.py +23 -0
  5. nvhive-0.1.0/nvh/api/__init__.py +0 -0
  6. nvhive-0.1.0/nvh/api/proxy.py +528 -0
  7. nvhive-0.1.0/nvh/api/server.py +2573 -0
  8. nvhive-0.1.0/nvh/auth/__init__.py +0 -0
  9. nvhive-0.1.0/nvh/auth/auth.py +190 -0
  10. nvhive-0.1.0/nvh/auth/models.py +57 -0
  11. nvhive-0.1.0/nvh/cli/__init__.py +0 -0
  12. nvhive-0.1.0/nvh/cli/completions.py +130 -0
  13. nvhive-0.1.0/nvh/cli/conversations.py +403 -0
  14. nvhive-0.1.0/nvh/cli/main.py +6238 -0
  15. nvhive-0.1.0/nvh/cli/repl.py +930 -0
  16. nvhive-0.1.0/nvh/config/__init__.py +0 -0
  17. nvhive-0.1.0/nvh/config/capabilities.yaml +1795 -0
  18. nvhive-0.1.0/nvh/config/nemoclaw-blueprint.yaml +97 -0
  19. nvhive-0.1.0/nvh/config/settings.py +465 -0
  20. nvhive-0.1.0/nvh/core/__init__.py +0 -0
  21. nvhive-0.1.0/nvh/core/action_detector.py +284 -0
  22. nvhive-0.1.0/nvh/core/advisor_profiles.py +908 -0
  23. nvhive-0.1.0/nvh/core/agent_loop.py +279 -0
  24. nvhive-0.1.0/nvh/core/agents.py +556 -0
  25. nvhive-0.1.0/nvh/core/benchmark.py +196 -0
  26. nvhive-0.1.0/nvh/core/context.py +128 -0
  27. nvhive-0.1.0/nvh/core/context_files.py +229 -0
  28. nvhive-0.1.0/nvh/core/council.py +762 -0
  29. nvhive-0.1.0/nvh/core/engine.py +863 -0
  30. nvhive-0.1.0/nvh/core/file_lock.py +455 -0
  31. nvhive-0.1.0/nvh/core/free_tier.py +251 -0
  32. nvhive-0.1.0/nvh/core/hooks.py +117 -0
  33. nvhive-0.1.0/nvh/core/image_gen.py +121 -0
  34. nvhive-0.1.0/nvh/core/knowledge.py +222 -0
  35. nvhive-0.1.0/nvh/core/memory.py +172 -0
  36. nvhive-0.1.0/nvh/core/notify.py +52 -0
  37. nvhive-0.1.0/nvh/core/orchestrator.py +398 -0
  38. nvhive-0.1.0/nvh/core/rate_limiter.py +222 -0
  39. nvhive-0.1.0/nvh/core/router.py +459 -0
  40. nvhive-0.1.0/nvh/core/scheduler.py +106 -0
  41. nvhive-0.1.0/nvh/core/system_tools.py +918 -0
  42. nvhive-0.1.0/nvh/core/templates.py +310 -0
  43. nvhive-0.1.0/nvh/core/tools.py +507 -0
  44. nvhive-0.1.0/nvh/core/voice.py +199 -0
  45. nvhive-0.1.0/nvh/core/webhooks.py +260 -0
  46. nvhive-0.1.0/nvh/core/workflows.py +231 -0
  47. nvhive-0.1.0/nvh/integrations/__init__.py +0 -0
  48. nvhive-0.1.0/nvh/integrations/cloud_session.py +237 -0
  49. nvhive-0.1.0/nvh/integrations/openclaw.py +144 -0
  50. nvhive-0.1.0/nvh/mcp_server.py +338 -0
  51. nvhive-0.1.0/nvh/plugins/__init__.py +0 -0
  52. nvhive-0.1.0/nvh/plugins/manager.py +153 -0
  53. nvhive-0.1.0/nvh/providers/__init__.py +0 -0
  54. nvhive-0.1.0/nvh/providers/ai21_provider.py +219 -0
  55. nvhive-0.1.0/nvh/providers/anthropic_provider.py +199 -0
  56. nvhive-0.1.0/nvh/providers/base.py +215 -0
  57. nvhive-0.1.0/nvh/providers/cerebras_provider.py +220 -0
  58. nvhive-0.1.0/nvh/providers/cohere_provider.py +226 -0
  59. nvhive-0.1.0/nvh/providers/deepseek_provider.py +227 -0
  60. nvhive-0.1.0/nvh/providers/fireworks_provider.py +219 -0
  61. nvhive-0.1.0/nvh/providers/github_provider.py +258 -0
  62. nvhive-0.1.0/nvh/providers/google_provider.py +181 -0
  63. nvhive-0.1.0/nvh/providers/grok_provider.py +227 -0
  64. nvhive-0.1.0/nvh/providers/groq_provider.py +225 -0
  65. nvhive-0.1.0/nvh/providers/huggingface_provider.py +220 -0
  66. nvhive-0.1.0/nvh/providers/llm7_provider.py +274 -0
  67. nvhive-0.1.0/nvh/providers/mistral_provider.py +224 -0
  68. nvhive-0.1.0/nvh/providers/mock_provider.py +330 -0
  69. nvhive-0.1.0/nvh/providers/nvidia_provider.py +270 -0
  70. nvhive-0.1.0/nvh/providers/ollama_provider.py +217 -0
  71. nvhive-0.1.0/nvh/providers/openai_provider.py +312 -0
  72. nvhive-0.1.0/nvh/providers/openrouter_provider.py +221 -0
  73. nvhive-0.1.0/nvh/providers/perplexity_provider.py +219 -0
  74. nvhive-0.1.0/nvh/providers/registry.py +200 -0
  75. nvhive-0.1.0/nvh/providers/sambanova_provider.py +219 -0
  76. nvhive-0.1.0/nvh/providers/siliconflow_provider.py +258 -0
  77. nvhive-0.1.0/nvh/providers/together_provider.py +219 -0
  78. nvhive-0.1.0/nvh/sandbox/__init__.py +0 -0
  79. nvhive-0.1.0/nvh/sandbox/executor.py +239 -0
  80. nvhive-0.1.0/nvh/sdk.py +197 -0
  81. nvhive-0.1.0/nvh/storage/__init__.py +0 -0
  82. nvhive-0.1.0/nvh/storage/models.py +102 -0
  83. nvhive-0.1.0/nvh/storage/repository.py +447 -0
  84. nvhive-0.1.0/nvh/utils/__init__.py +0 -0
  85. nvhive-0.1.0/nvh/utils/environment.py +349 -0
  86. nvhive-0.1.0/nvh/utils/gpu.py +792 -0
  87. nvhive-0.1.0/nvh/utils/logging.py +45 -0
  88. nvhive-0.1.0/nvh/utils/sanitize.py +106 -0
  89. nvhive-0.1.0/nvh/utils/streaming.py +77 -0
  90. nvhive-0.1.0/nvh/workflows/code_review.yaml +20 -0
  91. nvhive-0.1.0/nvh/workflows/debug.yaml +20 -0
  92. nvhive-0.1.0/nvh/workflows/research.yaml +18 -0
  93. nvhive-0.1.0/nvhive.egg-info/PKG-INFO +660 -0
  94. nvhive-0.1.0/nvhive.egg-info/SOURCES.txt +111 -0
  95. nvhive-0.1.0/nvhive.egg-info/dependency_links.txt +1 -0
  96. nvhive-0.1.0/nvhive.egg-info/entry_points.txt +4 -0
  97. nvhive-0.1.0/nvhive.egg-info/requires.txt +28 -0
  98. nvhive-0.1.0/nvhive.egg-info/top_level.txt +1 -0
  99. nvhive-0.1.0/pyproject.toml +91 -0
  100. nvhive-0.1.0/setup.cfg +4 -0
  101. nvhive-0.1.0/tests/test_action_detector.py +118 -0
  102. nvhive-0.1.0/tests/test_agents.py +122 -0
  103. nvhive-0.1.0/tests/test_api.py +364 -0
  104. nvhive-0.1.0/tests/test_council.py +32 -0
  105. nvhive-0.1.0/tests/test_file_lock.py +140 -0
  106. nvhive-0.1.0/tests/test_integration.py +541 -0
  107. nvhive-0.1.0/tests/test_mcp_server.py +67 -0
  108. nvhive-0.1.0/tests/test_nemoclaw.py +109 -0
  109. nvhive-0.1.0/tests/test_openclaw.py +80 -0
  110. nvhive-0.1.0/tests/test_orchestrator.py +313 -0
  111. nvhive-0.1.0/tests/test_providers.py +123 -0
  112. nvhive-0.1.0/tests/test_router.py +53 -0
  113. nvhive-0.1.0/tests/test_tools.py +53 -0
nvhive-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 NVHive Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
nvhive-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,660 @@
1
+ Metadata-Version: 2.4
2
+ Name: nvhive
3
+ Version: 0.1.0
4
+ Summary: NVHive — Multi-LLM orchestration platform with intelligent routing, hive consensus, and auto-agent generation
5
+ Author: NVHive Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/thatcooperguy/nvHive
8
+ Project-URL: Repository, https://github.com/thatcooperguy/nvHive
9
+ Project-URL: Issues, https://github.com/thatcooperguy/nvHive/issues
10
+ Keywords: llm,ai,nvidia,gpu,orchestration,multi-model,agents,ollama,nemotron
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Environment :: GPU :: NVIDIA CUDA
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Education
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Operating System :: POSIX :: Linux
18
+ Classifier: Operating System :: MacOS
19
+ Classifier: Operating System :: Microsoft :: Windows
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Requires-Python: >=3.11
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: typer[all]>=0.12
28
+ Requires-Dist: litellm>=1.40
29
+ Requires-Dist: pydantic>=2.0
30
+ Requires-Dist: pydantic-settings>=2.0
31
+ Requires-Dist: sqlalchemy[asyncio]>=2.0
32
+ Requires-Dist: aiosqlite>=0.20
33
+ Requires-Dist: keyring>=25.0
34
+ Requires-Dist: rich>=13.0
35
+ Requires-Dist: httpx>=0.27
36
+ Requires-Dist: pyyaml>=6.0
37
+ Requires-Dist: tiktoken>=0.7
38
+ Requires-Dist: anyio>=4.0
39
+ Requires-Dist: fastapi>=0.115
40
+ Requires-Dist: uvicorn[standard]>=0.30
41
+ Requires-Dist: passlib[bcrypt]>=1.7
42
+ Requires-Dist: nvidia-ml-py3>=7.352.0
43
+ Provides-Extra: mcp
44
+ Requires-Dist: mcp[cli]>=1.0; extra == "mcp"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=8.0; extra == "dev"
47
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
48
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
49
+ Requires-Dist: ruff>=0.4; extra == "dev"
50
+ Requires-Dist: mypy>=1.10; extra == "dev"
51
+ Requires-Dist: build>=1.0; extra == "dev"
52
+ Requires-Dist: twine>=5.0; extra == "dev"
53
+ Dynamic: license-file
54
+
55
+ # nvHive
56
+
57
+ **Multi-LLM orchestration platform for NVIDIA GPUs and the cloud.**
58
+
59
+ ![version](https://img.shields.io/badge/version-0.1.0-blue) ![python](https://img.shields.io/badge/python-3.11%2B-blue) ![license](https://img.shields.io/badge/license-MIT-green) ![tests](https://img.shields.io/badge/tests-156%20passing-brightgreen) ![providers](https://img.shields.io/badge/providers-22-orange) ![models](https://img.shields.io/badge/models-63-purple)
60
+
61
+ ## What is nvHive?
62
+
63
+ nvHive routes your questions to the right AI model automatically. It manages 22 providers and 63 models behind a single `nvh` command, picking the best advisor based on task type, cost, and privacy requirements. Simple questions stay local on your GPU (free, private). Complex questions route to the best cloud model. You can also convene a council of AI-generated expert personas to debate a decision, or poll every provider at once to compare answers. Twenty-five models are free with no credit card required.
64
+
65
+ ## Platform Support
66
+
67
+ | Platform | GPU Support | Install |
68
+ |----------|-------------|---------|
69
+ | Linux (NVIDIA GPU) | Full (CUDA, pynvml) | `install.sh` or pip |
70
+ | macOS (Apple Silicon) | Metal via Ollama | `install-mac.sh` or pip |
71
+ | macOS (Intel) | CPU only | `pip install nvhive` |
72
+ | Windows (NVIDIA GPU) | Full (CUDA, pynvml) | `install.ps1` or pip |
73
+ | Windows (no GPU) | CPU only | `pip install nvhive` |
74
+ | Linux Desktop | Full (auto-detected) | `install.sh` |
75
+
76
+ ## Quick Start
77
+
78
+ **Linux with GPU:**
79
+
80
+ ```bash
81
+ curl -fsSL https://raw.githubusercontent.com/thatcooperguy/nvHive/main/install.sh | bash
82
+ ```
83
+
84
+ **macOS:**
85
+
86
+ ```bash
87
+ curl -fsSL https://raw.githubusercontent.com/thatcooperguy/nvHive/main/install-mac.sh | bash
88
+ ```
89
+
90
+ **Windows (PowerShell):**
91
+
92
+ ```powershell
93
+ iwr -useb https://raw.githubusercontent.com/thatcooperguy/nvHive/main/install.ps1 | iex
94
+ ```
95
+
96
+ **Any platform (pip):**
97
+
98
+ ```bash
99
+ python3 -m pip install nvhive
100
+ nvh setup # configure your first provider
101
+ nvh "What is machine learning?"
102
+ ```
103
+
104
+ **From source:**
105
+
106
+ ```bash
107
+ git clone https://github.com/thatcooperguy/nvHive.git
108
+ cd nvHive
109
+ pip install -e ".[dev]"
110
+ nvh doctor # verify everything works
111
+ ```
112
+
113
+ ## What Happens Automatically
114
+
115
+ When you install nvHive, everything configures itself:
116
+
117
+ ```
118
+ Install runs:
119
+ 1. Detects your GPU (NVIDIA via pynvml, Apple Silicon via sysctl)
120
+ 2. Reads available VRAM / unified memory
121
+ 3. Downloads the right NVIDIA Nemotron model for your hardware:
122
+
123
+ GPU Memory Model Auto-Downloaded Size Speed
124
+ ─────────────────────────────────────────────────────────────
125
+ < 4 GB or CPU nemotron-mini (4B) ~2 GB ~30 tok/s
126
+ 4–6 GB nemotron-mini (GPU accel.) ~2 GB ~50 tok/s
127
+ 6–12 GB nemotron-small (recommended) ~5 GB ~75 tok/s
128
+ 12–24 GB nemotron-small + codellama ~9 GB ~110 tok/s
129
+ 24–48 GB nemotron 70B (quantized) ~40 GB ~40 tok/s
130
+ 48–80 GB nemotron 70B (full quality) ~40 GB ~120 tok/s
131
+ 80+ GB nemotron 120B (flagship) ~70 GB ~180 tok/s
132
+
133
+ 4. Installs Ollama (local model server) — no root needed
134
+ 5. Creates config with Ollama + LLM7 (anonymous, free) enabled
135
+ 6. Pulls model in background — you can start chatting immediately
136
+ 7. Adds 'nvh' to your PATH
137
+
138
+ First time: ~60 seconds. Reconnect (new VM): ~3 seconds.
139
+ ```
140
+
141
+ **You never pick a model.** The platform reads your hardware and downloads the best one. On Apple Silicon, it uses Metal via Ollama with unified memory. On NVIDIA, it uses CUDA. On CPU-only systems, it uses free cloud providers.
142
+
143
+ ## Your First 60 Seconds
144
+
145
+ ```
146
+ $ nvh "Explain Python decorators in 3 sentences"
147
+ ╭─ nemotron-small (local, free) ──────────────────────────────────────╮
148
+ │ A decorator is a function that takes another function as input and │
149
+ │ returns a modified version of it. You apply one with @decorator │
150
+ │ syntax above a function definition. They're used for cross-cutting │
151
+ │ concerns like logging, caching, and access control without │
152
+ │ modifying the original function's code. │
153
+ ╰─────────────────────────────────────── 0.4s · 52 tokens · $0.00 ───╯
154
+ ```
155
+
156
+ No API keys needed for your first query -- nvHive defaults to free local or anonymous providers. Run `nvh setup` to add more providers when you are ready.
157
+
158
+ ## Core Commands
159
+
160
+ ### Essentials
161
+
162
+ | Command | Description |
163
+ |---------|-------------|
164
+ | `nvh "question"` | Smart default -- routes to the best available advisor |
165
+ | `nvh ask "question"` | Ask a specific advisor (use `-a provider`) |
166
+ | `nvh convene "question"` | Convene a council of AI-generated expert agents |
167
+ | `nvh poll "question"` | Ask every configured advisor, compare answers |
168
+ | `nvh throwdown "question"` | Two-pass deep analysis across all providers |
169
+ | `nvh quick "question"` | Fastest available model, minimal latency |
170
+ | `nvh safe "question"` | Local models only -- nothing leaves your machine |
171
+ | `nvh do "task"` | Detect action intent and execute (install, open, find) |
172
+
173
+ ### Focus Modes
174
+
175
+ | Command | Description |
176
+ |---------|-------------|
177
+ | `nvh code "question"` | Code-optimized routing and prompts |
178
+ | `nvh write "question"` | Writing-optimized with style guidance |
179
+ | `nvh research "question"` | Multi-source research with citations |
180
+ | `nvh math "question"` | Math and reasoning, step-by-step |
181
+
182
+ ### Tools
183
+
184
+ | Command | Description |
185
+ |---------|-------------|
186
+ | `nvh bench` | GPU benchmark -- measure tokens/second |
187
+ | `nvh scan` | Scan and index project files |
188
+ | `nvh learn "topic"` | Interactive learning sessions |
189
+ | `nvh clip` | Clipboard integration |
190
+ | `nvh voice` | Voice input/output |
191
+ | `nvh imagine "prompt"` | Image generation |
192
+ | `nvh screenshot` | Capture and analyze screenshots |
193
+ | `nvh git` | Git-aware operations |
194
+
195
+ ### System
196
+
197
+ | Command | Description |
198
+ |---------|-------------|
199
+ | `nvh status` | Show configured providers, GPU, active model |
200
+ | `nvh savings` | Track how much you have saved with free/local models |
201
+ | `nvh debug` | Debug mode with verbose output |
202
+ | `nvh doctor` | Diagnose configuration and connectivity |
203
+ | `nvh setup` | Interactive provider setup wizard |
204
+ | `nvh keys` | Show all free API key signup links in one table |
205
+ | `nvh keys --open` | Open all free provider signup pages in browser |
206
+ | `nvh webui` | Install and launch the web UI (optional) |
207
+ | `nvh update` | Check for and install updates |
208
+ | `nvh version` | Print version |
209
+ | `nvh mcp` | Start MCP server (Claude Code, Cursor, OpenClaw) |
210
+ | `nvh openclaw` | Generate OpenClaw/NemoClaw tool config |
211
+ | `nvh nemoclaw` | NemoClaw integration setup guide |
212
+ | `nvh nemoclaw --test` | Test NemoClaw proxy connectivity |
213
+ | `nvh nemoclaw --start` | Start proxy server for NemoClaw |
214
+
215
+ ### Management
216
+
217
+ | Command | Description |
218
+ |---------|-------------|
219
+ | `nvh advisor` | Manage advisor profiles and routing weights |
220
+ | `nvh agent` | Manage auto-generated expert agents and cabinets |
221
+ | `nvh config` | View and edit configuration |
222
+ | `nvh conversation` | List, export, or resume conversations |
223
+ | `nvh budget` | Set and monitor spending limits |
224
+ | `nvh model` | List, pull, or remove models |
225
+ | `nvh template` | Manage prompt templates |
226
+ | `nvh workflow` | Run multi-step YAML pipelines |
227
+ | `nvh knowledge` | Manage knowledge base entries |
228
+ | `nvh schedule` | Schedule recurring queries |
229
+ | `nvh webhook` | Configure webhook integrations |
230
+ | `nvh auth` | Manage API keys and authentication |
231
+ | `nvh plugins` | Install and manage plugins |
232
+ | `nvh serve` | Start the OpenAI-compatible API server |
233
+ | `nvh repl` | Launch interactive REPL |
234
+ | `nvh completions` | Generate shell completions |
235
+
236
+ ### Direct Advisor Access
237
+
238
+ Skip the router and talk directly to a provider:
239
+
240
+ ```bash
241
+ nvh openai "question" # Route to OpenAI
242
+ nvh groq "question" # Route to Groq
243
+ nvh google "question" # Route to Gemini
244
+ nvh ollama "question" # Route to local Ollama
245
+ ```
246
+
247
+ Works for all 22 providers. Run `nvh <provider>` with no question to launch that provider's setup.
248
+
249
+ ## How It Works
250
+
251
+ 1. You type a question: `nvh "Should I use Redis or Postgres for sessions?"`
252
+ 2. The **action detector** checks if this is a system action (install, open, find). If so, it executes directly -- no LLM needed.
253
+ 3. If it is a question, the **router** classifies the task type, scores all configured advisors on relevance, cost, and speed, and picks the best one.
254
+ 4. **Local-first**: simple queries stay on Nemotron via Ollama (free, private, no network).
255
+ 5. **Cloud when needed**: complex or specialized queries route to the best cloud advisor.
256
+
257
+ Every response shows which advisor answered, how long it took, and what it cost.
258
+
259
+ ## Local LLM Orchestration
260
+
261
+ The local Nemotron model doesn't just answer questions — it acts as an intelligent brain that orchestrates every cloud LLM call. All orchestration runs on your GPU for free.
262
+
263
+ ### The Orchestrator's Role
264
+
265
+ When you ask a question, before any cloud API is called, the local model:
266
+
267
+ 1. **Analyzes your query** — detects task type, complexity, privacy needs, and whether web access or code execution is required.
268
+ 2. **Picks the best advisor** — goes beyond keyword matching to understand intent and route to the right cloud model.
269
+ 3. **Rewrites your prompt** — optimizes wording for the target advisor's known strengths, reducing tokens and improving answer quality.
270
+ 4. **Evaluates the response** — checks if the answer is complete and correct, and flags it for retry if not.
271
+ 5. **Synthesizes locally** — when multiple advisors respond, merges their answers on your GPU instead of paying a cloud model to do it.
272
+ 6. **Compresses conversation history** — summarizes long chats before sending context to cloud APIs, cutting token costs.
273
+
274
+ ### Tiers
275
+
276
+ Orchestration scales automatically based on your GPU's available VRAM:
277
+
278
+ | Tier | VRAM Required | Features |
279
+ |------|--------------|----------|
280
+ | `off` | Any | Keyword routing, template agents (fallback mode) |
281
+ | `light` | 6 GB+ | Smart routing + prompt optimization |
282
+ | `full` | 20 GB+ | All features: routing, agents, eval, synthesis, compression |
283
+ | `auto` | — | Detects tier from available VRAM (default) |
284
+
285
+ With `auto` (the default), nvHive reads your GPU VRAM at startup and enables the highest tier your hardware supports. If no local model is available, the engine falls back gracefully to keyword-based routing — no errors, no configuration needed.
286
+
287
+ ### Enabling and Disabling
288
+
289
+ ```bash
290
+ # Show current orchestration mode
291
+ nvh config get defaults.orchestration_mode
292
+
293
+ # Disable orchestration (keyword routing only)
294
+ nvh config set defaults.orchestration_mode off
295
+
296
+ # Enable light mode (smart routing + prompt optimization)
297
+ nvh config set defaults.orchestration_mode light
298
+
299
+ # Enable full mode (all features)
300
+ nvh config set defaults.orchestration_mode full
301
+
302
+ # Auto-detect from VRAM (default)
303
+ nvh config set defaults.orchestration_mode auto
304
+ ```
305
+
306
+ ### Cost Impact
307
+
308
+ Every orchestration call runs on your local GPU — it costs nothing. The savings come indirectly:
309
+
310
+ - **Better routing** reduces expensive cloud calls by sending more queries to cheaper or local models.
311
+ - **Prompt optimization** sends fewer tokens to cloud APIs, directly reducing per-query cost.
312
+ - **Response evaluation** catches bad answers before you need to re-ask, avoiding retry costs.
313
+ - **Local synthesis** replaces cloud synthesis calls (the most expensive part of council mode) with free local inference.
314
+
315
+ ## Supported AI Providers
316
+
317
+ | Provider | Free Tier | Best For | Models |
318
+ |----------|-----------|----------|--------|
319
+ | Ollama (Local) | Unlimited | Privacy, offline | nemotron, codellama, llama3 |
320
+ | LLM7 | 30 RPM, no signup | Anonymous, instant start | Multiple |
321
+ | Groq | 30 RPM free | Ultra-fast inference | llama3, mixtral, gemma |
322
+ | GitHub Models | 50-150 req/day | Free frontier models | GPT-4o, Llama, Mistral |
323
+ | Google Gemini | 15 RPM free | Long context, multimodal | Gemini 1.5 Pro/Flash |
324
+ | NVIDIA NIM | 1000 free credits | NVIDIA-optimized | Nemotron, Llama |
325
+ | Cerebras | 30 RPM free | Fast inference | Llama3 |
326
+ | SambaNova | Free tier | Llama models | Llama3 |
327
+ | Fireworks AI | Free tier | Fast open-source | Multiple |
328
+ | SiliconFlow | 1000 RPM free | High-throughput | Multiple |
329
+ | Hugging Face | Free API | Open-source models | Thousands |
330
+ | AI21 Labs | Free tier | Jamba models | Jamba |
331
+ | Mistral | 2 RPM free | Code | Mistral, Mixtral |
332
+ | Cohere | Trial key | RAG, embeddings | Command R+ |
333
+ | OpenAI | Paid | GPT-4o, reasoning | GPT-4o, o1, o3 |
334
+ | Anthropic | Paid | Analysis, coding | Claude 3.5/4 |
335
+ | DeepSeek | Very cheap | Code, reasoning | DeepSeek V3/R1 |
336
+ | Grok (xAI) | Paid | Real-time knowledge | Grok |
337
+ | Perplexity | Paid | Search-augmented | pplx-online |
338
+ | Together AI | Paid | Open-source models | Multiple |
339
+ | OpenRouter | Paid | Meta-router, fallback | All models |
340
+ | Mock | N/A | Unit tests | N/A |
341
+
342
+ **25 models are free** across 14 providers. Run `nvh setup` to configure any of them.
343
+
344
+ ## GPU-Adaptive Model Selection
345
+
346
+ nvHive detects your GPU and automatically selects the best local model:
347
+
348
+ | GPU | VRAM | Best Local Model | Performance |
349
+ |-----|------|-------------------|-------------|
350
+ | No GPU | -- | Cloud only | Free tiers: LLM7, Groq, GitHub Models |
351
+ | GTX 1660 / RTX 2060 | 6 GB | nemotron-mini (4B) | ~30 tok/s |
352
+ | RTX 3060 | 12 GB | nemotron-small | ~55 tok/s |
353
+ | RTX 3070 / 3080 | 8-10 GB | nemotron-small | ~75 tok/s |
354
+ | RTX 3090 | 24 GB | nemotron-small + codellama | ~100 tok/s |
355
+ | RTX 4060 | 8 GB | nemotron-small | ~70 tok/s |
356
+ | RTX 4070 | 12 GB | nemotron-small | ~90 tok/s |
357
+ | RTX 4080 | 16 GB | nemotron-small + models | ~130 tok/s |
358
+ | RTX 4090 | 24 GB | nemotron 70B (Q4) | ~40 tok/s (70B) |
359
+ | RTX 5090 | 32 GB | nemotron 70B (Q4) | ~60 tok/s (70B) |
360
+ | A100 / H100 | 80 GB | nemotron 70B (full) | ~120-180 tok/s |
361
+
362
+ Models unload after inactivity to free VRAM for gaming. Run `nvh bench` to measure your actual throughput.
363
+
364
+ ## Auto-Agent Council System
365
+
366
+ When you run `nvh convene`, nvHive analyzes your question and generates a panel of expert personas to debate it. Each agent has a defined role, expertise area, and analytical perspective.
367
+
368
+ **12 cabinets** with pre-configured expert panels:
369
+
370
+ | Cabinet | Experts |
371
+ |---------|---------|
372
+ | `executive` | CEO, CFO, CTO, Product Manager |
373
+ | `engineering` | Architect, Backend Engineer, DevOps/SRE, Security, QA |
374
+ | `security_review` | Security Engineer, DevOps/SRE, Architect, Legal/Compliance |
375
+ | `code_review` | Architect, Backend Engineer, QA, Performance Engineer |
376
+ | `product` | Product Manager, UX Designer, Engineering Manager, CEO |
377
+ | `data` | Data Engineer, DBA, ML/AI Engineer, Architect |
378
+ | `full_board` | CEO, CFO, CTO, Architect, Backend, DevOps, Security |
379
+ | `homework_help` | Patient Tutor, Devil's Advocate, Study Coach |
380
+ | `code_tutor` | Code Mentor, Bug Hunter, Best Practices Reviewer |
381
+ | `essay_review` | Writing Coach, Logic Checker, Style Editor |
382
+ | `study_group` | Socratic Questioner, ELI5 Explainer, Practice Problem Generator |
383
+ | `exam_prep` | Exam Coach, Flashcard Creator, Weak Spot Finder |
384
+
385
+ ```bash
386
+ nvh convene "Should we migrate to microservices?" --cabinet engineering
387
+ nvh convene "Review my essay on climate policy" --cabinet essay_review
388
+ ```
389
+
390
+ ## Tool System
391
+
392
+ nvHive provides 27 tools across seven categories. 18 safe tools run automatically; 9 that modify state require confirmation.
393
+
394
+ | Category | Tools |
395
+ |----------|-------|
396
+ | **Files** | `read_file`, `write_file`, `list_files`, `search_files` |
397
+ | **Code** | `run_code`, `shell` |
398
+ | **System** | `list_processes`, `system_info`, `disk_usage`, `open_app`, `open_url` |
399
+ | **Packages** | `pip_install`, `pip_list`, `npm_install` |
400
+ | **Web** | `download`, `web_search` |
401
+ | **Clipboard** | `get_clipboard`, `set_clipboard` |
402
+ | **Notifications** | `notify` |
403
+
404
+ Enable tools per query with `--tools` or globally in the REPL with `/tools on`.
405
+
406
+ ## Privacy and Safe Mode
407
+
408
+ Three privacy tiers:
409
+
410
+ - **Safe mode** (`nvh safe`): Local models only. Nothing leaves your machine. Use for sensitive data, salary info, proprietary code.
411
+ - **Local default**: Simple queries use local Ollama. Complex queries route to cloud with your consent.
412
+ - **Cloud**: Full access to all configured providers for maximum capability.
413
+
414
+ ```bash
415
+ nvh safe "Analyze this salary spreadsheet" # stays 100% local
416
+ nvh "Explain quantum computing" # may route to cloud
417
+ ```
418
+
419
+ ## HIVE.md Context Injection
420
+
421
+ Create a `HIVE.md` file in any project directory. nvHive automatically injects it into the system prompt for every query made from that directory.
422
+
423
+ ```markdown
424
+ # HIVE.md
425
+ This is a Python 3.12 FastAPI project using SQLAlchemy and PostgreSQL.
426
+ Follow Google Python Style Guide. Prefer async/await patterns.
427
+ Test with pytest. Deploy target: Ubuntu 22.04 on GKE.
428
+ ```
429
+
430
+ Every advisor sees your project context automatically.
431
+
432
+ ## Python SDK
433
+
434
+ ```python
435
+ from nvh import ask, convene, poll, safe, quick
436
+
437
+ # Simple query
438
+ response = await ask("What is machine learning?")
439
+
440
+ # Specific advisor
441
+ response = await ask("Debug this code", advisor="anthropic")
442
+
443
+ # Council of experts
444
+ result = await convene("Should we use Rust?", cabinet="engineering")
445
+
446
+ # Poll all advisors
447
+ results = await poll("Write a sort function")
448
+
449
+ # Local only
450
+ response = await safe("Analyze my salary data")
451
+ ```
452
+
453
+ Synchronous versions available: `ask_sync`, `convene_sync`.
454
+
455
+ ## OpenAI-Compatible Proxy
456
+
457
+ Run nvHive as a drop-in backend for any tool that speaks the OpenAI API:
458
+
459
+ ```bash
460
+ nvh serve --port 8000
461
+ ```
462
+
463
+ Then point any OpenAI SDK client at `http://localhost:8000`:
464
+
465
+ ```python
466
+ from openai import OpenAI
467
+ client = OpenAI(base_url="http://localhost:8000/v1", api_key="nvhive")
468
+ response = client.chat.completions.create(
469
+ model="auto", # nvHive picks the best model
470
+ messages=[{"role": "user", "content": "Hello"}]
471
+ )
472
+ ```
473
+
474
+ ## MCP Server (Claude Code, Cursor, OpenClaw)
475
+
476
+ nvHive exposes its tools via the [Model Context Protocol](https://modelcontextprotocol.io/), making them available to Claude Code, Cursor, OpenClaw, and any MCP-compatible client.
477
+
478
+ ```bash
479
+ # Install MCP support
480
+ pip install "nvhive[mcp]"
481
+
482
+ # Register with Claude Code
483
+ claude mcp add nvhive nvh mcp
484
+
485
+ # Or start as HTTP server for remote clients
486
+ nvh mcp -t streamable-http --port 8080
487
+ ```
488
+
489
+ Tools available via MCP: `ask`, `ask_safe`, `council`, `throwdown`, `status`, `list_advisors`, `list_cabinets`.
490
+
491
+ For OpenClaw agents, generate the config:
492
+
493
+ ```bash
494
+ nvh openclaw # creates openclaw.json with nvHive MCP config
495
+ nvh openclaw --agent # generates NemoClaw agent config
496
+ ```
497
+
498
+ ## NemoClaw Integration
499
+
500
+ nvHive works as an inference provider inside [NVIDIA NemoClaw](https://github.com/NVIDIA/NemoClaw), giving NemoClaw agents access to multi-model smart routing, council consensus, and throwdown analysis.
501
+
502
+ ```bash
503
+ # Setup in three commands:
504
+ nvh nemoclaw --start # 1. Start nvHive proxy
505
+ openshell provider create \ # 2. Register with NemoClaw
506
+ --name nvhive --type openai \
507
+ --credential OPENAI_API_KEY=nvhive \
508
+ --config OPENAI_BASE_URL=http://host.openshell.internal:8000/v1/proxy
509
+ openshell inference set \ # 3. Set as default
510
+ --provider nvhive --model auto
511
+ ```
512
+
513
+ NemoClaw agents can request any virtual model:
514
+
515
+ | Model | What It Does |
516
+ |-------|-------------|
517
+ | `auto` | Smart routing — best provider for the query |
518
+ | `safe` | Local only — nothing leaves your machine |
519
+ | `council` | 3-model consensus with synthesis |
520
+ | `council:N` | N-model council (2-10 members) |
521
+ | `throwdown` | Two-pass deep analysis with critique |
522
+
523
+ Privacy-aware routing: set `x-nvhive-privacy: local-only` header to force all inference through local Ollama, integrating with NemoClaw's content sensitivity routing.
524
+
525
+ ```
526
+ NemoClaw Sandbox → OpenShell Gateway → nvHive Proxy → 22 providers
527
+
528
+ Smart Router / Council / Throwdown
529
+ ```
530
+
531
+ Run `nvh nemoclaw` for the full setup guide, or `nvh nemoclaw --test` to verify connectivity.
532
+
533
+ ## Configuration
534
+
535
+ Configuration lives at `~/.config/nvhive/config.yaml`. Manage it with:
536
+
537
+ ```bash
538
+ nvh config # view current config
539
+ nvh config set default_advisor groq
540
+ nvh config set safe_mode true
541
+ nvh budget set --daily 1.00 # daily spending cap
542
+ ```
543
+
544
+ ## Workflows
545
+
546
+ Define multi-step pipelines in YAML:
547
+
548
+ ```yaml
549
+ name: Code Review Pipeline
550
+ steps:
551
+ - name: security_scan
552
+ action: ask
553
+ prompt: "Analyze for security vulnerabilities:\n\n{{input}}"
554
+ advisor: anthropic
555
+ save_as: security
556
+
557
+ - name: quality_review
558
+ action: ask
559
+ prompt: "Review for quality and best practices:\n\n{{input}}"
560
+ advisor: openai
561
+ save_as: quality
562
+
563
+ - name: synthesis
564
+ action: convene
565
+ prompt: "Synthesize findings:\n\nSecurity: {{security}}\nQuality: {{quality}}"
566
+ cabinet: code_review
567
+ save_as: summary
568
+ ```
569
+
570
+ ```bash
571
+ nvh workflow run code_review.yaml --input "$(cat main.py)"
572
+ ```
573
+
574
+ ## For Students
575
+
576
+ nvHive was built with students in mind. Five dedicated cabinets teach rather than just answer:
577
+
578
+ - **homework_help** -- Patient Tutor, Devil's Advocate, and Study Coach guide you to understanding
579
+ - **code_tutor** -- Code Mentor, Bug Hunter, and Best Practices Reviewer teach programming
580
+ - **essay_review** -- Writing Coach, Logic Checker, and Style Editor improve your writing
581
+ - **study_group** -- Socratic Questioner, ELI5 Explainer, and Practice Problem Generator
582
+ - **exam_prep** -- Exam Coach, Flashcard Creator, and Weak Spot Finder
583
+
584
+ All five cabinets work with free models. Track your savings with `nvh savings`.
585
+
586
+ ```bash
587
+ nvh convene "Explain recursion step by step" --cabinet code_tutor
588
+ nvh convene "Help me prepare for my calculus final" --cabinet exam_prep
589
+ ```
590
+
591
+ ## For Linux Desktop
592
+
593
+ nvHive is designed for deployment on Linux Desktop instances:
594
+
595
+ - Auto-detects cloud sessions and adapts to the available GPU tier
596
+ - All tools operate at user level -- no root, no sudo
597
+ - Session-aware: handles ephemeral environments with mounted home directories
598
+ - Auto-healing: reconnects to Ollama if the instance restarts
599
+ - GPU VRAM management: models unload after inactivity so games can reclaim VRAM
600
+
601
+ ## Architecture
602
+
603
+ ```
604
+ nvh CLI
605
+ |
606
+ +-- Action Detector -----> Direct execution (install, open, find)
607
+ |
608
+ +-- Router
609
+ | |-- Task classifier (code, writing, research, math, general)
610
+ | |-- Advisor scorer (relevance, cost, speed, privacy)
611
+ | +-- Model selector (GPU VRAM, provider availability)
612
+ |
613
+ +-- Providers (22)
614
+ | |-- Local: Ollama (Nemotron, CodeLlama, Llama3)
615
+ | |-- Cloud: OpenAI, Anthropic, Google, Groq, ...
616
+ | +-- Free: LLM7, GitHub Models, NVIDIA NIM, ...
617
+ |
618
+ +-- Agent System
619
+ | |-- Auto-generation from query analysis
620
+ | +-- 12 pre-built cabinets (22 expert personas)
621
+ |
622
+ +-- Tool System (27 tools, 18 safe / 9 confirm)
623
+ |
624
+ +-- SDK + OpenAI-compatible API server
625
+ ```
626
+
627
+ ## Project Stats
628
+
629
+ | Metric | Value |
630
+ |--------|-------|
631
+ | Python files | 81 |
632
+ | Lines of code | 27,518 |
633
+ | Functions | 810 |
634
+ | Tests | 181 |
635
+ | Providers | 22 |
636
+ | Models | 63 (25 free) |
637
+ | Tools | 27 (18 safe, 9 confirm) |
638
+ | Cabinets | 12 |
639
+ | Expert personas | 22 |
640
+ | Wheel size | 276 KB |
641
+ | Commits | 42 |
642
+
643
+ ## Documentation
644
+
645
+ | Document | Description |
646
+ |----------|-------------|
647
+ | [Getting Started](docs/GETTING_STARTED.md) | First-time setup and usage guide |
648
+ | [Hardware Requirements](docs/HARDWARE.md) | GPU tiers, VRAM mapping, performance |
649
+ | [Testing Guide](docs/TESTING_GUIDE.md) | Running and writing tests |
650
+ | [EULA](EULA.md) | End User License Agreement |
651
+ | [Privacy Policy](PRIVACY.md) | Data handling and privacy |
652
+ | [Changelog](CHANGELOG.md) | Version history |
653
+
654
+ ## Contributing
655
+
656
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, coding standards, and pull request guidelines.
657
+
658
+ ## License
659
+
660
+ MIT License. See [LICENSE](LICENSE) for details.