loclaude 0.0.1-alpha.2 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -2
- package/README.md +201 -29
- package/docker/docker-compose.yml +124 -37
- package/libs/cli/CHANGELOG.md +59 -0
- package/libs/cli/dist/cac.d.ts.map +1 -1
- package/libs/cli/dist/commands/config.d.ts.map +1 -1
- package/libs/cli/dist/commands/docker.d.ts.map +1 -1
- package/libs/cli/dist/commands/doctor.d.ts +4 -0
- package/libs/cli/dist/commands/doctor.d.ts.map +1 -1
- package/libs/cli/dist/commands/init.d.ts +2 -0
- package/libs/cli/dist/commands/init.d.ts.map +1 -1
- package/libs/cli/dist/commands/models.d.ts.map +1 -1
- package/libs/cli/dist/index.bun.js +1174 -549
- package/libs/cli/dist/index.bun.js.map +12 -10
- package/libs/cli/dist/index.js +1174 -549
- package/libs/cli/dist/index.js.map +12 -10
- package/libs/cli/dist/output.d.ts +107 -0
- package/libs/cli/dist/output.d.ts.map +1 -0
- package/libs/cli/dist/types.d.ts +40 -0
- package/libs/cli/dist/types.d.ts.map +1 -1
- package/libs/cli/dist/utils.d.ts +19 -1
- package/libs/cli/dist/utils.d.ts.map +1 -1
- package/libs/cli/package.json +8 -6
- package/package.json +20 -9
package/CHANGELOG.md
CHANGED
@@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.0.1-alpha.3] - 2025-01-21
+
+### Added
+
+- Adds support for CPU Only Ollama Hosts
+- Adds `auto-start` model enablement to `x` command
+
+### Changed
+
+- Bumps `@loclaude-internal/cli` dependency reference from `v0.0.1-alpha.1` to pinned version `v0.0.1-alpha.2`
+- Modifies documentation on output files from `init` command
+
 ## [0.0.1-alpha.2] - 2025-01-20
 
 ### Changed
@@ -51,5 +63,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 This is an alpha release. The API and command structure may change before 1.0.
 
-[Unreleased]: https://github.com/nicholasgalante1997/
-[0.0.1-alpha.1]: https://github.com/nicholasgalante1997/
+[Unreleased]: https://github.com/nicholasgalante1997/loclaude/compare/v0.0.1-rc.1...HEAD
+[0.0.1-alpha.1]: https://github.com/nicholasgalante1997/loclaude/releases/tag/v0.0.1-alpha.1
package/README.md
CHANGED
@@ -1,12 +1,42 @@
+<div align="center">
+
 # loclaude
 
-
+**Claude Code with Local LLMs**
+
+Stop burning through Claude API usage limits. Run Claude Code's powerful agentic workflow with local Ollama models on your own hardware.
+
+> **Requires ollama v0.14.2 or higher**
+
+**Zero API costs. No rate limits. Complete privacy.**
+
+[](https://www.npmjs.com/package/loclaude)
+[](https://opensource.org/licenses/MIT)
+
+[Quick Start](#quick-start-5-minutes) • [Why loclaude?](#why-loclaude) • [Installation](#installation) • [FAQ](#faq)
+
+</div>
+
+---
+
+## Why loclaude?
+
+### Real Value
 
-
--
--
--
--
+- **No Rate Limits**: Use Claude Code as much as you want
+- **Privacy**: Your code never leaves your machine
+- **Cost Control**: Use your own hardware, pay for electricity not tokens
+- **Offline Capable**: Work without internet (after model download)
+- **GPU or CPU**: Works with NVIDIA GPUs or CPU-only systems
+
+### What to Expect
+
+loclaude provides:
+
+- One-command setup for Ollama + Open WebUI containers
+- Smart model management with auto-loading
+- GPU auto-detection with CPU fallback
+- Project scaffolding with Docker configs
 
 ## Installation
 
@@ -14,38 +44,84 @@ loclaude provides a CLI to:
 # With npm (requires Node.js 18+)
 npm install -g loclaude
 
-# With bun (recommended)
-bun install -g loclaude
+# With bun (faster, recommended)
+bun install -g loclaude  # use bun-loclaude for commands
 ```
 
-
+### vs. Other Solutions
 
-
-
-
+| Solution | Cost | Speed | Privacy | Limits |
+|----------|------|-------|---------|--------|
+| **loclaude** | Free after setup | Fast (GPU) | 100% local | None |
+| Claude API/Web | $20-200+/month | Fast | Cloud-based | Rate limited |
+| GitHub Copilot | $10-20/month | Fast | Cloud-based | Context limited |
+| Cursor/Codeium | $20+/month | Fast | Cloud-based | Usage limits |
 
-
+loclaude gives you the utility of Ollama with the convenience of a managed solution for claude code integration.
+
+## Quick Start (5 Minutes)
 
 ```bash
-loclaude
-
+# 1. Install loclaude
+npm install -g loclaude
 
-
+# 2. Install Claude Code (if you haven't already)
+npm install -g @anthropic-ai/claude-code
 
-
-# Initialize a new project with Docker configs
+# 3. Setup your project (auto-detects GPU)
 loclaude init
 
-# Start Ollama
+# 4. Start Ollama container
 loclaude docker-up
 
-# Pull a model
-loclaude models-pull qwen3-coder:30b
+# 5. Pull a model (choose based on your hardware)
+loclaude models-pull qwen3-coder:30b   # GPU with 16GB+ VRAM
+# OR
+loclaude models-pull qwen2.5-coder:7b  # CPU or limited VRAM
 
-# Run Claude Code with local LLM
+# 6. Run Claude Code with unlimited local LLM
 loclaude run
 ```
 
+That's it! You now have unlimited Claude Code sessions with local models.
+
+## Prerequisites
+
+**Required:**
+
+- [Docker](https://docs.docker.com/get-docker/) with Docker Compose v2
+- [Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code) (`npm install -g @anthropic-ai/claude-code`)
+
+**Optional (for GPU acceleration):**
+
+- NVIDIA GPU with 16GB+ VRAM (RTX 3090, 4090, A5000, etc.)
+- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+
+**CPU-only systems work fine!** Use `--no-gpu` flag during init and smaller models.
+
+**Check your setup:**
+
+```bash
+loclaude doctor
+```
+
+## Features
+
+### Automatic Model Loading
+
+When you run `loclaude run`, it automatically:
+
+1. Checks if your selected model is loaded in Ollama
+2. If not loaded, warms up the model with a 10-minute keep-alive (Configurable through env vars)
+3. Shows `[loaded]` indicator in model selection for running models
+
+### GPU Auto-Detection
+
+`loclaude init` automatically detects NVIDIA GPUs and configures the appropriate Docker setup:
+
+- **GPU detected**: Uses `runtime: nvidia` and CUDA-enabled images
+- **No GPU**: Uses CPU-only configuration with smaller default models
+
 ## Commands
 
 ### Running Claude Code
@@ -59,7 +135,9 @@ loclaude run -- --help # Pass args to claude
 ### Project Setup
 
 ```bash
-loclaude init #
+loclaude init            # Auto-detect GPU, scaffold project
+loclaude init --gpu      # Force GPU mode
+loclaude init --no-gpu   # Force CPU-only mode
 loclaude init --force    # Overwrite existing files
 loclaude init --no-webui # Skip Open WebUI in compose file
 ```
@@ -94,6 +172,25 @@ loclaude config # Show current configuration
 loclaude config-paths # Show config file search paths
 ```
 
+## Recommended Models
+
+### For GPU (16GB+ VRAM) - Best Experience
+
+| Model | Size | Speed | Quality | Best For |
+|-------|------|-------|---------|----------|
+| `qwen3-coder:30b` | ~17 GB | ~50-100 tok/s | Excellent | **Most coding tasks, refactoring, debugging** |
+| `deepseek-coder:33b` | ~18 GB | ~40-80 tok/s | Excellent | Code understanding, complex logic |
+
+**Recommendation:** Start with `qwen3-coder:30b` for the best balance of speed and quality.
+
+### For CPU or Limited VRAM (<16GB) - Still Productive
+
+| Model | Size | Speed | Quality | Best For |
+|-------|------|-------|---------|----------|
+| `qwen2.5-coder:7b` | ~4 GB | ~10-20 tok/s | Good | **Code completion, simple refactoring** |
+| `deepseek-coder:6.7b` | ~4 GB | ~10-20 tok/s | Good | Understanding existing code |
+| `llama3.2:3b` | ~2 GB | ~15-30 tok/s | Fair | Quick edits, file operations |
+
 ## Configuration
 
 loclaude supports configuration via files and environment variables.
@@ -148,8 +245,8 @@ When containers are running:
 
 | Service | URL | Description |
 |---------|-----|-------------|
-| Ollama API | http://localhost:11434 | LLM inference API |
-| Open WebUI | http://localhost:3000 | Chat interface |
+| Ollama API | <http://localhost:11434> | LLM inference API |
+| Open WebUI | <http://localhost:3000> | Chat interface |
 
 ## Project Structure
 
@@ -162,7 +259,7 @@ After running `loclaude init`:
 ├── .loclaude/
 │   └── config.json        # Loclaude configuration
 ├── models/                # Ollama model storage (gitignored)
-├── docker-compose.yml     # Container definitions
+├── docker-compose.yml     # Container definitions (GPU or CPU mode)
 ├── mise.toml              # Task runner configuration
 └── README.md
 ```
@@ -179,6 +276,30 @@ mise run pull <model> # loclaude models-pull <model>
 mise run doctor # loclaude doctor
 ```
 
+## FAQ
+
+### Is this really unlimited?
+
+Yes! Once you have models downloaded, you can run as many sessions as you want with zero additional cost.
+
+### How does the quality compare to Claude API?
+
+30B parameter models (qwen3-coder:30b) are comparable to GPT-3.5 and work okay for most coding tasks. Larger models have a bit more success. Claude API is still better, but this allows for continuing work when you have hit that pesky usage limit.
+
+### Do I need a GPU?
+
+No, but highly recommended. CPU-only mode works with smaller models at ~10-20 tokens/sec. A GPU (16GB+ VRAM) gives you 50-100 tokens/sec with larger, better models.
+
+### What's the catch?
+
+- Initial setup takes 5-10 minutes
+- Model downloads are large (4-20GB)
+- GPU hardware investment if you don't have one (~$500-1500 used)
+
+### Can I use this with the Claude API too?
+
+Absolutely! Keep using Claude API for critical tasks, use loclaude for everything else to save money and avoid limits.
+
 ## Troubleshooting
 
 ### Check System Requirements
@@ -188,9 +309,10 @@ loclaude doctor
 ```
 
 This verifies:
+
 - Docker and Docker Compose installation
-- NVIDIA GPU detection
-- NVIDIA Container Toolkit
+- NVIDIA GPU detection (optional)
+- NVIDIA Container Toolkit (optional)
 - Claude Code CLI
 - Ollama API connectivity
 
@@ -215,12 +337,62 @@ If Claude Code can't connect to Ollama:
 2. Check the API: `curl http://localhost:11434/api/tags`
 3. Verify your config: `loclaude config`
 
+### GPU Not Detected
+
+If you have a GPU but it's not detected:
+
+1. Check NVIDIA drivers: `nvidia-smi`
+2. Test Docker GPU access: `docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi`
+3. Install NVIDIA Container Toolkit if missing
+4. Re-run `loclaude init --gpu` to force GPU mode
+
+### Running on CPU
+
+If inference is slow on CPU:
+
+1. Use smaller, quantized models: `qwen2.5-coder:7b`, `llama3.2:3b`
+2. Expect ~10-20 tokens/sec on modern CPUs
+3. Consider cloud models via Ollama: `glm-4.7:cloud`
+
+## Contributing
+
+loclaude is open source and welcomes contributions! Here's how you can help:
+
+### Share Your Experience
+
+- Star the repo if loclaude saves you money or rate limits
+- Share your setup and model recommendations
+- Write about your experience on dev.to, Twitter, or your blog
+- Report bugs and request features via GitHub Issues
+
+### Code Contributions
+
+- Fix bugs or add features (see open issues)
+- Improve documentation or examples
+- Add support for new model providers
+- Optimize model loading and performance
+
+### Spread the Word
+
+- Post on r/LocalLLaMA, r/selfhosted, r/ClaudeAI
+- Share in Discord/Slack dev communities
+- Help others troubleshoot in GitHub Discussions
+
+Every star, issue report, and shared experience helps more developers discover unlimited local Claude Code.
+
+## Getting Help
+
+- **Issues/Bugs**: [GitHub Issues](https://github.com/nicholasgalante1997/loclaude/issues)
+- **Questions**: [GitHub Discussions](https://github.com/nicholasgalante1997/loclaude/discussions)
+- **Documentation**: Run `loclaude --help` or check this README
+- **System Check**: Run `loclaude doctor` to diagnose problems
+
 ## Development
 
 ### Building from Source
 
 ```bash
-git clone https://github.com/nicholasgalante1997/
+git clone https://github.com/nicholasgalante1997/loclaude.git loclaude
 cd loclaude
 bun install
 bun run build
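The README's "Automatic Model Loading" section added above describes checking whether the selected model is resident in Ollama and, if not, warming it with a 10-minute keep-alive. A minimal sketch of how such a warm-up can be done against Ollama's documented HTTP API follows; `OLLAMA_URL` matches the env var listed in the CLI changelog further below, while the keep-alive constant and helper names here are illustrative assumptions, not loclaude's actual internals.

```typescript
// Sketch only: warming a model the way the README's "Automatic Model Loading"
// feature describes, using Ollama's documented endpoints (/api/ps, /api/generate).
// The helper names and the hard-coded keep-alive are assumptions for illustration.
const OLLAMA_URL = process.env.OLLAMA_URL ?? "http://localhost:11434";
const KEEP_ALIVE = "10m"; // README default: 10-minute keep-alive

async function isModelLoaded(model: string): Promise<boolean> {
  // GET /api/ps lists models currently loaded in memory
  const res = await fetch(`${OLLAMA_URL}/api/ps`);
  const body = (await res.json()) as { models?: Array<{ name: string }> };
  return (body.models ?? []).some((m) => m.name === model);
}

async function warmUpModel(model: string): Promise<void> {
  if (await isModelLoaded(model)) return; // already resident, nothing to do
  // POST /api/generate with a model and no prompt loads the model and keeps it
  // resident for the keep_alive duration (documented Ollama behavior)
  await fetch(`${OLLAMA_URL}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, keep_alive: KEEP_ALIVE }),
  });
}

// Example: warmUpModel("qwen3-coder:30b").catch(console.error);
```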
package/docker/docker-compose.yml
CHANGED
@@ -1,78 +1,159 @@
+# =============================================================================
+# LOCLAUDE DOCKER COMPOSE - GPU MODE
+# =============================================================================
+# This configuration runs Ollama with NVIDIA GPU acceleration for fast inference.
+# Bundled with loclaude package for use as a fallback when no local compose exists.
+#
+# Prerequisites:
+#   - NVIDIA GPU with CUDA support
+#   - NVIDIA drivers installed on host
+#   - NVIDIA Container Toolkit: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit
+#
+# Quick test for GPU support:
+#   docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi
+#
+# =============================================================================
+
 services:
+  # ===========================================================================
+  # OLLAMA - Local LLM Inference Server
+  # ===========================================================================
+  # Ollama provides the AI backend that Claude Code connects to.
+  # It runs large language models locally on your hardware.
+  #
+  # API Documentation: https://github.com/ollama/ollama/blob/main/docs/api.md
+  # Model Library: https://ollama.com/library
+  # ===========================================================================
   ollama:
+    # Official Ollama image - 'latest' ensures newest features and model support
     image: ollama/ollama:latest
+
+    # Fixed container name for easy CLI access:
+    #   docker exec ollama ollama list
+    #   docker logs ollama
     container_name: ollama
-
-    #
+
+    # NVIDIA Container Runtime - Required for GPU access
+    # This makes CUDA libraries available inside the container
     runtime: nvidia
+
     environment:
-
-
-      #
-      #
+      # ---------------------------------------------------------------------------
+      # GPU Configuration
+      # ---------------------------------------------------------------------------
+      # NVIDIA_VISIBLE_DEVICES: Which GPUs to expose to the container
+      #   - 'all': Use all available GPUs (recommended for most setups)
+      #   - '0': Use only GPU 0
+      #   - '0,1': Use GPUs 0 and 1
+      - NVIDIA_VISIBLE_DEVICES=all
+
+      # NVIDIA_DRIVER_CAPABILITIES: What GPU features to enable
+      #   - 'compute': CUDA compute (required for inference)
+      #   - 'utility': nvidia-smi and other tools
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+      # ---------------------------------------------------------------------------
+      # Ollama Configuration (Optional)
+      # ---------------------------------------------------------------------------
+      # Uncomment these to customize Ollama behavior:
+
+      # Maximum number of models loaded in memory simultaneously
+      # Lower this if you're running out of VRAM
       # - OLLAMA_MAX_LOADED_MODELS=1
+
+      # Maximum parallel inference requests per model
+      # Higher values use more VRAM but handle more concurrent requests
       # - OLLAMA_NUM_PARALLEL=1
-
-      #
+
+      # Enable debug logging for troubleshooting
       # - OLLAMA_DEBUG=1
-
-    # Volume mounts: maps host directories/files into the container
+
     volumes:
-      #
-      #
+      # ---------------------------------------------------------------------------
+      # Model Storage
+      # ---------------------------------------------------------------------------
+      # Maps ./models on your host to /root/.ollama in the container
+      # This persists downloaded models across container restarts
+      #
+      # Disk space requirements (approximate):
+      #   - 7B model: ~4GB
+      #   - 13B model: ~8GB
+      #   - 30B model: ~16GB
+      #   - 70B model: ~40GB
       - ./models:/root/.ollama
-
-      # Keep container time in sync with host (good practice)
-      # - /etc/localtime:/etc/localtime:ro
-
-      # OPTIONAL: Mount a custom config directory
-      # Uncomment if you want to customize Ollama settings
-      # - ./config:/root/.ollama/config
 
     ports:
+      # Ollama API port - access at http://localhost:11434
+      # Used by Claude Code and other Ollama clients
       - "11434:11434"
+
+    # Restart policy - keeps Ollama running unless manually stopped
     restart: unless-stopped
+
     healthcheck:
+      # Verify Ollama is responsive by listing models
      test: ["CMD", "ollama", "list"]
-      interval: 300s
-      timeout: 2s
-      retries: 3
-      start_period: 40s
-
-    # OPTIONAL: Resource limits and reservations
-    # Uncomment to constrain CPU and memory usage
+      interval: 300s      # Check every 5 minutes
+      timeout: 2s         # Fail if no response in 2 seconds
+      retries: 3          # Mark unhealthy after 3 consecutive failures
+      start_period: 40s   # Grace period for initial model loading
+
     deploy:
       resources:
-        # limits:
-        #   cpus: '4'      # Limit to 4 CPU cores
-        #   memory: 32G    # Limit to 32GB RAM
         reservations:
-        #   cpus: '2'      # Reserve at least 2 CPU cores
-        #   memory: 16G    # Reserve at least 16GB RAM
           devices:
+            # Request GPU access from Docker
            - driver: nvidia
-              count: all
-              capabilities: [gpu]
+              count: all            # Use all available GPUs
+              capabilities: [gpu]   # Request GPU compute capability
+
+  # ===========================================================================
+  # OPEN WEBUI - Chat Interface (Optional)
+  # ===========================================================================
+  # Open WebUI provides a ChatGPT-like interface for your local models.
+  # Access at http://localhost:3000 after starting containers.
+  #
+  # Features:
+  #   - Multi-model chat interface
+  #   - Conversation history
+  #   - Model management UI
+  #   - RAG/document upload support
+  #
+  # Documentation: https://docs.openwebui.com/
+  # ===========================================================================
   open-webui:
-
+    # CUDA-enabled image for GPU-accelerated features (embeddings, etc.)
+    # Change to :main if you don't need GPU features in the UI
+    image: ghcr.io/open-webui/open-webui:cuda
+
     container_name: open-webui
+
     ports:
+      # Web UI port - access at http://localhost:3000
       - "3000:8080"
+
     environment:
-      #
-      #
+      # Tell Open WebUI where to find Ollama
+      # Uses Docker internal networking (service name as hostname)
       - OLLAMA_BASE_URL=http://ollama:11434
+
+    # Wait for Ollama to be ready before starting
     depends_on:
-      - ollama
+      - ollama
+
     restart: unless-stopped
+
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
       interval: 30s
       timeout: 10s
       retries: 3
       start_period: 60s
+
     volumes:
+      # Persistent storage for conversations, settings, and user data
       - open-webui:/app/backend/data
+
     deploy:
       resources:
         reservations:
@@ -81,5 +162,11 @@ services:
             count: all
             capabilities: [gpu]
 
+# =============================================================================
+# VOLUMES
+# =============================================================================
+# Named volumes for persistent data that survives container recreation
 volumes:
   open-webui:
+    # Open WebUI data: conversations, user settings, uploads
+    # Located at /var/lib/docker/volumes/open-webui/_data on host
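The compose file above gives the `ollama` service an `ollama list` healthcheck with a 40-second start period, and the README's troubleshooting section checks the API with `curl http://localhost:11434/api/tags`. The sketch below shows the same readiness check done from the host in TypeScript; the function name, polling interval, and timeout are illustrative assumptions and not part of loclaude.

```typescript
// Sketch: poll the Ollama API exposed by the compose file until it answers,
// mirroring the README's `curl http://localhost:11434/api/tags` check.
async function waitForOllama(
  baseUrl = "http://localhost:11434",
  timeoutMs = 60_000,
): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    try {
      const res = await fetch(`${baseUrl}/api/tags`); // lists installed models
      if (res.ok) return; // the API answered: Ollama is up
    } catch {
      // connection refused: container is still starting
    }
    await new Promise((resolve) => setTimeout(resolve, 2_000)); // retry every 2s
  }
  throw new Error(`Ollama did not become ready at ${baseUrl} within ${timeoutMs} ms`);
}

// Example: waitForOllama().then(() => console.log("Ollama is ready"));
```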
package/libs/cli/CHANGELOG.md
ADDED
@@ -0,0 +1,59 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [0.0.1-alpha.2] - 2025-01-20
+
+### Added
+
+- Adds support for CPU Only Ollama Hosts
+
+### Changed
+
+- Modifies documentation on output files from `init` command
+
+## [0.0.1-alpha.1] - 2025-01-19
+
+### Added
+
+- **CLI Commands**
+  - `loclaude run` - Run Claude Code with local Ollama (interactive model selection)
+  - `loclaude init` - Scaffold docker-compose.yml, config, and mise.toml
+  - `loclaude doctor` - Check system prerequisites (Docker, GPU, Claude CLI)
+  - `loclaude config` / `loclaude config-paths` - View configuration
+  - `loclaude docker-up/down/status/logs/restart` - Docker container management
+  - `loclaude models` - List installed Ollama models
+  - `loclaude models-pull/rm/show/run` - Model management commands
+
+- **Configuration System**
+  - Project-local config: `./.loclaude/config.json`
+  - User global config: `~/.config/loclaude/config.json`
+  - Environment variable support (`OLLAMA_URL`, `OLLAMA_MODEL`, etc.)
+  - Layered config merging with clear priority
+
+- **Cross-Runtime Support**
+  - Works with both Bun and Node.js runtimes
+  - Dual entry points: `bin/index.ts` (Bun) and `bin/index.mjs` (Node)
+
+- **Docker Integration**
+  - Bundled docker-compose.yml template with Ollama + Open WebUI
+  - NVIDIA GPU support out of the box
+  - Health checks for both services
+
+- **Project Scaffolding**
+  - `loclaude init` creates complete project structure
+  - Generates mise.toml with task aliases
+  - Creates .claude/CLAUDE.md for Claude Code instructions
+  - Sets up .gitignore for model directory
+
+### Notes
+
+This is an alpha release. The API and command structure may change before 1.0.
+
+[Unreleased]: https://github.com/nicholasgalante1997/loclaude/compare/v0.0.1-rc.1...HEAD
+[0.0.1-alpha.1]: https://github.com/nicholasgalante1997/loclaude/releases/tag/v0.0.1-alpha.1
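The "Configuration System" entry in the CLI changelog above lists a project-local `./.loclaude/config.json`, a user-global `~/.config/loclaude/config.json`, env vars such as `OLLAMA_URL` and `OLLAMA_MODEL`, and layered merging with clear priority. A sketch of that kind of layered lookup is below; the assumed precedence (env vars over project config over the global file) and the config field names are illustrative, since the changelog does not spell them out.

```typescript
// Sketch of the layered config lookup described in the changelog entry above.
// Paths and env var names come from that entry; the precedence order and the
// field names are assumptions, not the actual loclaude implementation.
import { existsSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";

interface LoclaudeConfig {
  ollamaUrl?: string;
  model?: string;
}

function readJsonIfPresent(path: string): LoclaudeConfig {
  // Missing files simply contribute nothing to the merge
  return existsSync(path)
    ? (JSON.parse(readFileSync(path, "utf8")) as LoclaudeConfig)
    : {};
}

export function loadConfig(cwd = process.cwd()): LoclaudeConfig {
  const globalConfig = readJsonIfPresent(
    join(homedir(), ".config", "loclaude", "config.json"),
  );
  const projectConfig = readJsonIfPresent(join(cwd, ".loclaude", "config.json"));

  // Later spreads win: assumed priority is env > project-local > user global
  return {
    ...globalConfig,
    ...projectConfig,
    ...(process.env.OLLAMA_URL ? { ollamaUrl: process.env.OLLAMA_URL } : {}),
    ...(process.env.OLLAMA_MODEL ? { model: process.env.OLLAMA_MODEL } : {}),
  };
}
```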
package/libs/cli/dist/cac.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"cac.d.ts","sourceRoot":"","sources":["../lib/cac.ts"],"names":[],"mappings":"AAqBA,QAAA,MAAM,GAAG,mBAAkB,CAAC;
+{"version":3,"file":"cac.d.ts","sourceRoot":"","sources":["../lib/cac.ts"],"names":[],"mappings":"AAqBA,QAAA,MAAM,GAAG,mBAAkB,CAAC;AAsI5B,eAAO,MAAM,IAAI,YAAyB,CAAC;AAC3C,eAAO,MAAM,OAAO,YAA4B,CAAC;AAEjD,eAAO,MAAM,OAAO,QAAO,IAE1B,CAAC;AAEF,OAAO,EAAE,GAAG,EAAE,CAAC"}
package/libs/cli/dist/commands/config.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../lib/commands/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,wBAAsB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,
+{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../lib/commands/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,wBAAsB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAmChD;AAED,wBAAsB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC,CA6BjD"}
package/libs/cli/dist/commands/docker.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"docker.d.ts","sourceRoot":"","sources":["../../lib/commands/docker.ts"],"names":[],"mappings":"AAAA;;GAEG;
+{"version":3,"file":"docker.d.ts","sourceRoot":"","sources":["../../lib/commands/docker.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiEH,MAAM,WAAW,aAAa;IAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAeD,wBAAsB,QAAQ,CAAC,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CAoBzE;AAED,wBAAsB,UAAU,CAAC,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CAW3E;AAED,wBAAsB,YAAY,CAAC,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CAK7E;AAED,wBAAsB,UAAU,CAC9B,OAAO,GAAE,aAAa,GAAG;IAAE,MAAM,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GACnE,OAAO,CAAC,IAAI,CAAC,CAiBf;AAED,wBAAsB,aAAa,CAAC,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CAW9E;AAED,wBAAsB,UAAU,CAC9B,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,GAAE,aAAkB,GAC1B,OAAO,CAAC,MAAM,CAAC,CAWjB"}
package/libs/cli/dist/commands/doctor.d.ts
CHANGED
@@ -2,4 +2,8 @@
  * doctor command - Check prerequisites and system health
  */
 export declare function doctor(): Promise<void>;
+/**
+ * Check if NVIDIA GPU is available (exported for use by init command)
+ */
+export declare function hasNvidiaGpu(): Promise<boolean>;
 //# sourceMappingURL=doctor.d.ts.map
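The `hasNvidiaGpu()` export added above is what lets `init` reuse the doctor command's GPU check. One plausible way to implement a check with that signature is to shell out to `nvidia-smi`, the same tool the README's troubleshooting steps use, and treat a clean exit as "GPU present"; this is a hedged sketch, not the actual loclaude source.

```typescript
// Sketch of a GPU probe compatible with the hasNvidiaGpu() signature above.
// Shelling out to nvidia-smi is an assumption about the implementation; the
// real loclaude doctor command may detect GPUs differently.
import { execFile } from "node:child_process";

export function hasNvidiaGpu(): Promise<boolean> {
  return new Promise((resolve) => {
    // `nvidia-smi -L` lists GPUs; it exits non-zero (or is missing entirely)
    // when no usable NVIDIA driver/GPU is present on the host.
    execFile("nvidia-smi", ["-L"], (error, stdout) => {
      resolve(error === null && stdout.trim().length > 0);
    });
  });
}
```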
package/libs/cli/dist/commands/doctor.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"doctor.d.ts","sourceRoot":"","sources":["../../lib/commands/doctor.ts"],"names":[],"mappings":"AAAA;;GAEG;
+{"version":3,"file":"doctor.d.ts","sourceRoot":"","sources":["../../lib/commands/doctor.ts"],"names":[],"mappings":"AAAA;;GAEG;AAmRH,wBAAsB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CA+B5C;AAED;;GAEG;AACH,wBAAsB,YAAY,IAAI,OAAO,CAAC,OAAO,CAAC,CAMrD"}
package/libs/cli/dist/commands/init.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../lib/commands/init.ts"],"names":[],"mappings":"AAAA;;GAEG;
+{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../lib/commands/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AA4nBH,MAAM,WAAW,WAAW;IAC1B,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,wBAAsB,IAAI,CAAC,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,CAsInE"}
package/libs/cli/dist/commands/models.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../lib/commands/models.ts"],"names":[],"mappings":"AAAA;;GAEG;
+{"version":3,"file":"models.d.ts","sourceRoot":"","sources":["../../lib/commands/models.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiFH,wBAAsB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC,CAyChD;AAED,wBAAsB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAkBjE;AAED,wBAAsB,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAiB/D;AAED,wBAAsB,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAWjE;AAED,wBAAsB,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAWhE"}