llmpm-1.0.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llmpm-1.0.0/PKG-INFO +383 -0
- llmpm-1.0.0/README.md +320 -0
- llmpm-1.0.0/llmpm/__init__.py +4 -0
- llmpm-1.0.0/llmpm/__main__.py +6 -0
- llmpm-1.0.0/llmpm/cli.py +143 -0
- llmpm-1.0.0/llmpm/commands/__init__.py +1 -0
- llmpm-1.0.0/llmpm/commands/install.py +280 -0
- llmpm-1.0.0/llmpm/commands/list_cmd.py +133 -0
- llmpm-1.0.0/llmpm/commands/push.py +161 -0
- llmpm-1.0.0/llmpm/commands/run_cmd.py +477 -0
- llmpm-1.0.0/llmpm/commands/serve_cmd.py +159 -0
- llmpm-1.0.0/llmpm/core/__init__.py +1 -0
- llmpm-1.0.0/llmpm/core/chat_ui/dist/assets/index-DA7mKlGQ.css +1 -0
- llmpm-1.0.0/llmpm/core/chat_ui/dist/assets/index-tyQ9Z-hx.js +95 -0
- llmpm-1.0.0/llmpm/core/chat_ui/dist/index.html +24 -0
- llmpm-1.0.0/llmpm/core/downloader.py +237 -0
- llmpm-1.0.0/llmpm/core/model_detector.py +120 -0
- llmpm-1.0.0/llmpm/core/registry.py +114 -0
- llmpm-1.0.0/llmpm/core/runner.py +321 -0
- llmpm-1.0.0/llmpm/core/serve_audio.py +148 -0
- llmpm-1.0.0/llmpm/core/serve_diffusion.py +167 -0
- llmpm-1.0.0/llmpm/core/serve_gguf.py +77 -0
- llmpm-1.0.0/llmpm/core/serve_transformers.py +150 -0
- llmpm-1.0.0/llmpm/core/serve_vision.py +107 -0
- llmpm-1.0.0/llmpm/core/server/__init__.py +88 -0
- llmpm-1.0.0/llmpm/core/server/_audio.py +74 -0
- llmpm-1.0.0/llmpm/core/server/_chat.py +158 -0
- llmpm-1.0.0/llmpm/core/server/_context.py +39 -0
- llmpm-1.0.0/llmpm/core/server/_handler.py +154 -0
- llmpm-1.0.0/llmpm/core/server/_images.py +66 -0
- llmpm-1.0.0/llmpm/core/server/_types.py +23 -0
- llmpm-1.0.0/llmpm/display.py +419 -0
- llmpm-1.0.0/llmpm.egg-info/PKG-INFO +383 -0
- llmpm-1.0.0/llmpm.egg-info/SOURCES.txt +38 -0
- llmpm-1.0.0/llmpm.egg-info/dependency_links.txt +1 -0
- llmpm-1.0.0/llmpm.egg-info/entry_points.txt +2 -0
- llmpm-1.0.0/llmpm.egg-info/requires.txt +47 -0
- llmpm-1.0.0/llmpm.egg-info/top_level.txt +1 -0
- llmpm-1.0.0/pyproject.toml +97 -0
- llmpm-1.0.0/setup.cfg +4 -0
llmpm-1.0.0/PKG-INFO
ADDED
@@ -0,0 +1,383 @@
Metadata-Version: 2.4
Name: llmpm
Version: 1.0.0
Summary: LLM Package Manager — download, run, and share AI models from the command line
License: MIT
Project-URL: Homepage, https://github.com/llmpm-dev/llmpm
Project-URL: Repository, https://github.com/llmpm-dev/llmpm
Project-URL: Issues, https://github.com/llmpm-dev/llmpm/issues
Keywords: llm,ai,models,package-manager,huggingface,llama
Classifier: Development Status :: 3 - Alpha
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.9
Description-Content-Type: text/markdown
Requires-Dist: click>=8.1
Requires-Dist: rich>=13.0
Requires-Dist: huggingface_hub>=0.20
Requires-Dist: requests>=2.28
Requires-Dist: tqdm>=4.65
Requires-Dist: questionary>=2.0
Requires-Dist: humanize>=4.0
Requires-Dist: llama-cpp-python>=0.2.0
Requires-Dist: diffusers>=0.27
Requires-Dist: transformers>=4.35
Requires-Dist: accelerate>=0.24
Requires-Dist: torch>=2.0
Provides-Extra: gguf
Requires-Dist: llama-cpp-python>=0.2.0; extra == "gguf"
Provides-Extra: transformers
Requires-Dist: transformers>=4.35; extra == "transformers"
Requires-Dist: torch>=2.0; extra == "transformers"
Requires-Dist: accelerate>=0.24; extra == "transformers"
Provides-Extra: diffusion
Requires-Dist: diffusers>=0.27; extra == "diffusion"
Requires-Dist: transformers>=4.35; extra == "diffusion"
Requires-Dist: torch>=2.0; extra == "diffusion"
Requires-Dist: accelerate>=0.24; extra == "diffusion"
Provides-Extra: vision
Requires-Dist: transformers>=4.35; extra == "vision"
Requires-Dist: torch>=2.0; extra == "vision"
Requires-Dist: accelerate>=0.24; extra == "vision"
Requires-Dist: Pillow>=10.0; extra == "vision"
Provides-Extra: audio
Requires-Dist: transformers>=4.35; extra == "audio"
Requires-Dist: torch>=2.0; extra == "audio"
Requires-Dist: accelerate>=0.24; extra == "audio"
Requires-Dist: numpy>=1.24; extra == "audio"
Provides-Extra: all
Requires-Dist: llama-cpp-python>=0.2.0; extra == "all"
Requires-Dist: transformers>=4.35; extra == "all"
Requires-Dist: torch>=2.0; extra == "all"
Requires-Dist: accelerate>=0.24; extra == "all"
Requires-Dist: diffusers>=0.27; extra == "all"
Requires-Dist: Pillow>=10.0; extra == "all"
Requires-Dist: numpy>=1.24; extra == "all"

# llmpm — LLM Package Manager

> Download, run, and share AI models from the command line.

`llmpm` is a package manager for AI models, inspired by npm.
Models are sourced from [HuggingFace Hub](https://huggingface.co).
It supports text generation (GGUF via **llama.cpp** and Transformer checkpoints), image generation (Diffusion), vision, speech-to-text (ASR), and text-to-speech (TTS).

---

## Installation

### via pip (recommended)

```sh
pip install llmpm
```

### via npm

```sh
npm install -g llmpm
```

> The npm package automatically installs the Python backend via pip.

---

## Quick start

```sh
# Install a GGUF model (interactive quantisation picker)
llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF

# Run it
llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF
```

---

## Commands

| Command                  | Description                                    |
| ------------------------ | ---------------------------------------------- |
| `llmpm install <repo>`   | Download and install a model from HuggingFace  |
| `llmpm run <repo>`       | Run an installed model (interactive chat)      |
| `llmpm serve <repo>`     | Serve a model as an OpenAI-compatible HTTP API |
| `llmpm push <repo>`      | Upload a model to HuggingFace Hub              |
| `llmpm list`             | Show all installed models                      |
| `llmpm info <repo>`      | Show details about a model                     |
| `llmpm uninstall <repo>` | Uninstall a model                              |

---

## `llmpm install`

```sh
# Interactively choose a GGUF quantisation
llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF

# Install a specific quantisation
llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --quant Q4_K_M

# Install a full Transformer model
llmpm install meta-llama/Llama-3.2-1B-Instruct

# Install a single specific file
llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --file Llama-3.2-1B-Instruct-Q4_K_M.gguf

# Skip prompts (pick best default)
llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --no-interactive
```

Models are stored in `~/.llmpm/models/`.
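To see what is installed, use the CLI; the raw store can also be inspected directly, though its layout is an implementation detail:

```sh
# Enumerate installed models via the CLI
llmpm list
# Or look at the store itself (layout may vary by model type)
ls ~/.llmpm/models/
```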

---

## `llmpm run`

`llmpm run` auto-detects the model type and launches the appropriate interactive session. It supports text generation, image generation, vision, speech-to-text (ASR), and text-to-speech (TTS) models.

### Text generation (GGUF & Transformers)

```sh
# Interactive chat
llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF

# Single-turn inference
llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --prompt "Explain quantum computing"

# With a system prompt
llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --system "You are a helpful pirate."

# Limit response length
llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --max-tokens 512

# GGUF: tune context window and GPU layers
llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --ctx 8192 --gpu-layers 32

# Transformer model
llmpm run HuggingFaceTB/SmolLM2-1.7B-Instruct
```

### Image generation (Diffusion)

Generates an image from a text prompt and saves it as a PNG on your Desktop.

```sh
# Single prompt → saves llmpm_<timestamp>.png to ~/Desktop
llmpm run amused/amused-256 --prompt "a cyberpunk city at sunset"

# Interactive session (type a prompt, get an image each time)
llmpm run amused/amused-256
```

In interactive mode, type your prompt and press Enter. The output path is printed after each generation. Type `/exit` to quit.

> Requires: `pip install diffusers torch accelerate`

### Vision (image-to-text)

Describe or answer questions about an image. Pass the image file path via `--prompt`.

```sh
# Single image description
llmpm run Salesforce/blip-image-captioning-base --prompt /path/to/photo.jpg

# Interactive session: type an image path at each prompt
llmpm run Salesforce/blip-image-captioning-base
```

> Requires: `pip install transformers torch Pillow`

### Speech-to-text / ASR

Transcribe an audio file. Pass the audio file path via `--prompt`.

```sh
# Transcribe a single file
llmpm run openai/whisper-base --prompt recording.wav

# Interactive: enter an audio file path at each prompt
llmpm run openai/whisper-base
```

Supported formats depend on your installed audio libraries (wav, flac, mp3, …).

> Requires: `pip install transformers torch`

### Text-to-speech / TTS

Convert text to speech. The output WAV file is saved to your Desktop.

```sh
# Single utterance → saves llmpm_<timestamp>.wav to ~/Desktop
llmpm run suno/bark-small --prompt "Hello, how are you today?"

# Interactive session
llmpm run suno/bark-small
```

> Requires: `pip install transformers torch`

### `llmpm run` options

| Option            | Default  | Description                                             |
| ----------------- | -------- | ------------------------------------------------------- |
| `--prompt` / `-p` | —        | Single-turn prompt or input file path (non-interactive) |
| `--system` / `-s` | —        | System prompt (text generation only)                    |
| `--max-tokens`    | `128000` | Maximum tokens to generate per response                 |
| `--ctx`           | `128000` | Context window size (GGUF only)                         |
| `--gpu-layers`    | `-1`     | GPU layers to offload, `-1` = all (GGUF only)           |
| `--verbose`       | off      | Show model loading output                               |

### Interactive session commands

These commands work in any interactive session:

| Command          | Action                                      |
| ---------------- | ------------------------------------------- |
| `/exit`          | End the session                             |
| `/clear`         | Clear conversation history (text gen only)  |
| `/system <text>` | Update the system prompt (text gen only)    |

### Model type detection

`llmpm run` reads `config.json` / `model_index.json` from the installed model to determine the pipeline type before loading any weights. The detected type is printed at startup:

```
Detected: Image Generation (Diffusion)
Loading model… ✓
```

If detection is ambiguous, llmpm falls back to the text-generation backend.
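Diffusers pipelines ship a `model_index.json`, while Transformers checkpoints ship a `config.json`, so you can preview what the detector will see. A minimal sketch (the paths under `~/.llmpm/models/` are an implementation detail):

```sh
# Which marker files are present for each installed model?
find ~/.llmpm/models -maxdepth 3 \( -name model_index.json -o -name config.json \)
```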

---

## `llmpm serve`

Start a local HTTP server exposing the model as an OpenAI-compatible REST API.
A browser-based chat UI is also available at `/chat`.

```sh
# Serve on the default port (8080)
llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF

# Custom port and host
llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --port 9000 --host 0.0.0.0

# Set the default max tokens (clients may override per-request)
llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --max-tokens 2048

# GGUF options
llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --ctx 8192 --gpu-layers 32
```

### `llmpm serve` options

| Option          | Default     | Description                                                |
| --------------- | ----------- | ---------------------------------------------------------- |
| `--port` / `-p` | `8080`      | Port to listen on (auto-increments if busy)                |
| `--host` / `-H` | `localhost` | Host/address to bind to                                    |
| `--max-tokens`  | `128000`    | Default max tokens per response (overridable per-request)  |
| `--ctx`         | `128000`    | Context window size (GGUF only)                            |
| `--gpu-layers`  | `-1`        | GPU layers to offload, `-1` = all (GGUF only)              |

### Endpoints

| Method | Path                       | Models    | Description                                                |
| ------ | -------------------------- | --------- | ---------------------------------------------------------- |
| `GET`  | `/chat`                    | all       | Browser chat / image-gen UI                                |
| `GET`  | `/health`                  | all       | `{"status":"ok","model":"<id>"}`                           |
| `POST` | `/v1/chat/completions`     | text-gen  | OpenAI-compatible chat inference (SSE streaming supported) |
| `POST` | `/v1/images/generations`   | diffusion | Text-to-image; pass `image` (base64) for image-to-image    |
| `POST` | `/v1/audio/transcriptions` | ASR       | Speech-to-text                                             |
| `POST` | `/v1/audio/speech`         | TTS       | Text-to-speech                                             |
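A quick smoke test once the server is up:

```sh
curl http://localhost:8080/health
# → {"status":"ok","model":"bartowski/Llama-3.2-1B-Instruct-GGUF"}
```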

### Example API calls

```sh
# Text generation
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Hello!"}], "max_tokens": 256}'

# Text-to-image
curl -X POST http://localhost:8080/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{"prompt": "a cat in a forest", "n": 1}'

# Image-to-image (include the source image as base64 in the same endpoint)
IMAGE_B64=$(base64 -i input.png)   # macOS; on Linux use `base64 -w0 input.png` to avoid line wraps
curl -X POST http://localhost:8080/v1/images/generations \
  -H "Content-Type: application/json" \
  -d "{\"prompt\": \"turn it into a painting\", \"image\": \"$IMAGE_B64\"}"
```
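Streaming on `/v1/chat/completions` is advertised as SSE; assuming it follows the OpenAI convention of a boolean `stream` field, a request sketch:

```sh
# -N turns off curl's output buffering so SSE chunks print as they arrive
curl -N -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Hello!"}], "stream": true}'
```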

Response shape (both text-to-image and image-to-image):

```json
{
  "created": 1234567890,
  "data": [{ "b64_json": "<base64-png>" }]
}
```
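The audio endpoints have no examples above; assuming they mirror the OpenAI API shapes (a multipart `file` upload for transcriptions, a JSON `input` field for speech), the calls would look roughly like this:

```sh
# Speech-to-text (hypothetical field name `file`, per the OpenAI API)
curl -X POST http://localhost:8080/v1/audio/transcriptions \
  -F file=@recording.wav

# Text-to-speech (hypothetical field name `input`; response body is audio)
curl -X POST http://localhost:8080/v1/audio/speech \
  -H "Content-Type: application/json" \
  -d '{"input": "Hello, how are you today?"}' \
  --output speech.wav
```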

---

## `llmpm push`

```sh
# Push an already-installed model
llmpm push my-org/my-fine-tune

# Push a local directory
llmpm push my-org/my-fine-tune --path ./my-model-dir

# Push as a private repository
llmpm push my-org/my-fine-tune --private

# Custom commit message
llmpm push my-org/my-fine-tune -m "Add Q4_K_M quantisation"
```

Requires a HuggingFace token (run `huggingface-cli login` or set `HF_TOKEN`).
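In CI or other non-interactive environments, the `HF_TOKEN` route avoids the login prompt:

```sh
# Token from the environment instead of an interactive login
export HF_TOKEN=hf_xxxxxxxxxxxx   # your HuggingFace access token
llmpm push my-org/my-fine-tune --path ./my-model-dir --private
```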

---

## Backends

| Model type              | Pipeline         | Backend                        | Extra install                     |
| ----------------------- | ---------------- | ------------------------------ | --------------------------------- |
| `.gguf` files           | Text generation  | llama.cpp via llama-cpp-python | `pip install llmpm[gguf]`         |
| `.safetensors` / `.bin` | Text generation  | HuggingFace Transformers       | `pip install llmpm[transformers]` |
| Diffusion models        | Image generation | HuggingFace Diffusers          | `pip install llmpm[diffusion]`    |
| Vision models           | Image-to-text    | HuggingFace Transformers       | `pip install llmpm[vision]`       |
| Whisper / ASR models    | Speech-to-text   | HuggingFace Transformers       | `pip install llmpm[audio]`        |
| TTS models              | Text-to-speech   | HuggingFace Transformers       | `pip install llmpm[audio]`        |

Install all backends at once:

```sh
pip install llmpm[all]
```
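Note that zsh expands square brackets as a glob, so quote the extra there:

```sh
pip install 'llmpm[all]'
```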

---

## Configuration

| Variable      | Default    | Description                             |
| ------------- | ---------- | --------------------------------------- |
| `LLMPM_HOME`  | `~/.llmpm` | Root directory for models and registry  |
| `HF_TOKEN`    | —          | HuggingFace API token for gated models  |
| `LLPM_PYTHON` | `python3`  | Python binary used by the npm shim      |
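For example, to keep model weights on a larger disk (assuming `LLMPM_HOME` is read on every invocation):

```sh
export LLMPM_HOME=/mnt/storage/llmpm   # hypothetical mount point
llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF
```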

---

## License

MIT
llmpm-1.0.0/README.md
ADDED
@@ -0,0 +1,320 @@
Identical to the long description embedded in PKG-INFO above.