llmpm 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. llmpm-1.0.0/PKG-INFO +383 -0
  2. llmpm-1.0.0/README.md +320 -0
  3. llmpm-1.0.0/llmpm/__init__.py +4 -0
  4. llmpm-1.0.0/llmpm/__main__.py +6 -0
  5. llmpm-1.0.0/llmpm/cli.py +143 -0
  6. llmpm-1.0.0/llmpm/commands/__init__.py +1 -0
  7. llmpm-1.0.0/llmpm/commands/install.py +280 -0
  8. llmpm-1.0.0/llmpm/commands/list_cmd.py +133 -0
  9. llmpm-1.0.0/llmpm/commands/push.py +161 -0
  10. llmpm-1.0.0/llmpm/commands/run_cmd.py +477 -0
  11. llmpm-1.0.0/llmpm/commands/serve_cmd.py +159 -0
  12. llmpm-1.0.0/llmpm/core/__init__.py +1 -0
  13. llmpm-1.0.0/llmpm/core/chat_ui/dist/assets/index-DA7mKlGQ.css +1 -0
  14. llmpm-1.0.0/llmpm/core/chat_ui/dist/assets/index-tyQ9Z-hx.js +95 -0
  15. llmpm-1.0.0/llmpm/core/chat_ui/dist/index.html +24 -0
  16. llmpm-1.0.0/llmpm/core/downloader.py +237 -0
  17. llmpm-1.0.0/llmpm/core/model_detector.py +120 -0
  18. llmpm-1.0.0/llmpm/core/registry.py +114 -0
  19. llmpm-1.0.0/llmpm/core/runner.py +321 -0
  20. llmpm-1.0.0/llmpm/core/serve_audio.py +148 -0
  21. llmpm-1.0.0/llmpm/core/serve_diffusion.py +167 -0
  22. llmpm-1.0.0/llmpm/core/serve_gguf.py +77 -0
  23. llmpm-1.0.0/llmpm/core/serve_transformers.py +150 -0
  24. llmpm-1.0.0/llmpm/core/serve_vision.py +107 -0
  25. llmpm-1.0.0/llmpm/core/server/__init__.py +88 -0
  26. llmpm-1.0.0/llmpm/core/server/_audio.py +74 -0
  27. llmpm-1.0.0/llmpm/core/server/_chat.py +158 -0
  28. llmpm-1.0.0/llmpm/core/server/_context.py +39 -0
  29. llmpm-1.0.0/llmpm/core/server/_handler.py +154 -0
  30. llmpm-1.0.0/llmpm/core/server/_images.py +66 -0
  31. llmpm-1.0.0/llmpm/core/server/_types.py +23 -0
  32. llmpm-1.0.0/llmpm/display.py +419 -0
  33. llmpm-1.0.0/llmpm.egg-info/PKG-INFO +383 -0
  34. llmpm-1.0.0/llmpm.egg-info/SOURCES.txt +38 -0
  35. llmpm-1.0.0/llmpm.egg-info/dependency_links.txt +1 -0
  36. llmpm-1.0.0/llmpm.egg-info/entry_points.txt +2 -0
  37. llmpm-1.0.0/llmpm.egg-info/requires.txt +47 -0
  38. llmpm-1.0.0/llmpm.egg-info/top_level.txt +1 -0
  39. llmpm-1.0.0/pyproject.toml +97 -0
  40. llmpm-1.0.0/setup.cfg +4 -0
llmpm-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,383 @@
1
+ Metadata-Version: 2.4
2
+ Name: llmpm
3
+ Version: 1.0.0
4
+ Summary: LLM Package Manager — download, run, and share AI models from the command line
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/llmpm-dev/llmpm
7
+ Project-URL: Repository, https://github.com/llmpm-dev/llmpm
8
+ Project-URL: Issues, https://github.com/llmpm-dev/llmpm/issues
9
+ Keywords: llm,ai,models,package-manager,huggingface,llama
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: click>=8.1
23
+ Requires-Dist: rich>=13.0
24
+ Requires-Dist: huggingface_hub>=0.20
25
+ Requires-Dist: requests>=2.28
26
+ Requires-Dist: tqdm>=4.65
27
+ Requires-Dist: questionary>=2.0
28
+ Requires-Dist: humanize>=4.0
29
+ Requires-Dist: llama-cpp-python>=0.2.0
30
+ Requires-Dist: diffusers>=0.27
31
+ Requires-Dist: transformers>=4.35
32
+ Requires-Dist: accelerate>=0.24
33
+ Requires-Dist: torch>=2.0
34
+ Provides-Extra: gguf
35
+ Requires-Dist: llama-cpp-python>=0.2.0; extra == "gguf"
36
+ Provides-Extra: transformers
37
+ Requires-Dist: transformers>=4.35; extra == "transformers"
38
+ Requires-Dist: torch>=2.0; extra == "transformers"
39
+ Requires-Dist: accelerate>=0.24; extra == "transformers"
40
+ Provides-Extra: diffusion
41
+ Requires-Dist: diffusers>=0.27; extra == "diffusion"
42
+ Requires-Dist: transformers>=4.35; extra == "diffusion"
43
+ Requires-Dist: torch>=2.0; extra == "diffusion"
44
+ Requires-Dist: accelerate>=0.24; extra == "diffusion"
45
+ Provides-Extra: vision
46
+ Requires-Dist: transformers>=4.35; extra == "vision"
47
+ Requires-Dist: torch>=2.0; extra == "vision"
48
+ Requires-Dist: accelerate>=0.24; extra == "vision"
49
+ Requires-Dist: Pillow>=10.0; extra == "vision"
50
+ Provides-Extra: audio
51
+ Requires-Dist: transformers>=4.35; extra == "audio"
52
+ Requires-Dist: torch>=2.0; extra == "audio"
53
+ Requires-Dist: accelerate>=0.24; extra == "audio"
54
+ Requires-Dist: numpy>=1.24; extra == "audio"
55
+ Provides-Extra: all
56
+ Requires-Dist: llama-cpp-python>=0.2.0; extra == "all"
57
+ Requires-Dist: transformers>=4.35; extra == "all"
58
+ Requires-Dist: torch>=2.0; extra == "all"
59
+ Requires-Dist: accelerate>=0.24; extra == "all"
60
+ Requires-Dist: diffusers>=0.27; extra == "all"
61
+ Requires-Dist: Pillow>=10.0; extra == "all"
62
+ Requires-Dist: numpy>=1.24; extra == "all"
63
+
64
+ # llmpm — LLM Package Manager
65
+
66
+ > Download, run, and share AI models from the command line.
67
+
68
+ `llmpm` is a package manager for AI models, inspired by npm.
69
+ Models are sourced from [HuggingFace Hub](https://huggingface.co).
70
+ Supports text generation (GGUF via **llama.cpp** and Transformer checkpoints), image generation (Diffusion), vision, speech-to-text (ASR), and text-to-speech (TTS).
71
+
72
+ ---
73
+
74
+ ## Installation
75
+
76
+ ### via pip (recommended)
77
+
78
+ ```sh
79
+ pip install llmpm
80
+ ```
81
+
82
+ ### via npm
83
+
84
+ ```sh
85
+ npm install -g llmpm
86
+ ```
87
+
88
+ > The npm package automatically installs the Python backend via pip.
89
+
90
+ ---
91
+
92
+ ## Quick start
93
+
94
+ ```sh
95
+ # Install a GGUF model (interactive quantisation picker)
96
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF
97
+
98
+ # Run it
99
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF
100
+ ```
101
+
102
+ ---
103
+
104
+ ## Commands
105
+
106
+ | Command | Description |
107
+ | ---------------------- | --------------------------------------------- |
108
+ | `llmpm install <repo>` | Download and install a model from HuggingFace |
109
+ | `llmpm run <repo>` | Run an installed model (interactive chat) |
110
+ | `llmpm serve <repo>` | Serve a model as an OpenAI-compatible HTTP API |
111
+ | `llmpm push <repo>` | Upload a model to HuggingFace Hub |
112
+ | `llmpm list` | Show all installed models |
113
+ | `llmpm info <repo>` | Show details about a model |
114
+ | `llmpm uninstall <repo>` | Uninstall a model |
115
+
116
+ ---
117
+
118
+ ## `llmpm install`
119
+
120
+ ```sh
121
+ # Interactively choose a GGUF quantisation
122
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF
123
+
124
+ # Install a specific quantisation
125
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --quant Q4_K_M
126
+
127
+ # Install a full Transformer model
128
+ llmpm install meta-llama/Llama-3.2-1B-Instruct
129
+
130
+ # Install a single specific file
131
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --file Llama-3.2-1B-Instruct-Q4_K_M.gguf
132
+
133
+ # Skip prompts (pick best default)
134
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --no-interactive
135
+ ```
136
+
137
+ Models are stored in `~/.llmpm/models/`.
138
+
139
+ ---
140
+
141
+ ## `llmpm run`
142
+
143
+ `llmpm run` auto-detects the model type and launches the appropriate interactive session. It supports text generation, image generation, vision, speech-to-text (ASR), and text-to-speech (TTS) models.
144
+
145
+ ### Text generation (GGUF & Transformers)
146
+
147
+ ```sh
148
+ # Interactive chat
149
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF
150
+
151
+ # Single-turn inference
152
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --prompt "Explain quantum computing"
153
+
154
+ # With a system prompt
155
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --system "You are a helpful pirate."
156
+
157
+ # Limit response length
158
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --max-tokens 512
159
+
160
+ # GGUF: tune context window and GPU layers
161
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --ctx 8192 --gpu-layers 32
162
+
163
+ # Transformer model
164
+ llmpm run HuggingFaceTB/SmolLM2-1.7B-Instruct
165
+ ```
166
+
167
+ ### Image generation (Diffusion)
168
+
169
+ Generates an image from a text prompt and saves it as a PNG on your Desktop.
170
+
171
+ ```sh
172
+ # Single prompt → saves llmpm_<timestamp>.png to ~/Desktop
173
+ llmpm run amused/amused-256 --prompt "a cyberpunk city at sunset"
174
+
175
+ # Interactive session (type a prompt, get an image each time)
176
+ llmpm run amused/amused-256
177
+ ```
178
+
179
+ In interactive mode, type your prompt and press Enter. The output path is printed after each generation. Type `/exit` to quit.
180
+
181
+ > Requires: `pip install diffusers torch accelerate`
182
+
183
+ ### Vision (image-to-text)
184
+
185
+ Describe or answer questions about an image. Pass the image file path via `--prompt`.
186
+
187
+ ```sh
188
+ # Single image description
189
+ llmpm run Salesforce/blip-image-captioning-base --prompt /path/to/photo.jpg
190
+
191
+ # Interactive session: type an image path at each prompt
192
+ llmpm run Salesforce/blip-image-captioning-base
193
+ ```
194
+
195
+ > Requires: `pip install transformers torch Pillow`
196
+
197
+ ### Speech-to-text / ASR
198
+
199
+ Transcribe an audio file. Pass the audio file path via `--prompt`.
200
+
201
+ ```sh
202
+ # Transcribe a single file
203
+ llmpm run openai/whisper-base --prompt recording.wav
204
+
205
+ # Interactive: enter an audio file path at each prompt
206
+ llmpm run openai/whisper-base
207
+ ```
208
+
209
+ Supported formats depend on your installed audio libraries (wav, flac, mp3, …).
210
+
211
+ > Requires: `pip install transformers torch`
212
+
213
+ ### Text-to-speech / TTS
214
+
215
+ Convert text to speech. The output WAV file is saved to your Desktop.
216
+
217
+ ```sh
218
+ # Single utterance → saves llmpm_<timestamp>.wav to ~/Desktop
219
+ llmpm run suno/bark-small --prompt "Hello, how are you today?"
220
+
221
+ # Interactive session
222
+ llmpm run suno/bark-small
223
+ ```
224
+
225
+ > Requires: `pip install transformers torch`
226
+
227
+ ### `llmpm run` options
228
+
229
+ | Option | Default | Description |
230
+ | --- | --- | --- |
231
+ | `--prompt` / `-p` | — | Single-turn prompt or input file path (non-interactive) |
232
+ | `--system` / `-s` | — | System prompt (text generation only) |
233
+ | `--max-tokens` | `128000` | Maximum tokens to generate per response |
234
+ | `--ctx` | `128000` | Context window size (GGUF only) |
235
+ | `--gpu-layers` | `-1` | GPU layers to offload, `-1` = all (GGUF only) |
236
+ | `--verbose` | off | Show model loading output |
237
+
238
+ ### Interactive session commands
239
+
240
+ These commands work in any interactive session:
241
+
242
+ | Command | Action |
243
+ | ---------------- | --------------------------------------------- |
244
+ | `/exit` | End the session |
245
+ | `/clear` | Clear conversation history (text gen only) |
246
+ | `/system <text>` | Update the system prompt (text gen only) |
247
+
248
+ ### Model type detection
249
+
250
+ `llmpm run` reads `config.json` / `model_index.json` from the installed model to determine the pipeline type before loading any weights. The detected type is printed at startup:
251
+
252
+ ```
253
+ Detected: Image Generation (Diffusion)
254
+ Loading model… ✓
255
+ ```
256
+
257
+ If detection is ambiguous, llmpm falls back to the text-generation backend.
258
+
259
+ ---
260
+
261
+ ## `llmpm serve`
262
+
263
+ Start a local HTTP server exposing the model as an OpenAI-compatible REST API.
264
+ A browser-based chat UI is also available at `/chat`.
265
+
266
+ ```sh
267
+ # Serve on the default port (8080)
268
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF
269
+
270
+ # Custom port and host
271
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --port 9000 --host 0.0.0.0
272
+
273
+ # Set the default max tokens (clients may override per-request)
274
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --max-tokens 2048
275
+
276
+ # GGUF options
277
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --ctx 8192 --gpu-layers 32
278
+ ```
279
+
280
+ ### `llmpm serve` options
281
+
282
+ | Option | Default | Description |
283
+ | --- | --- | --- |
284
+ | `--port` / `-p` | `8080` | Port to listen on (auto-increments if busy) |
285
+ | `--host` / `-H` | `localhost` | Host/address to bind to |
286
+ | `--max-tokens` | `128000` | Default max tokens per response (overridable per-request) |
287
+ | `--ctx` | `128000` | Context window size (GGUF only) |
288
+ | `--gpu-layers` | `-1` | GPU layers to offload, `-1` = all (GGUF only) |
289
+
290
+ ### Endpoints
291
+
292
+ | Method | Path | Models | Description |
293
+ | --- | --- | --- | --- |
294
+ | `GET` | `/chat` | all | Browser chat / image-gen UI |
295
+ | `GET` | `/health` | all | `{"status":"ok","model":"<id>"}` |
296
+ | `POST` | `/v1/chat/completions` | text-gen | OpenAI-compatible chat inference (SSE streaming supported) |
297
+ | `POST` | `/v1/images/generations` | diffusion | Text-to-image; pass `image` (base64) for image-to-image |
298
+ | `POST` | `/v1/audio/transcriptions` | ASR | Speech-to-text |
299
+ | `POST` | `/v1/audio/speech` | TTS | Text-to-speech |
300
+
301
+ ### Example API calls
302
+
303
+ ```sh
304
+ # Text generation
305
+ curl -X POST http://localhost:8080/v1/chat/completions \
306
+ -H "Content-Type: application/json" \
307
+ -d '{"messages": [{"role": "user", "content": "Hello!"}], "max_tokens": 256}'
308
+
309
+ # Text-to-image
310
+ curl -X POST http://localhost:8080/v1/images/generations \
311
+ -H "Content-Type: application/json" \
312
+ -d '{"prompt": "a cat in a forest", "n": 1}'
313
+
314
+ # Image-to-image (include the source image as base64 in the same endpoint)
315
+ IMAGE_B64=$(base64 -i input.png)
316
+ curl -X POST http://localhost:8080/v1/images/generations \
317
+ -H "Content-Type: application/json" \
318
+ -d "{\"prompt\": \"turn it into a painting\", \"image\": \"$IMAGE_B64\"}"
319
+ ```
320
+
321
+ Response shape (both text-to-image and image-to-image):
322
+
323
+ ```json
324
+ {
325
+ "created": 1234567890,
326
+ "data": [{ "b64_json": "<base64-png>" }]
327
+ }
328
+ ```
329
+
330
+ ---
331
+
332
+ ## `llmpm push`
333
+
334
+ ```sh
335
+ # Push an already-installed model
336
+ llmpm push my-org/my-fine-tune
337
+
338
+ # Push a local directory
339
+ llmpm push my-org/my-fine-tune --path ./my-model-dir
340
+
341
+ # Push as private repository
342
+ llmpm push my-org/my-fine-tune --private
343
+
344
+ # Custom commit message
345
+ llmpm push my-org/my-fine-tune -m "Add Q4_K_M quantisation"
346
+ ```
347
+
348
+ Requires a HuggingFace token (run `huggingface-cli login` or set `HF_TOKEN`).
349
+
350
+ ---
351
+
352
+ ## Backends
353
+
354
+ | Model type | Pipeline | Backend | Extra install |
355
+ | ----------------------- | ----------------- | ------------------------------ | ------------------------------------------------ |
356
+ | `.gguf` files | Text generation | llama.cpp via llama-cpp-python | `pip install llmpm[gguf]` |
357
+ | `.safetensors` / `.bin` | Text generation | HuggingFace Transformers | `pip install llmpm[transformers]` |
358
+ | Diffusion models | Image generation | HuggingFace Diffusers | `pip install llmpm[diffusion]` |
359
+ | Vision models | Image-to-text | HuggingFace Transformers | `pip install llmpm[vision]` |
360
+ | Whisper / ASR models | Speech-to-text | HuggingFace Transformers | `pip install llmpm[audio]` |
361
+ | TTS models | Text-to-speech | HuggingFace Transformers | `pip install llmpm[audio]` |
362
+
363
+ Install all backends at once:
364
+
365
+ ```sh
366
+ pip install llmpm[all]
367
+ ```
368
+
369
+ ---
370
+
371
+ ## Configuration
372
+
373
+ | Variable | Default | Description |
374
+ | ------------- | ---------- | -------------------------------------- |
375
+ | `LLMPM_HOME` | `~/.llmpm` | Root directory for models and registry |
376
+ | `HF_TOKEN` | — | HuggingFace API token for gated models |
377
+ | `LLMPM_PYTHON` | `python3` | Python binary used by the npm shim |
378
+
379
+ ---
380
+
381
+ ## License
382
+
383
+ MIT
llmpm-1.0.0/README.md ADDED
@@ -0,0 +1,320 @@
1
+ # llmpm — LLM Package Manager
2
+
3
+ > Download, run, and share AI models from the command line.
4
+
5
+ `llmpm` is a package manager for AI models, inspired by npm.
6
+ Models are sourced from [HuggingFace Hub](https://huggingface.co).
7
+ Supports text generation (GGUF via **llama.cpp** and Transformer checkpoints), image generation (Diffusion), vision, speech-to-text (ASR), and text-to-speech (TTS).
8
+
9
+ ---
10
+
11
+ ## Installation
12
+
13
+ ### via pip (recommended)
14
+
15
+ ```sh
16
+ pip install llmpm
17
+ ```
18
+
19
+ ### via npm
20
+
21
+ ```sh
22
+ npm install -g llmpm
23
+ ```
24
+
25
+ > The npm package automatically installs the Python backend via pip.
26
+
27
+ ---
28
+
29
+ ## Quick start
30
+
31
+ ```sh
32
+ # Install a GGUF model (interactive quantisation picker)
33
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF
34
+
35
+ # Run it
36
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Commands
42
+
43
+ | Command | Description |
44
+ | ---------------------- | --------------------------------------------- |
45
+ | `llmpm install <repo>` | Download and install a model from HuggingFace |
46
+ | `llmpm run <repo>` | Run an installed model (interactive chat) |
47
+ | `llmpm serve <repo>` | Serve a model as an OpenAI-compatible HTTP API |
48
+ | `llmpm push <repo>` | Upload a model to HuggingFace Hub |
49
+ | `llmpm list` | Show all installed models |
50
+ | `llmpm info <repo>` | Show details about a model |
51
+ | `llmpm uninstall <repo>` | Uninstall a model |
52
+
53
+ ---
54
+
55
+ ## `llmpm install`
56
+
57
+ ```sh
58
+ # Interactively choose a GGUF quantisation
59
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF
60
+
61
+ # Install a specific quantisation
62
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --quant Q4_K_M
63
+
64
+ # Install a full Transformer model
65
+ llmpm install meta-llama/Llama-3.2-1B-Instruct
66
+
67
+ # Install a single specific file
68
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --file Llama-3.2-1B-Instruct-Q4_K_M.gguf
69
+
70
+ # Skip prompts (pick best default)
71
+ llmpm install bartowski/Llama-3.2-1B-Instruct-GGUF --no-interactive
72
+ ```
73
+
74
+ Models are stored in `~/.llmpm/models/`.
75
+
76
+ ---
77
+
78
+ ## `llmpm run`
79
+
80
+ `llmpm run` auto-detects the model type and launches the appropriate interactive session. It supports text generation, image generation, vision, speech-to-text (ASR), and text-to-speech (TTS) models.
81
+
82
+ ### Text generation (GGUF & Transformers)
83
+
84
+ ```sh
85
+ # Interactive chat
86
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF
87
+
88
+ # Single-turn inference
89
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --prompt "Explain quantum computing"
90
+
91
+ # With a system prompt
92
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --system "You are a helpful pirate."
93
+
94
+ # Limit response length
95
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --max-tokens 512
96
+
97
+ # GGUF: tune context window and GPU layers
98
+ llmpm run bartowski/Llama-3.2-1B-Instruct-GGUF --ctx 8192 --gpu-layers 32
99
+
100
+ # Transformer model
101
+ llmpm run HuggingFaceTB/SmolLM2-1.7B-Instruct
102
+ ```
103
+
104
+ ### Image generation (Diffusion)
105
+
106
+ Generates an image from a text prompt and saves it as a PNG on your Desktop.
107
+
108
+ ```sh
109
+ # Single prompt → saves llmpm_<timestamp>.png to ~/Desktop
110
+ llmpm run amused/amused-256 --prompt "a cyberpunk city at sunset"
111
+
112
+ # Interactive session (type a prompt, get an image each time)
113
+ llmpm run amused/amused-256
114
+ ```
115
+
116
+ In interactive mode, type your prompt and press Enter. The output path is printed after each generation. Type `/exit` to quit.
117
+
118
+ > Requires: `pip install diffusers torch accelerate`
119
+
120
+ ### Vision (image-to-text)
121
+
122
+ Describe or answer questions about an image. Pass the image file path via `--prompt`.
123
+
124
+ ```sh
125
+ # Single image description
126
+ llmpm run Salesforce/blip-image-captioning-base --prompt /path/to/photo.jpg
127
+
128
+ # Interactive session: type an image path at each prompt
129
+ llmpm run Salesforce/blip-image-captioning-base
130
+ ```
131
+
132
+ > Requires: `pip install transformers torch Pillow`
133
+
134
+ ### Speech-to-text / ASR
135
+
136
+ Transcribe an audio file. Pass the audio file path via `--prompt`.
137
+
138
+ ```sh
139
+ # Transcribe a single file
140
+ llmpm run openai/whisper-base --prompt recording.wav
141
+
142
+ # Interactive: enter an audio file path at each prompt
143
+ llmpm run openai/whisper-base
144
+ ```
145
+
146
+ Supported formats depend on your installed audio libraries (wav, flac, mp3, …).
147
+
148
+ > Requires: `pip install transformers torch`
149
+
150
+ ### Text-to-speech / TTS
151
+
152
+ Convert text to speech. The output WAV file is saved to your Desktop.
153
+
154
+ ```sh
155
+ # Single utterance → saves llmpm_<timestamp>.wav to ~/Desktop
156
+ llmpm run suno/bark-small --prompt "Hello, how are you today?"
157
+
158
+ # Interactive session
159
+ llmpm run suno/bark-small
160
+ ```
161
+
162
+ > Requires: `pip install transformers torch`
163
+
164
+ ### `llmpm run` options
165
+
166
+ | Option | Default | Description |
167
+ | --- | --- | --- |
168
+ | `--prompt` / `-p` | — | Single-turn prompt or input file path (non-interactive) |
169
+ | `--system` / `-s` | — | System prompt (text generation only) |
170
+ | `--max-tokens` | `128000` | Maximum tokens to generate per response |
171
+ | `--ctx` | `128000` | Context window size (GGUF only) |
172
+ | `--gpu-layers` | `-1` | GPU layers to offload, `-1` = all (GGUF only) |
173
+ | `--verbose` | off | Show model loading output |
174
+
175
+ ### Interactive session commands
176
+
177
+ These commands work in any interactive session:
178
+
179
+ | Command | Action |
180
+ | ---------------- | --------------------------------------------- |
181
+ | `/exit` | End the session |
182
+ | `/clear` | Clear conversation history (text gen only) |
183
+ | `/system <text>` | Update the system prompt (text gen only) |
184
+
185
+ ### Model type detection
186
+
187
+ `llmpm run` reads `config.json` / `model_index.json` from the installed model to determine the pipeline type before loading any weights. The detected type is printed at startup:
188
+
189
+ ```
190
+ Detected: Image Generation (Diffusion)
191
+ Loading model… ✓
192
+ ```
193
+
194
+ If detection is ambiguous, llmpm falls back to the text-generation backend.
195
+
196
+ ---
197
+
198
+ ## `llmpm serve`
199
+
200
+ Start a local HTTP server exposing the model as an OpenAI-compatible REST API.
201
+ A browser-based chat UI is also available at `/chat`.
202
+
203
+ ```sh
204
+ # Serve on the default port (8080)
205
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF
206
+
207
+ # Custom port and host
208
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --port 9000 --host 0.0.0.0
209
+
210
+ # Set the default max tokens (clients may override per-request)
211
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --max-tokens 2048
212
+
213
+ # GGUF options
214
+ llmpm serve bartowski/Llama-3.2-1B-Instruct-GGUF --ctx 8192 --gpu-layers 32
215
+ ```
216
+
217
+ ### `llmpm serve` options
218
+
219
+ | Option | Default | Description |
220
+ | --- | --- | --- |
221
+ | `--port` / `-p` | `8080` | Port to listen on (auto-increments if busy) |
222
+ | `--host` / `-H` | `localhost` | Host/address to bind to |
223
+ | `--max-tokens` | `128000` | Default max tokens per response (overridable per-request) |
224
+ | `--ctx` | `128000` | Context window size (GGUF only) |
225
+ | `--gpu-layers` | `-1` | GPU layers to offload, `-1` = all (GGUF only) |
226
+
227
+ ### Endpoints
228
+
229
+ | Method | Path | Models | Description |
230
+ | --- | --- | --- | --- |
231
+ | `GET` | `/chat` | all | Browser chat / image-gen UI |
232
+ | `GET` | `/health` | all | `{"status":"ok","model":"<id>"}` |
233
+ | `POST` | `/v1/chat/completions` | text-gen | OpenAI-compatible chat inference (SSE streaming supported) |
234
+ | `POST` | `/v1/images/generations` | diffusion | Text-to-image; pass `image` (base64) for image-to-image |
235
+ | `POST` | `/v1/audio/transcriptions` | ASR | Speech-to-text |
236
+ | `POST` | `/v1/audio/speech` | TTS | Text-to-speech |
237
+
238
+ ### Example API calls
239
+
240
+ ```sh
241
+ # Text generation
242
+ curl -X POST http://localhost:8080/v1/chat/completions \
243
+ -H "Content-Type: application/json" \
244
+ -d '{"messages": [{"role": "user", "content": "Hello!"}], "max_tokens": 256}'
245
+
246
+ # Text-to-image
247
+ curl -X POST http://localhost:8080/v1/images/generations \
248
+ -H "Content-Type: application/json" \
249
+ -d '{"prompt": "a cat in a forest", "n": 1}'
250
+
251
+ # Image-to-image (include the source image as base64 in the same endpoint)
252
+ IMAGE_B64=$(base64 -i input.png)
253
+ curl -X POST http://localhost:8080/v1/images/generations \
254
+ -H "Content-Type: application/json" \
255
+ -d "{\"prompt\": \"turn it into a painting\", \"image\": \"$IMAGE_B64\"}"
256
+ ```
257
+
258
+ Response shape (both text-to-image and image-to-image):
259
+
260
+ ```json
261
+ {
262
+ "created": 1234567890,
263
+ "data": [{ "b64_json": "<base64-png>" }]
264
+ }
265
+ ```
266
+
267
+ ---
268
+
269
+ ## `llmpm push`
270
+
271
+ ```sh
272
+ # Push an already-installed model
273
+ llmpm push my-org/my-fine-tune
274
+
275
+ # Push a local directory
276
+ llmpm push my-org/my-fine-tune --path ./my-model-dir
277
+
278
+ # Push as private repository
279
+ llmpm push my-org/my-fine-tune --private
280
+
281
+ # Custom commit message
282
+ llmpm push my-org/my-fine-tune -m "Add Q4_K_M quantisation"
283
+ ```
284
+
285
+ Requires a HuggingFace token (run `huggingface-cli login` or set `HF_TOKEN`).
286
+
287
+ ---
288
+
289
+ ## Backends
290
+
291
+ | Model type | Pipeline | Backend | Extra install |
292
+ | ----------------------- | ----------------- | ------------------------------ | ------------------------------------------------ |
293
+ | `.gguf` files | Text generation | llama.cpp via llama-cpp-python | `pip install llmpm[gguf]` |
294
+ | `.safetensors` / `.bin` | Text generation | HuggingFace Transformers | `pip install llmpm[transformers]` |
295
+ | Diffusion models | Image generation | HuggingFace Diffusers | `pip install llmpm[diffusion]` |
296
+ | Vision models | Image-to-text | HuggingFace Transformers | `pip install llmpm[vision]` |
297
+ | Whisper / ASR models | Speech-to-text | HuggingFace Transformers | `pip install llmpm[audio]` |
298
+ | TTS models | Text-to-speech | HuggingFace Transformers | `pip install llmpm[audio]` |
299
+
300
+ Install all backends at once:
301
+
302
+ ```sh
303
+ pip install llmpm[all]
304
+ ```
305
+
306
+ ---
307
+
308
+ ## Configuration
309
+
310
+ | Variable | Default | Description |
311
+ | ------------- | ---------- | -------------------------------------- |
312
+ | `LLMPM_HOME` | `~/.llmpm` | Root directory for models and registry |
313
+ | `HF_TOKEN` | — | HuggingFace API token for gated models |
314
+ | `LLMPM_PYTHON` | `python3` | Python binary used by the npm shim |
315
+
316
+ ---
317
+
318
+ ## License
319
+
320
+ MIT
@@ -0,0 +1,4 @@
1
+ """llmpm — LLM Package Manager."""
2
+
3
+ __version__ = "1.0.0"
4
+ __author__ = "llmpm contributors"