ppmlx-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. ppmlx-0.1.0/.github/workflows/homebrew-update.yml +16 -0
  2. ppmlx-0.1.0/.github/workflows/release.yml +37 -0
  3. ppmlx-0.1.0/.github/workflows/tests.yml +23 -0
  4. ppmlx-0.1.0/.gitignore +19 -0
  5. ppmlx-0.1.0/CLAUDE.md +113 -0
  6. ppmlx-0.1.0/LICENSE +21 -0
  7. ppmlx-0.1.0/PKG-INFO +419 -0
  8. ppmlx-0.1.0/README.md +359 -0
  9. ppmlx-0.1.0/homebrew/Formula/ppmlx.rb +85 -0
  10. ppmlx-0.1.0/ppmlx/__init__.py +5 -0
  11. ppmlx-0.1.0/ppmlx/cli.py +2037 -0
  12. ppmlx-0.1.0/ppmlx/config.py +139 -0
  13. ppmlx-0.1.0/ppmlx/db.py +320 -0
  14. ppmlx-0.1.0/ppmlx/engine.py +415 -0
  15. ppmlx-0.1.0/ppmlx/engine_embed.py +123 -0
  16. ppmlx-0.1.0/ppmlx/engine_vlm.py +145 -0
  17. ppmlx-0.1.0/ppmlx/memory.py +80 -0
  18. ppmlx-0.1.0/ppmlx/modelfile.py +234 -0
  19. ppmlx-0.1.0/ppmlx/models.py +442 -0
  20. ppmlx-0.1.0/ppmlx/quantize.py +161 -0
  21. ppmlx-0.1.0/ppmlx/registry.py +50 -0
  22. ppmlx-0.1.0/ppmlx/registry_data.json +1687 -0
  23. ppmlx-0.1.0/ppmlx/schema.py +180 -0
  24. ppmlx-0.1.0/ppmlx/server.py +1986 -0
  25. ppmlx-0.1.0/pyproject.toml +65 -0
  26. ppmlx-0.1.0/scripts/bench_common.sh +499 -0
  27. ppmlx-0.1.0/scripts/bench_compare.sh +141 -0
  28. ppmlx-0.1.0/scripts/bench_ollama_glm.sh +9 -0
  29. ppmlx-0.1.0/scripts/bench_ollama_gptoss.sh +9 -0
  30. ppmlx-0.1.0/scripts/bench_ollama_qwen.sh +9 -0
  31. ppmlx-0.1.0/scripts/bench_ppmlx_glm.sh +9 -0
  32. ppmlx-0.1.0/scripts/bench_ppmlx_gptoss.sh +9 -0
  33. ppmlx-0.1.0/scripts/bench_ppmlx_qwen.sh +9 -0
  34. ppmlx-0.1.0/scripts/install.sh +122 -0
  35. ppmlx-0.1.0/scripts/reinstall.sh +14 -0
  36. ppmlx-0.1.0/tests/__init__.py +0 -0
  37. ppmlx-0.1.0/tests/conftest.py +82 -0
  38. ppmlx-0.1.0/tests/test_cli.py +163 -0
  39. ppmlx-0.1.0/tests/test_config.py +269 -0
  40. ppmlx-0.1.0/tests/test_db.py +148 -0
  41. ppmlx-0.1.0/tests/test_engine.py +397 -0
  42. ppmlx-0.1.0/tests/test_engine_embed.py +93 -0
  43. ppmlx-0.1.0/tests/test_engine_vlm.py +122 -0
  44. ppmlx-0.1.0/tests/test_memory.py +96 -0
  45. ppmlx-0.1.0/tests/test_modelfile.py +255 -0
  46. ppmlx-0.1.0/tests/test_models.py +170 -0
  47. ppmlx-0.1.0/tests/test_quantize.py +197 -0
  48. ppmlx-0.1.0/tests/test_schema.py +232 -0
  49. ppmlx-0.1.0/tests/test_server.py +291 -0
ppmlx-0.1.0/.github/workflows/homebrew-update.yml ADDED
@@ -0,0 +1,16 @@
+ name: Update Homebrew Formula
+
+ on:
+   workflow_run:
+     workflows: ["Release"]
+     types: [completed]
+
+ jobs:
+   update-formula:
+     if: ${{ github.event.workflow_run.conclusion == 'success' }}
+     runs-on: ubuntu-latest
+     steps:
+       - name: Trigger Homebrew tap update
+         run: |
+           echo "TODO: Update homebrew-ppmlx tap formula with new version"
+           echo "Steps: download sdist, run brew update-python-resources, update formula sha256"
ppmlx-0.1.0/.github/workflows/release.yml ADDED
@@ -0,0 +1,37 @@
+ name: Release
+
+ on:
+   push:
+     tags:
+       - 'v*'
+
+ permissions:
+   id-token: write
+
+ jobs:
+   test-publish:
+     runs-on: ubuntu-latest
+     environment: testpypi
+     steps:
+       - uses: actions/checkout@v4
+       - name: Install uv
+         uses: astral-sh/setup-uv@v5
+       - name: Build
+         run: uv build
+       - name: Publish to TestPyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
+         with:
+           repository-url: https://test.pypi.org/legacy/
+
+   publish:
+     needs: test-publish
+     runs-on: ubuntu-latest
+     environment: pypi
+     steps:
+       - uses: actions/checkout@v4
+       - name: Install uv
+         uses: astral-sh/setup-uv@v5
+       - name: Build
+         run: uv build
+       - name: Publish to PyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
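Note the shape of this workflow: `id-token: write` plus `pypa/gh-action-pypi-publish` with no stored API token is PyPI's OIDC trusted-publishing setup, and the jobs only fire on `v*` tags. Under that reading, cutting a release is just a tag push (tag value illustrative):

```bash
# Trigger the Release workflow above by pushing a version tag
git tag v0.1.0
git push origin v0.1.0
```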
ppmlx-0.1.0/.github/workflows/tests.yml ADDED
@@ -0,0 +1,23 @@
+ name: Tests
+
+ on:
+   push:
+     branches: [main]
+   pull_request:
+     branches: [main]
+
+ jobs:
+   test:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+       - name: Install uv
+         uses: astral-sh/setup-uv@v5
+         with:
+           version: "latest"
+       - name: Set up Python 3.11
+         run: uv python install 3.11
+       - name: Install dependencies
+         run: uv sync --python 3.11
+       - name: Run tests
+         run: uv run pytest tests/ -v --tb=short
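Since the CI job is plain `uv` invocations, a failing run can be reproduced locally with the same commands the workflow uses:

```bash
# Same steps as the workflow above, minus the checkout
uv python install 3.11
uv sync --python 3.11
uv run pytest tests/ -v --tb=short
```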
ppmlx-0.1.0/.gitignore ADDED
@@ -0,0 +1,19 @@
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ .venv/
+ venv/
+ env/
+ .env
+ dist/
+ build/
+ *.egg-info/
+ .pytest_cache/
+ .mypy_cache/
+ .ruff_cache/
+ *.pyc
+ uv.lock
+ *.html
+ .claire/
ppmlx-0.1.0/CLAUDE.md ADDED
@@ -0,0 +1,113 @@
+ # ppmlx
+
+ CLI for running LLMs on Apple Silicon via MLX with an OpenAI-compatible API.
+
+ ## Dev Setup
+
+ ```bash
+ uv sync --python 3.11
+ uv run pytest tests/ -v
+ ```
+
+ ## Starting the Server
+
+ ```bash
+ # Basic start (default port 6767)
+ uv run ppmlx serve
+
+ # Pre-load a model on startup
+ uv run ppmlx serve --model llama3
+
+ # Bind to all interfaces (e.g. for LAN access)
+ uv run ppmlx serve --host 0.0.0.0 --port 6767
+
+ # Interactive model selection
+ uv run ppmlx serve --interactive
+ ```
+
+ The server listens on `http://127.0.0.1:6767` by default.
+
+ ## Connecting (OpenAI-compatible)
+
+ **Python SDK:**
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(base_url="http://localhost:6767/v1", api_key="local")
+ response = client.chat.completions.create(
+     model="llama3",
+     messages=[{"role": "user", "content": "Hello"}],
+ )
+ ```
+
+ **curl:**
+ ```bash
+ # List models
+ curl http://localhost:6767/v1/models
+
+ # Chat
+ curl http://localhost:6767/v1/chat/completions \
+   -H "Content-Type: application/json" \
+   -d '{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}'
+
+ # Embeddings
+ curl http://localhost:6767/v1/embeddings \
+   -H "Content-Type: application/json" \
+   -d '{"model": "nomic-embed", "input": "Hello world"}'
+ ```
+
+ **Any OpenAI-compatible tool** (LangChain, LlamaIndex, Open WebUI, etc.):
+ - `base_url`: `http://localhost:6767/v1`
+ - `api_key`: any non-empty string (e.g. `"local"`)
+
+ ## API Endpoints
+
+ | Endpoint                    | Description                               |
+ |-----------------------------|-------------------------------------------|
+ | `POST /v1/chat/completions` | Chat (streaming supported)                |
+ | `POST /v1/responses`        | Responses API (Codex, newer OpenAI tools) |
+ | `POST /v1/completions`      | Text completion                           |
+ | `POST /v1/embeddings`       | Embeddings                                |
+ | `GET /v1/models`            | List loaded/available models              |
+ | `GET /health`               | Health check                              |
+ | `GET /metrics`              | Usage metrics                             |
+
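`/health`, `/metrics`, and `/v1/responses` have no examples elsewhere in these docs; quick smoke tests against a local server might look like the following (the `/v1/responses` payload shape is an assumption based on OpenAI's Responses API, which the table says this route targets):

```bash
# Liveness and usage counters (response bodies are not documented in this diff)
curl http://localhost:6767/health
curl http://localhost:6767/metrics

# Responses API; the "input" field is assumed from OpenAI's Responses API shape
curl http://localhost:6767/v1/responses \
  -H "Content-Type: application/json" \
  -d '{"model": "llama3", "input": "Hello"}'
```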
+ ## Key CLI Commands
+
+ ```bash
+ ppmlx pull <model>   # Download a model
+ ppmlx run <model>    # Interactive chat REPL
+ ppmlx serve          # Start API server
+ ppmlx list           # List local models
+ ppmlx ps             # Show loaded models + memory usage
+ ppmlx rm <model>     # Remove a model
+ ```
+
+ ## Config
+
+ `~/.ppmlx/config.toml` — all optional. Key fields:
+ ```toml
+ [server]
+ host = "127.0.0.1"
+ port = 6767
+
+ [generation]
+ temperature = 0.7
+ max_tokens = 2048
+ ```
+
+ ## Project Structure
+
+ ```
+ ppmlx/
+   cli.py           # Typer CLI (entry point)
+   server.py        # FastAPI app (OpenAI-compatible routes)
+   engine.py        # MLX LLM inference
+   engine_embed.py  # MLX embedding inference
+   models.py        # Model registry + HuggingFace download
+   config.py        # Config loading (~/.ppmlx/config.toml)
+   db.py            # SQLite request logging
+   schema.py        # Pydantic request/response schemas
+ tests/
+   conftest.py      # MLX stubs for CI (no GPU needed)
+ ```
ppmlx-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 pp-llm Contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
ppmlx-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,419 @@
+ Metadata-Version: 2.4
+ Name: ppmlx
+ Version: 0.1.0
+ Summary: CLI for running LLMs on Apple Silicon via MLX
+ Project-URL: Homepage, https://ppmlx.dev
+ Project-URL: Repository, https://github.com/PingCompany/ppmlx
+ Project-URL: Issues, https://github.com/PingCompany/ppmlx/issues
+ Project-URL: Documentation, https://github.com/PingCompany/ppmlx#readme
+ Author-email: Rafał Wyderka <rafal@ppmlx.dev>
+ License: MIT License
+
+ Copyright (c) 2026 pp-llm Contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ License-File: LICENSE
+ Keywords: apple-silicon,cli,inference,llm,local-ai,mlx,ollama
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Environment :: Console
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: MacOS
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Requires-Python: <3.13,>=3.11
+ Requires-Dist: fastapi>=0.115
+ Requires-Dist: httpx>=0.27
+ Requires-Dist: huggingface-hub>=0.24
+ Requires-Dist: mlx-lm>=0.22; sys_platform == 'darwin'
+ Requires-Dist: mlx-vlm>=0.1.18; sys_platform == 'darwin'
+ Requires-Dist: prompt-toolkit>=3.0
+ Requires-Dist: pydantic>=2.0
+ Requires-Dist: questionary>=2.0
+ Requires-Dist: rich>=13.0
+ Requires-Dist: setproctitle>=1.3
+ Requires-Dist: sse-starlette>=2.0
+ Requires-Dist: tomli-w>=1.0
+ Requires-Dist: typer>=0.12
+ Requires-Dist: uvicorn[standard]>=0.30
+ Provides-Extra: embeddings
+ Requires-Dist: mlx-embeddings>=0.0.5; (sys_platform == 'darwin') and extra == 'embeddings'
+ Description-Content-Type: text/markdown
+
+ # ppmlx
+
+ **CLI for running LLMs on Apple Silicon via MLX** — OpenAI-compatible API on port 6767.
+
+ ![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue)
+ ![Platform](https://img.shields.io/badge/platform-Apple%20Silicon-lightgrey)
+ ![License](https://img.shields.io/badge/license-MIT-green)
+
+ ---
+
+ ## Install
+
+ > **Requires:** macOS on Apple Silicon (M1/M2/M3/M4), Python 3.11+
+
+ ### uv (recommended)
+
+ ```bash
+ uv tool install ppmlx
+ ```
+
+ ### pipx
+
+ ```bash
+ pipx install ppmlx
+ ```
+
+ ### curl | sh (one-liner)
+
+ ```bash
+ curl -fsSL https://raw.githubusercontent.com/PingCompany/ppmlx/main/scripts/install.sh | sh
+ ```
+
+ ### From source
+
+ ```bash
+ git clone https://github.com/PingCompany/ppmlx
+ cd ppmlx
+ uv tool install .
+ ```
+
+ ### Homebrew
+
+ Homebrew tap coming soon. For now, use `uv tool install ppmlx`.
+
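One install variant worth noting: the metadata above declares an optional `embeddings` extra that pulls in `mlx-embeddings` on macOS, so enabling the embeddings engine at install time would look like:

```bash
# Opt into the embeddings extra declared in PKG-INFO above
uv tool install "ppmlx[embeddings]"
# or: pipx install "ppmlx[embeddings]"
```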
+ ---
+
+ ## Quick Start
+
+ ```bash
+ # 1. Download a model
+ ppmlx pull llama3
+
+ # 2. Interactive chat REPL
+ ppmlx run llama3
+
+ # 3. Start OpenAI-compatible API server on :6767
+ ppmlx serve
+ ```
+
+ ---
+
+ ## OpenAI SDK Example
+
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(base_url="http://localhost:6767/v1", api_key="local")
+
+ response = client.chat.completions.create(
+     model="llama3",
+     messages=[{"role": "user", "content": "Hello, how are you?"}],
+     stream=True,
+ )
+
+ for chunk in response:
+     if chunk.choices[0].delta.content:
+         print(chunk.choices[0].delta.content, end="", flush=True)
+ print()
+ ```
+
+ ---
+
+ ## curl Example
+
+ ```bash
+ # List available models
+ curl http://localhost:6767/v1/models
+
+ # Chat completion
+ curl http://localhost:6767/v1/chat/completions \
+   -H "Content-Type: application/json" \
+   -d '{
+     "model": "llama3",
+     "messages": [{"role": "user", "content": "What is Apple Silicon?"}],
+     "stream": false
+   }'
+
+ # Embeddings
+ curl http://localhost:6767/v1/embeddings \
+   -H "Content-Type: application/json" \
+   -d '{"model": "nomic-embed", "input": "Hello world"}'
+ ```
+
+ ---
+
+ ## Model Aliases
+
+ ### Llama Family
+
+ | Alias        | HuggingFace Repo                              |
+ |--------------|-----------------------------------------------|
+ | `llama3`     | mlx-community/Meta-Llama-3-8B-Instruct-4bit   |
+ | `llama3-70b` | mlx-community/Meta-Llama-3-70B-Instruct-4bit  |
+ | `llama3.2`   | mlx-community/Llama-3.2-3B-Instruct-4bit      |
+ | `llama3.1`   | mlx-community/Meta-Llama-3.1-8B-Instruct-4bit |
+
+ ### Mistral / Mixtral Family
+
+ | Alias          | HuggingFace Repo                              |
+ |----------------|-----------------------------------------------|
+ | `mistral`      | mlx-community/Mistral-7B-Instruct-v0.3-4bit   |
+ | `mixtral`      | mlx-community/Mixtral-8x7B-Instruct-v0.1-4bit |
+ | `mistral-nemo` | mlx-community/Mistral-Nemo-Instruct-2407-4bit |
+
+ ### Qwen Family
+
+ | Alias         | HuggingFace Repo                        |
+ |---------------|-----------------------------------------|
+ | `qwen2.5`     | mlx-community/Qwen2.5-7B-Instruct-4bit  |
+ | `qwen2.5-14b` | mlx-community/Qwen2.5-14B-Instruct-4bit |
+ | `qwen2.5-72b` | mlx-community/Qwen2.5-72B-Instruct-4bit |
+
+ ### Phi / Gemma Family
+
+ | Alias        | HuggingFace Repo                         |
+ |--------------|------------------------------------------|
+ | `phi4`       | mlx-community/phi-4-4bit                 |
+ | `phi3.5`     | mlx-community/Phi-3.5-mini-instruct-4bit |
+ | `gemma2`     | mlx-community/gemma-2-9b-it-4bit         |
+ | `gemma2-27b` | mlx-community/gemma-2-27b-it-4bit        |
+
+ ### Code Models
+
+ | Alias            | HuggingFace Repo                                |
+ |------------------|-------------------------------------------------|
+ | `codellama`      | mlx-community/CodeLlama-13b-Instruct-hf-4bit    |
+ | `deepseek-coder` | mlx-community/deepseek-coder-6.7b-instruct-4bit |
+
+ ### Embedding Models
+
+ | Alias         | HuggingFace Repo                    |
+ |---------------|-------------------------------------|
+ | `nomic-embed` | mlx-community/nomic-embed-text-v1.5 |
+ | `bge-small`   | mlx-community/bge-small-en-v1.5     |
+
+ ---
+
+ ## RAM Requirements
+
+ | Model Size (params) | Min RAM | Recommended RAM | Notes                       |
+ |---------------------|---------|-----------------|-----------------------------|
+ | 1-3B                | 4 GB    | 8 GB            | Llama 3.2 3B, Phi 3.5 mini  |
+ | 7-8B                | 8 GB    | 16 GB           | Llama 3 8B, Mistral 7B      |
+ | 13-14B              | 16 GB   | 24 GB           | CodeLlama 13B, Qwen 2.5 14B |
+ | 27-34B              | 24 GB   | 36 GB           | Gemma 2 27B                 |
+ | 70-72B              | 48 GB   | 64 GB           | Llama 3 70B, Qwen 2.5 72B   |
+
+ > All values are for 4-bit quantized models. Unquantized models require 2-4x more RAM.
+
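As a sanity check on the table: at 4-bit quantization the weights alone occupy roughly `params × 0.5 bytes`, so an 8B model is about 4 GB before KV cache and runtime overhead, which is why 8 GB is the floor rather than a comfortable target. That rule of thumb is ours, not ppmlx's documented formula; the CLI ships its own estimator:

```bash
# Back-of-envelope (our assumption): 4-bit weights ≈ params × 0.5 bytes
#   8B ≈ 4 GB, 70B ≈ 35 GB, plus KV cache and runtime overhead
# For real numbers, use the built-in estimator before downloading:
ppmlx estimate llama3
ppmlx estimate llama3-70b
```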
+ ---
+
+ ## CLI Commands
+
+ | Command                  | Description                                   |
+ |--------------------------|-----------------------------------------------|
+ | `ppmlx pull <model>`     | Download a model from HuggingFace Hub         |
+ | `ppmlx run <model>`      | Start interactive chat REPL                   |
+ | `ppmlx serve`            | Start OpenAI-compatible API server on :6767   |
+ | `ppmlx list`             | List locally downloaded models                |
+ | `ppmlx rm <model>`       | Remove a downloaded model                     |
+ | `ppmlx alias <n> <repo>` | Add a custom model alias                      |
+ | `ppmlx aliases`          | Show all model aliases (built-in + custom)    |
+ | `ppmlx ps`               | Show currently loaded models and memory usage |
+ | `ppmlx quantize`         | Convert and quantize a model to MLX format    |
+ | `ppmlx create`           | Create a custom model from a Modelfile        |
+ | `ppmlx logs`             | Query the request log database                |
+ | `ppmlx info <model>`     | Show detailed model information               |
+ | `ppmlx estimate <m>`     | Estimate RAM requirements before downloading  |
+
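The alias commands in the table are not demonstrated elsewhere; reading `<n>` as the alias name, a plausible custom-alias workflow is the following (flags and output unverified, command names from the table above):

```bash
# Hypothetical walk-through of the alias commands above
ppmlx alias qwen-big mlx-community/Qwen2.5-72B-Instruct-4bit
ppmlx aliases            # should now list qwen-big alongside the built-ins
ppmlx estimate qwen-big  # sanity-check RAM before a 70B-class download
ppmlx pull qwen-big
```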
+ ---
+
+ ## Modelfile Example
+
+ Create a `Modelfile` to define a custom model with a system prompt:
+
+ ```
+ FROM llama3
+
+ SYSTEM """
+ You are a helpful coding assistant. You write clean, well-documented code
+ and explain your reasoning step by step.
+ """
+
+ PARAMETER temperature 0.2
+ PARAMETER max_tokens 4096
+ PARAMETER top_p 0.9
+ ```
+
+ Then build it:
+
+ ```bash
+ ppmlx create coding-assistant -f Modelfile
+ ppmlx run coding-assistant
+ ```
+
+ ---
+
+ ## Configuration
+
+ ppmlx reads configuration from `~/.ppmlx/config.toml`. All values are optional.
+
+ ```toml
+ [server]
+ host = "127.0.0.1"            # Bind address (default: 127.0.0.1)
+ port = 6767                   # Port (default: 6767)
+ cors = true                   # Enable CORS (default: true)
+ cors_origins = ["*"]          # Allowed CORS origins
+
+ [models]
+ dir = "~/.ppmlx/models"       # Model storage directory
+ default_alias = "llama3"      # Default model for bare requests
+
+ [generation]
+ temperature = 0.7             # Default sampling temperature
+ max_tokens = 2048             # Default max output tokens
+ top_p = 0.9                   # Default top-p
+ repetition_penalty = 1.1
+
+ [logging]
+ db_path = "~/.ppmlx/ppmlx.db" # SQLite log database
+ log_requests = true           # Log all requests
+ log_level = "info"            # Server log level
+ ```
+
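The `[logging]` block points at an ordinary SQLite file, so besides the `ppmlx logs` command the database can be inspected with stock `sqlite3`; the schema is not shown in this diff, so `.tables` is the safe first step:

```bash
# Peek at the request log directly (schema not documented here)
sqlite3 ~/.ppmlx/ppmlx.db ".tables"
ppmlx logs   # the built-in query command from the CLI table
```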
+ ---
+
+ ## Architecture
+
+ ```
+ ┌─────────────────────────────────────────────────────┐
+ │                      ppmlx CLI                      │
+ │                   (typer + rich)                    │
+ │   pull / run / serve / list / rm / quantize / ...   │
+ └───────────────────┬─────────────────────────────────┘
+                     │
+          ┌──────────▼──────────┐
+          │   FastAPI Server    │
+          │     port :6767      │
+          │                     │
+          │ /v1/chat/completions│
+          │ /v1/completions     │
+          │ /v1/embeddings      │
+          │ /v1/models          │
+          │ /health  /metrics   │
+          └──────┬───────┬──────┘
+                 │       │
+       ┌─────────▼──┐  ┌─▼──────────────┐
+       │ LLM Engine │  │  Embed Engine  │
+       │  (mlx-lm)  │  │(mlx-embeddings)│
+       └─────────┬──┘  └─┬──────────────┘
+                 │       │
+       ┌─────────▼───────▼──────────────┐
+       │        MLX / Metal GPU         │
+       │  Apple Silicon Unified Memory  │
+       └────────────────────────────────┘
+
+       ┌─────────────▼──────────────────┐
+       │      SQLite Request Log        │
+       │      ~/.ppmlx/ppmlx.db         │
+       └────────────────────────────────┘
+ ```
+
+ ---
+
+ ## Uninstall
+
+ ### uv
+
+ ```bash
+ uv tool uninstall ppmlx
+ ```
+
+ ### pipx
+
+ ```bash
+ pipx uninstall ppmlx
+ ```
+
+ ### Manual cleanup (all methods)
+
+ ```bash
+ # Remove downloaded models and config
+ rm -rf ~/.ppmlx
+ ```
+
+ ---
+
+ ## Contributing
+
+ 1. Fork the repository on GitHub.
+ 2. Create a feature branch: `git checkout -b feat/my-feature`
+ 3. Install dev dependencies: `uv sync --python 3.11`
+ 4. Run tests: `uv run pytest tests/ -v`
+ 5. Submit a pull request.
+
+ ### Development Setup
+
+ ```bash
+ git clone https://github.com/PingCompany/ppmlx
+ cd ppmlx
+ uv sync --python 3.11
+ uv run ppmlx --version
+ uv run pytest tests/ -v
+ ```
+
+ ### Project Structure
+
+ ```
+ ppmlx/
+   __init__.py      # version
+   cli.py           # Typer CLI entry point
+   server.py        # FastAPI application
+   engine.py        # MLX LLM inference engine
+   engine_embed.py  # MLX embedding engine
+   engine_vlm.py    # MLX vision-language engine
+   models.py        # model registry, aliases, download
+   config.py        # configuration loading
+   db.py            # SQLite request logging
+   memory.py        # RAM estimation utilities
+   modelfile.py     # Modelfile parser
+   quantize.py      # MLX quantization helpers
+ tests/
+   conftest.py      # MLX stubs for CI
+   test_cli.py      # CLI tests
+ scripts/
+   install.sh       # One-liner installer
+ homebrew/
+   Formula/ppmlx.rb # Homebrew formula
+ .github/workflows/
+   tests.yml        # CI tests
+   release.yml      # PyPI release
+   homebrew-update.yml
+ ```
+
+ ---
+
+ ## License
+
+ MIT — see [LICENSE](LICENSE).