ppmlx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ppmlx-0.1.0/.github/workflows/homebrew-update.yml +16 -0
- ppmlx-0.1.0/.github/workflows/release.yml +37 -0
- ppmlx-0.1.0/.github/workflows/tests.yml +23 -0
- ppmlx-0.1.0/.gitignore +19 -0
- ppmlx-0.1.0/CLAUDE.md +113 -0
- ppmlx-0.1.0/LICENSE +21 -0
- ppmlx-0.1.0/PKG-INFO +419 -0
- ppmlx-0.1.0/README.md +359 -0
- ppmlx-0.1.0/homebrew/Formula/ppmlx.rb +85 -0
- ppmlx-0.1.0/ppmlx/__init__.py +5 -0
- ppmlx-0.1.0/ppmlx/cli.py +2037 -0
- ppmlx-0.1.0/ppmlx/config.py +139 -0
- ppmlx-0.1.0/ppmlx/db.py +320 -0
- ppmlx-0.1.0/ppmlx/engine.py +415 -0
- ppmlx-0.1.0/ppmlx/engine_embed.py +123 -0
- ppmlx-0.1.0/ppmlx/engine_vlm.py +145 -0
- ppmlx-0.1.0/ppmlx/memory.py +80 -0
- ppmlx-0.1.0/ppmlx/modelfile.py +234 -0
- ppmlx-0.1.0/ppmlx/models.py +442 -0
- ppmlx-0.1.0/ppmlx/quantize.py +161 -0
- ppmlx-0.1.0/ppmlx/registry.py +50 -0
- ppmlx-0.1.0/ppmlx/registry_data.json +1687 -0
- ppmlx-0.1.0/ppmlx/schema.py +180 -0
- ppmlx-0.1.0/ppmlx/server.py +1986 -0
- ppmlx-0.1.0/pyproject.toml +65 -0
- ppmlx-0.1.0/scripts/bench_common.sh +499 -0
- ppmlx-0.1.0/scripts/bench_compare.sh +141 -0
- ppmlx-0.1.0/scripts/bench_ollama_glm.sh +9 -0
- ppmlx-0.1.0/scripts/bench_ollama_gptoss.sh +9 -0
- ppmlx-0.1.0/scripts/bench_ollama_qwen.sh +9 -0
- ppmlx-0.1.0/scripts/bench_ppmlx_glm.sh +9 -0
- ppmlx-0.1.0/scripts/bench_ppmlx_gptoss.sh +9 -0
- ppmlx-0.1.0/scripts/bench_ppmlx_qwen.sh +9 -0
- ppmlx-0.1.0/scripts/install.sh +122 -0
- ppmlx-0.1.0/scripts/reinstall.sh +14 -0
- ppmlx-0.1.0/tests/__init__.py +0 -0
- ppmlx-0.1.0/tests/conftest.py +82 -0
- ppmlx-0.1.0/tests/test_cli.py +163 -0
- ppmlx-0.1.0/tests/test_config.py +269 -0
- ppmlx-0.1.0/tests/test_db.py +148 -0
- ppmlx-0.1.0/tests/test_engine.py +397 -0
- ppmlx-0.1.0/tests/test_engine_embed.py +93 -0
- ppmlx-0.1.0/tests/test_engine_vlm.py +122 -0
- ppmlx-0.1.0/tests/test_memory.py +96 -0
- ppmlx-0.1.0/tests/test_modelfile.py +255 -0
- ppmlx-0.1.0/tests/test_models.py +170 -0
- ppmlx-0.1.0/tests/test_quantize.py +197 -0
- ppmlx-0.1.0/tests/test_schema.py +232 -0
- ppmlx-0.1.0/tests/test_server.py +291 -0
ppmlx-0.1.0/.github/workflows/homebrew-update.yml
ADDED

@@ -0,0 +1,16 @@
name: Update Homebrew Formula

on:
  workflow_run:
    workflows: ["Release"]
    types: [completed]

jobs:
  update-formula:
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    steps:
      - name: Trigger Homebrew tap update
        run: |
          echo "TODO: Update homebrew-ppmlx tap formula with new version"
          echo "Steps: download sdist, run brew update-python-resources, update formula sha256"
ppmlx-0.1.0/.github/workflows/release.yml
ADDED

@@ -0,0 +1,37 @@
name: Release

on:
  push:
    tags:
      - 'v*'

permissions:
  id-token: write

jobs:
  test-publish:
    runs-on: ubuntu-latest
    environment: testpypi
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v5
      - name: Build
        run: uv build
      - name: Publish to TestPyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/

  publish:
    needs: test-publish
    runs-on: ubuntu-latest
    environment: pypi
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v5
      - name: Build
        run: uv build
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
ppmlx-0.1.0/.github/workflows/tests.yml
ADDED

@@ -0,0 +1,23 @@
name: Tests

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          version: "latest"
      - name: Set up Python 3.11
        run: uv python install 3.11
      - name: Install dependencies
        run: uv sync --python 3.11
      - name: Run tests
        run: uv run pytest tests/ -v --tb=short
ppmlx-0.1.0/.gitignore
ADDED
ppmlx-0.1.0/CLAUDE.md
ADDED
@@ -0,0 +1,113 @@
# ppmlx

CLI for running LLMs on Apple Silicon via MLX with an OpenAI-compatible API.

## Dev Setup

```bash
uv sync --python 3.11
uv run pytest tests/ -v
```

## Starting the Server

```bash
# Basic start (default port 6767)
uv run ppmlx serve

# Pre-load a model on startup
uv run ppmlx serve --model llama3

# Bind to all interfaces (e.g. for LAN access)
uv run ppmlx serve --host 0.0.0.0 --port 6767

# Interactive model selection
uv run ppmlx serve --interactive
```

Server listens on `http://127.0.0.1:6767` by default.

## Connecting (OpenAI-compatible)

**Python SDK:**
```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:6767/v1", api_key="local")
response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "Hello"}],
)
```

**curl:**
```bash
# List models
curl http://localhost:6767/v1/models

# Chat
curl http://localhost:6767/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}'

# Embeddings
curl http://localhost:6767/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "nomic-embed", "input": "Hello world"}'
```

**Any OpenAI-compatible tool** (LangChain, LlamaIndex, Open WebUI, etc.):
- `base_url`: `http://localhost:6767/v1`
- `api_key`: any non-empty string (e.g. `"local"`)

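For example, a minimal LangChain wiring might look like this (a sketch only: `langchain-openai` is a third-party package, not a ppmlx dependency):

```python
# Sketch: langchain-openai is a separate install (`pip install langchain-openai`).
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="llama3",
    base_url="http://localhost:6767/v1",  # point the client at the local server
    api_key="local",                      # any non-empty string
)
print(llm.invoke("Hello").content)
```
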
## API Endpoints

| Endpoint | Description |
|---|---|
| `POST /v1/chat/completions` | Chat (streaming supported) |
| `POST /v1/responses` | Responses API (Codex, newer OpenAI tools) |
| `POST /v1/completions` | Text completion |
| `POST /v1/embeddings` | Embeddings |
| `GET /v1/models` | List loaded/available models |
| `GET /health` | Health check |
| `GET /metrics` | Usage metrics |

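A quick smoke test of the non-chat endpoints from Python, using `httpx` (already a ppmlx dependency); the exact payload shapes are defined in `server.py`:

```python
import httpx

base = "http://localhost:6767"

# Health check and usage metrics; payload shapes are server-defined.
print(httpx.get(f"{base}/health").json())
print(httpx.get(f"{base}/metrics").json())

# OpenAI-compatible model listing.
print(httpx.get(f"{base}/v1/models").json())
```
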
## Key CLI Commands

```bash
ppmlx pull <model>   # Download a model
ppmlx run <model>    # Interactive chat REPL
ppmlx serve          # Start API server
ppmlx list           # List local models
ppmlx ps             # Show loaded models + memory usage
ppmlx rm <model>     # Remove a model
```

## Config

`~/.ppmlx/config.toml` — all optional. Key fields:
```toml
[server]
host = "127.0.0.1"
port = 6767

[generation]
temperature = 0.7
max_tokens = 2048
```

## Project Structure

```
ppmlx/
  cli.py           # Typer CLI (entry point)
  server.py        # FastAPI app (OpenAI-compatible routes)
  engine.py        # MLX LLM inference
  engine_embed.py  # MLX embedding inference
  models.py        # Model registry + HuggingFace download
  config.py        # Config loading (~/.ppmlx/config.toml)
  db.py            # SQLite request logging
  schema.py        # Pydantic request/response schemas
tests/
  conftest.py      # MLX stubs for CI (no GPU needed)
```
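One common way to write such MLX stubs, since CI runners have no Apple GPU (a sketch; the real `tests/conftest.py` may differ):

```python
# Hypothetical illustration; see tests/conftest.py for the real stubs.
import sys
from unittest.mock import MagicMock

# Register fake MLX modules before any ppmlx import pulls them in,
# so the suite runs on Linux CI without a Metal GPU.
for name in ("mlx", "mlx.core", "mlx_lm", "mlx_vlm"):
    sys.modules.setdefault(name, MagicMock())
```
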
ppmlx-0.1.0/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 pp-llm Contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
ppmlx-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,419 @@
Metadata-Version: 2.4
Name: ppmlx
Version: 0.1.0
Summary: CLI for running LLMs on Apple Silicon via MLX
Project-URL: Homepage, https://ppmlx.dev
Project-URL: Repository, https://github.com/PingCompany/ppmlx
Project-URL: Issues, https://github.com/PingCompany/ppmlx/issues
Project-URL: Documentation, https://github.com/PingCompany/ppmlx#readme
Author-email: Rafał Wyderka <rafal@ppmlx.dev>
License: MIT License

        Copyright (c) 2026 pp-llm Contributors

        Permission is hereby granted, free of charge, to any person obtaining a copy
        of this software and associated documentation files (the "Software"), to deal
        in the Software without restriction, including without limitation the rights
        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
        copies of the Software, and to permit persons to whom the Software is
        furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be included in all
        copies or substantial portions of the Software.

        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
        SOFTWARE.
License-File: LICENSE
Keywords: apple-silicon,cli,inference,llm,local-ai,mlx,ollama
Classifier: Development Status :: 4 - Beta
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: MacOS
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: <3.13,>=3.11
Requires-Dist: fastapi>=0.115
Requires-Dist: httpx>=0.27
Requires-Dist: huggingface-hub>=0.24
Requires-Dist: mlx-lm>=0.22; sys_platform == 'darwin'
Requires-Dist: mlx-vlm>=0.1.18; sys_platform == 'darwin'
Requires-Dist: prompt-toolkit>=3.0
Requires-Dist: pydantic>=2.0
Requires-Dist: questionary>=2.0
Requires-Dist: rich>=13.0
Requires-Dist: setproctitle>=1.3
Requires-Dist: sse-starlette>=2.0
Requires-Dist: tomli-w>=1.0
Requires-Dist: typer>=0.12
Requires-Dist: uvicorn[standard]>=0.30
Provides-Extra: embeddings
Requires-Dist: mlx-embeddings>=0.0.5; (sys_platform == 'darwin') and extra == 'embeddings'
Description-Content-Type: text/markdown

# ppmlx

**CLI for running LLMs on Apple Silicon via MLX** — OpenAI-compatible API on port 6767.

---

## Install

> **Requires:** macOS on Apple Silicon (M1/M2/M3/M4), Python 3.11+

### uv (recommended)

```bash
uv tool install ppmlx
```

### pipx

```bash
pipx install ppmlx
```

### curl | sh (one-liner)

```bash
curl -fsSL https://raw.githubusercontent.com/PingCompany/ppmlx/main/scripts/install.sh | sh
```

### From source

```bash
git clone https://github.com/PingCompany/ppmlx
cd ppmlx
uv tool install .
```

### Homebrew

Homebrew tap coming soon. For now, use `uv tool install ppmlx`.

---

## Quick Start

```bash
# 1. Download a model
ppmlx pull llama3

# 2. Interactive chat REPL
ppmlx run llama3

# 3. Start OpenAI-compatible API server on :6767
ppmlx serve
```

---

## OpenAI SDK Example

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:6767/v1", api_key="local")

response = client.chat.completions.create(
    model="llama3",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    stream=True,
)

for chunk in response:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```

---

## curl Example

```bash
# List available models
curl http://localhost:6767/v1/models

# Chat completion
curl http://localhost:6767/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama3",
    "messages": [{"role": "user", "content": "What is Apple Silicon?"}],
    "stream": false
  }'

# Embeddings
curl http://localhost:6767/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "nomic-embed", "input": "Hello world"}'
```

---

## Model Aliases

### Llama Family

| Alias | HuggingFace Repo |
|---|---|
| `llama3` | mlx-community/Meta-Llama-3-8B-Instruct-4bit |
| `llama3-70b` | mlx-community/Meta-Llama-3-70B-Instruct-4bit |
| `llama3.2` | mlx-community/Llama-3.2-3B-Instruct-4bit |
| `llama3.1` | mlx-community/Meta-Llama-3.1-8B-Instruct-4bit |

### Mistral / Mixtral Family

| Alias | HuggingFace Repo |
|---|---|
| `mistral` | mlx-community/Mistral-7B-Instruct-v0.3-4bit |
| `mixtral` | mlx-community/Mixtral-8x7B-Instruct-v0.1-4bit |
| `mistral-nemo` | mlx-community/Mistral-Nemo-Instruct-2407-4bit |

### Qwen Family

| Alias | HuggingFace Repo |
|---|---|
| `qwen2.5` | mlx-community/Qwen2.5-7B-Instruct-4bit |
| `qwen2.5-14b` | mlx-community/Qwen2.5-14B-Instruct-4bit |
| `qwen2.5-72b` | mlx-community/Qwen2.5-72B-Instruct-4bit |

### Phi / Gemma Family

| Alias | HuggingFace Repo |
|---|---|
| `phi4` | mlx-community/phi-4-4bit |
| `phi3.5` | mlx-community/Phi-3.5-mini-instruct-4bit |
| `gemma2` | mlx-community/gemma-2-9b-it-4bit |
| `gemma2-27b` | mlx-community/gemma-2-27b-it-4bit |

### Code Models

| Alias | HuggingFace Repo |
|---|---|
| `codellama` | mlx-community/CodeLlama-13b-Instruct-hf-4bit |
| `deepseek-coder` | mlx-community/deepseek-coder-6.7b-instruct-4bit |

### Embedding Models

| Alias | HuggingFace Repo |
|---|---|
| `nomic-embed` | mlx-community/nomic-embed-text-v1.5 |
| `bge-small` | mlx-community/bge-small-en-v1.5 |

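Aliases work anywhere a model name is accepted. For example, a small similarity check against the `nomic-embed` alias (a sketch using the OpenAI SDK; embedding dimensionality depends on the model):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:6767/v1", api_key="local")

def embed(text: str) -> list[float]:
    # The alias resolves server-side to the mlx-community repo above.
    return client.embeddings.create(model="nomic-embed", input=text).data[0].embedding

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    return dot / ((sum(x * x for x in a) ** 0.5) * (sum(y * y for y in b) ** 0.5))

print(cosine(embed("apple silicon gpu"), embed("metal performance shaders")))
```
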
---

## RAM Requirements

| Model Size | Min RAM | Recommended RAM | Notes |
|---|---|---|---|
| 1-3B params | 4 GB | 8 GB | Llama 3.2 3B, Phi 3.5 mini |
| 7-8B params | 8 GB | 16 GB | Llama 3 8B, Mistral 7B |
| 13-14B | 16 GB | 24 GB | CodeLlama 13B, Qwen 2.5 14B |
| 27-34B | 24 GB | 36 GB | Gemma 2 27B |
| 70-72B | 48 GB | 64 GB | Llama 3 70B, Qwen 2.5 72B |

> All values are for 4-bit quantized models. Unquantized models require 2-4x more RAM.

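The minimums follow from the weights: a 4-bit model stores roughly half a byte per parameter, and the rest is KV cache and OS headroom. A back-of-envelope sketch (a rough rule of thumb, not ppmlx's internal logic; `ppmlx estimate` gives the real numbers):

```python
def rough_weights_gb(params_billion: float, bits: int = 4) -> float:
    """Approximate weight footprint in GB: params * (bits / 8) bytes each."""
    return params_billion * bits / 8

# 8B at 4-bit -> ~4 GB of weights, which is why the table pairs 7-8B
# models with an 8 GB minimum (weights + KV cache + OS headroom).
print(f"{rough_weights_gb(8):.1f} GB for an 8B model")
print(f"{rough_weights_gb(70):.1f} GB for a 70B model")
```
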
---

## CLI Commands

| Command | Description |
|---|---|
| `ppmlx pull <model>` | Download a model from HuggingFace Hub |
| `ppmlx run <model>` | Start interactive chat REPL |
| `ppmlx serve` | Start OpenAI-compatible API server on :6767 |
| `ppmlx list` | List locally downloaded models |
| `ppmlx rm <model>` | Remove a downloaded model |
| `ppmlx alias <name> <repo>` | Add a custom model alias |
| `ppmlx aliases` | Show all model aliases (built-in + custom) |
| `ppmlx ps` | Show currently loaded models and memory usage |
| `ppmlx quantize` | Convert and quantize a model to MLX format |
| `ppmlx create` | Create a custom model from a Modelfile |
| `ppmlx logs` | Query the request log database |
| `ppmlx info <model>` | Show detailed model information |
| `ppmlx estimate <model>` | Estimate RAM requirements before downloading |

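The commands also compose with ordinary process tools. A sketch of pre-fetching models from Python (a hypothetical workflow; the CLI's output is plain text and not guaranteed stable):

```python
import subprocess

# Pre-fetch a few aliases so the server never downloads at request time.
for alias in ("llama3", "nomic-embed"):
    subprocess.run(["ppmlx", "pull", alias], check=True)

# Show what landed on disk.
subprocess.run(["ppmlx", "list"], check=True)
```
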
---

## Modelfile Example

Create a `Modelfile` to define a custom model with a system prompt:

```
FROM llama3

SYSTEM """
You are a helpful coding assistant. You write clean, well-documented code
and explain your reasoning step by step.
"""

PARAMETER temperature 0.2
PARAMETER max_tokens 4096
PARAMETER top_p 0.9
```

Then build it:

```bash
ppmlx create coding-assistant -f Modelfile
ppmlx run coding-assistant
```

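Once created, the model should be addressable by name like any alias. A sketch of calling it over the API (this assumes `ppmlx serve` exposes Modelfile-created models under the name given to `ppmlx create`):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:6767/v1", api_key="local")

# The Modelfile's SYSTEM prompt and PARAMETER defaults apply server-side,
# so the request only needs the user message.
reply = client.chat.completions.create(
    model="coding-assistant",  # name passed to `ppmlx create` (assumed exposed via API)
    messages=[{"role": "user", "content": "Write a binary search in Python."}],
)
print(reply.choices[0].message.content)
```
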
---

## Configuration

ppmlx reads configuration from `~/.ppmlx/config.toml`. All values are optional.

```toml
[server]
host = "127.0.0.1"       # Bind address (default: 127.0.0.1)
port = 6767              # Port (default: 6767)
cors = true              # Enable CORS (default: true)
cors_origins = ["*"]     # Allowed CORS origins

[models]
dir = "~/.ppmlx/models"  # Model storage directory
default_alias = "llama3" # Default model for bare requests

[generation]
temperature = 0.7        # Default sampling temperature
max_tokens = 2048        # Default max output tokens
top_p = 0.9              # Default top-p
repetition_penalty = 1.1

[logging]
db_path = "~/.ppmlx/ppmlx.db"  # SQLite log database
log_requests = true            # Log all requests
log_level = "info"             # Server log level
```

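Because `tomli-w` ships as a ppmlx dependency, a config file can also be written programmatically. A sketch (the keys mirror the example above; writing the file by hand works just as well):

```python
import pathlib
import tomli_w

# Assumed example values; every key is optional.
config = {
    "server": {"host": "127.0.0.1", "port": 6767},
    "generation": {"temperature": 0.2, "max_tokens": 4096},
}

path = pathlib.Path.home() / ".ppmlx" / "config.toml"
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(tomli_w.dumps(config))
```
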
---

## Architecture

```
┌─────────────────────────────────────────────────────┐
│                     ppmlx CLI                       │
│                   (typer + rich)                    │
│   pull / run / serve / list / rm / quantize / ...   │
└───────────────────┬─────────────────────────────────┘
                    │
         ┌──────────▼──────────┐
         │   FastAPI Server    │
         │     port :6767      │
         │                     │
         │ /v1/chat/completions│
         │ /v1/completions     │
         │ /v1/embeddings      │
         │ /v1/models          │
         │ /health  /metrics   │
         └──────┬──────┬───────┘
                │      │
      ┌─────────▼──┐ ┌─▼──────────────┐
      │  LLM Engine│ │  Embed Engine  │
      │  (mlx-lm)  │ │(mlx-embeddings)│
      └─────────┬──┘ └─┬──────────────┘
                │      │
      ┌─────────▼───────▼──────────────┐
      │        MLX / Metal GPU         │
      │  Apple Silicon Unified Memory  │
      └────────────────────────────────┘
                    │
      ┌─────────────▼──────────────────┐
      │      SQLite Request Log        │
      │      ~/.ppmlx/ppmlx.db         │
      └────────────────────────────────┘
```

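The request log at the bottom of the diagram is an ordinary SQLite file, so the standard library can inspect it. A sketch that discovers the schema rather than assuming table or column names (the real layout lives in `db.py`, and `ppmlx logs` is the supported interface):

```python
import sqlite3
from pathlib import Path

db = sqlite3.connect(Path.home() / ".ppmlx" / "ppmlx.db")

# List tables and their CREATE statements instead of hard-coding names.
for name, sql in db.execute(
    "SELECT name, sql FROM sqlite_master WHERE type = 'table'"
):
    print(name)
    print(sql)
db.close()
```
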
---

## Uninstall

### uv

```bash
uv tool uninstall ppmlx
```

### pipx

```bash
pipx uninstall ppmlx
```

### Manual cleanup (all methods)

```bash
# Remove downloaded models and config
rm -rf ~/.ppmlx
```

---

## Contributing

1. Fork the repository on GitHub.
2. Create a feature branch: `git checkout -b feat/my-feature`
3. Install dev dependencies: `uv sync --python 3.11`
4. Run tests: `uv run pytest tests/ -v`
5. Submit a pull request.

### Development Setup

```bash
git clone https://github.com/PingCompany/ppmlx
cd ppmlx
uv sync --python 3.11
uv run ppmlx --version
uv run pytest tests/ -v
```

### Project Structure

```
ppmlx/
  __init__.py       # version
  cli.py            # Typer CLI entry point
  server.py         # FastAPI application
  engine.py         # MLX LLM inference engine
  engine_embed.py   # MLX embedding engine
  engine_vlm.py     # MLX vision-language engine
  models.py         # model registry, aliases, download
  config.py         # configuration loading
  db.py             # SQLite request logging
  memory.py         # RAM estimation utilities
  modelfile.py      # Modelfile parser
  quantize.py       # MLX quantization helpers
tests/
  conftest.py       # MLX stubs for CI
  test_cli.py       # CLI tests
scripts/
  install.sh        # One-liner installer
homebrew/
  Formula/ppmlx.rb  # Homebrew formula
.github/workflows/
  tests.yml         # CI tests
  release.yml       # PyPI release
  homebrew-update.yml
```

---

## License

MIT — see [LICENSE](LICENSE).