mlx-code 0.0.25__tar.gz → 0.0.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mlx_code-0.0.25 → mlx_code-0.0.26}/PKG-INFO +78 -38
- {mlx_code-0.0.25 → mlx_code-0.0.26}/README.md +75 -37
- mlx_code-0.0.25/mlx_code/ntui.py → mlx_code-0.0.26/mlx_code/bare.py +1 -0
- mlx_code-0.0.26/mlx_code/bats.py +299 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/main.py +65 -11
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/repl.py +7 -7
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/view_log.py +1 -1
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code.egg-info/PKG-INFO +78 -38
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code.egg-info/SOURCES.txt +2 -1
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code.egg-info/requires.txt +2 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/setup.py +4 -1
- {mlx_code-0.0.25 → mlx_code-0.0.26}/LICENSE +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/__init__.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/apis.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/gits.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/lsp_tool.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/mcb.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/mcb_tool.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/stream_log.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/tools.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/util.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code/view_git.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code.egg-info/dependency_links.txt +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code.egg-info/entry_points.txt +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/mlx_code.egg-info/top_level.txt +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/setup.cfg +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/tests/__init__.py +0 -0
- {mlx_code-0.0.25 → mlx_code-0.0.26}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mlx-code
|
|
3
|
-
Version: 0.0.25
|
|
3
|
+
Version: 0.0.26
|
|
4
4
|
Summary: Coding Agent for Mac
|
|
5
5
|
Home-page: https://josefalbers.github.io/mlx-code/
|
|
6
6
|
Author: J Joe
|
|
@@ -17,6 +17,8 @@ Requires-Dist: httpx
|
|
|
17
17
|
Requires-Dist: pydantic
|
|
18
18
|
Requires-Dist: textual>=8.2.7
|
|
19
19
|
Requires-Dist: rich>=15.0.0
|
|
20
|
+
Requires-Dist: starlette
|
|
21
|
+
Requires-Dist: uvicorn
|
|
20
22
|
Provides-Extra: all
|
|
21
23
|
Requires-Dist: python-lsp-server[all]; extra == "all"
|
|
22
24
|
Requires-Dist: GitPython; extra == "all"
|
|
@@ -47,7 +49,7 @@ A Git-native coding agent that can run entirely on your Mac. No API keys, no clo
|
|
|
47
49
|
```
|
|
48
50
|
Conversation tree (nodes = git commits with embedded chat history)
|
|
49
51
|
|
|
50
|
-
main
|
|
52
|
+
main ──●──●──●──●──●──●──●──●──●──●──●──●──●──●
|
|
51
53
|
│ │
|
|
52
54
|
│ └── branch-1 ──●──●──●
|
|
53
55
|
│ │ ┌────────────┐
|
|
@@ -66,21 +68,21 @@ REPL tabs (each tab = a git branch + agent) │
|
|
|
66
68
|
│ └──────┘ └────┬─────┘ └──────────┘ └────────────┘ │
|
|
67
69
|
└─────────────────┼──────────────────────────────────────┘
|
|
68
70
|
│
|
|
69
|
-
|
|
71
|
+
├─────────────────────────────────────────► Each tab is an independent Agent
|
|
70
72
|
│
|
|
71
|
-
|
|
72
|
-
│ Agent
|
|
73
|
-
│
|
|
74
|
-
│ │ API:
|
|
75
|
-
│ │
|
|
76
|
-
│ │ Claude
|
|
77
|
-
│ │ Gemini
|
|
78
|
-
│ │ OpenAI
|
|
79
|
-
│
|
|
80
|
-
│
|
|
81
|
-
│ Git worktree
|
|
82
|
-
│ (isolation + session state)
|
|
83
|
-
|
|
73
|
+
┌────┴─────────────────────────────────────┐
|
|
74
|
+
│ Agent │
|
|
75
|
+
│ ┌────────────────┐ ┌────────────────┐ │
|
|
76
|
+
│ │ API: │ │ Tools: │ │
|
|
77
|
+
│ │ Local (mlx-lm) │ │ Read Write │ │
|
|
78
|
+
│ │ Claude │ │ Edit Bash │ │
|
|
79
|
+
│ │ Gemini │ │ Grep Find │ │
|
|
80
|
+
│ │ OpenAI │ │ Ls Skill │ │
|
|
81
|
+
│ └────────────────┘ │ Agent ─────────┼──┼───► Spawns child Agent
|
|
82
|
+
│ └────────────────┘ │ (each with own tools + worktree + etc)
|
|
83
|
+
│ Git worktree │
|
|
84
|
+
│ (isolation + session state) │
|
|
85
|
+
└──────────────────────────────────────────┘
|
|
84
86
|
```
|
|
85
87
|
|
|
86
88
|
Each layer is importable and composable on its own. A commit records state, a branch records an alternative path, and a tab is just a live view over an `Agent`.
|
|
@@ -104,9 +106,9 @@ uvx --from mlx-code mlc
|
|
|
104
106
|
# or install into the current environment
|
|
105
107
|
pip install mlx-code
|
|
106
108
|
|
|
107
|
-
|
|
109
|
+
# launch
|
|
110
|
+
mlc # with a local MLX model
|
|
108
111
|
mlc-run --api gemini # or use a remote provider
|
|
109
|
-
mlc-run --api deepseek --model deepseek-v4-flash
|
|
110
112
|
```
|
|
111
113
|
|
|
112
114
|
That's it. The first run starts a local inference server and drops you into the REPL.
|
|
@@ -128,12 +130,12 @@ That's it. The first run starts a local inference server and drops you into the
|
|
|
128
130
|
|
|
129
131
|
**Git is the database.** When the agent makes file changes, they’re committed to a git worktree with the full conversation embedded in the commit message. Resume any past session by hash, branch from any checkpoint, and inspect the agent timeline with `git log`. No proprietary state files, just Git.
|
|
130
132
|
|
|
131
|
-
**Your working directory is never at risk
|
|
132
|
-
|
|
133
|
-
**Built-in safety nets.** Subprocess environment variables go through an explicit allowlist, so secrets in your shell are never leaked to agent-spawned processes.
|
|
133
|
+
**Built-in safety nets.** Your working directory is never at risk. The agent operates inside a `git worktree`, not your checkout. It can make a mess, and you can inspect or discard it without ever touching `main`. Subprocess environment variables go through an explicit allowlist, so secrets in your shell are never leaked to agent-spawned processes.
|
|
134
134
|
|
|
135
135
|
**Batteries included.** Everything ships in one pip install: the MLX inference engine, the multi-protocol API server, the agent loop, the tools, and the TUI. No llama.cpp, no ollama, no vLLM bridge to find and configure. And the server natively speaks OpenAI, Anthropic, Gemini, and Codex wire formats simultaneously, so `claude`, `codex`, and `gemini` CLIs can all work against your local model without a translation layer.
|
|
136
136
|
|
|
137
|
+
**Continuous batching.** When started with `mlc --engine batch`, the local inference server runs a continuous batching engine that processes multiple sequences concurrently. When you spawn parallel agents (e.g., multiple tabs, `asyncio.gather` pipelines, or delegated sub-tasks), they all share the same GPU context and are stepped together each tick. A prefix cache persists KV snapshots to disk, so repeated system prompts and conversation prefixes are prefilled once and reused across sessions. No request queueing, no waiting for the previous agent to finish.
|
|
138
|
+
|
|
137
139
|
---
|
|
138
140
|
|
|
139
141
|
## Agent primitive
|
|
@@ -171,12 +173,12 @@ agent.messages = messages
|
|
|
171
173
|
await agent.run("now add unit tests")
|
|
172
174
|
```
|
|
173
175
|
|
|
174
|
-
Branch from any point in the conversation
|
|
176
|
+
Branch from any point in the conversation. Each branch gets its own worktree:
|
|
175
177
|
|
|
176
178
|
```
|
|
177
179
|
/branch # branch from current state
|
|
178
180
|
/branch --rev 2 # branch from the 2nd user turn
|
|
179
|
-
/branch --rev 3
|
|
181
|
+
/branch --rev 3 make it use httpx instead
|
|
180
182
|
```
|
|
181
183
|
|
|
182
184
|
Since it's just git, you can inspect the timeline outside the REPL:
|
|
@@ -241,6 +243,43 @@ Reliability comes from specialization plus constraint. A read-only reviewer can'
|
|
|
241
243
|
|
|
242
244
|
---
|
|
243
245
|
|
|
246
|
+
## Continuous batching
|
|
247
|
+
|
|
248
|
+
The local server can run multiple inference sequences concurrently inside a single batch step. Instead of a global lock that serialises one request at a time, the batching engine maintains a live set of active sequences and yields tokens for all of them on every step.
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
mlc --engine batch # continuous batching + built-in REPL
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
This unlocks true parallelism for multi-agent workloads:
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
import asyncio
|
|
258
|
+
from mlx_code.repl import Agent
|
|
259
|
+
|
|
260
|
+
async def main():
|
|
261
|
+
agents = [Agent() for _ in range(4)]
|
|
262
|
+
await asyncio.gather(*[
|
|
263
|
+
a.run(f"Research topic: {t}")
|
|
264
|
+
for a, t in zip(agents, ["consensus", "cryptography", "networking", "storage"])
|
|
265
|
+
])
|
|
266
|
+
|
|
267
|
+
asyncio.run(main())
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
All four agents generate simultaneously inside the same batch. No sequential blocking.
|
|
271
|
+
|
|
272
|
+
### Health endpoint
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
curl http://127.0.0.1:8000/health
|
|
276
|
+
# {"status":"ok","model":"mlx-community/Qwen3.5-4B-OptiQ-4bit","active_sequences":2,"prefix_cache_files":5}
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
`active_sequences` shows how many agents are generating right now; `prefix_cache_files` shows how many prefix KV snapshots are stored on disk.
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
244
283
|
## Command Line
|
|
245
284
|
|
|
246
285
|
### `mlc`: local server + harness
|
|
@@ -248,20 +287,20 @@ Reliability comes from specialization plus constraint. A read-only reviewer can'
|
|
|
248
287
|
Starts the MLX inference server and launches the built-in TUI harness against it.
|
|
249
288
|
|
|
250
289
|
```bash
|
|
251
|
-
# Default: local server + default
|
|
290
|
+
# Default: local server + default harness
|
|
252
291
|
mlc
|
|
253
292
|
|
|
254
|
-
#
|
|
255
|
-
mlc --
|
|
293
|
+
# Continuous batching mode (default is sequential caching mode)
|
|
294
|
+
mlc --engine batch
|
|
295
|
+
|
|
296
|
+
# Server only, no harness
|
|
297
|
+
mlc --leash none
|
|
256
298
|
|
|
257
299
|
# Use a different harness (routes traffic through the local server)
|
|
258
300
|
mlc --leash claude
|
|
259
301
|
mlc --leash gemini
|
|
260
302
|
mlc --leash codex
|
|
261
303
|
|
|
262
|
-
# Server only, no harness
|
|
263
|
-
mlc --leash none
|
|
264
|
-
|
|
265
304
|
# Specify a model
|
|
266
305
|
mlc --model mlx-community/Qwen3.5-4B-OptiQ-4bit
|
|
267
306
|
|
|
@@ -312,7 +351,7 @@ mlc-run --api codex
|
|
|
312
351
|
echo "explain lsp.py" | mlc-run -a deepseek | cat - PLAN.md | mlc-run --url http://localhost:9000
|
|
313
352
|
|
|
314
353
|
# Simple terminal REPL (no TUI)
|
|
315
|
-
mlc-run --
|
|
354
|
+
mlc-run --bare
|
|
316
355
|
```
|
|
317
356
|
|
|
318
357
|
---
|
|
@@ -437,18 +476,19 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
437
476
|
|
|
438
477
|
| Command | Description |
|
|
439
478
|
|---|---|
|
|
440
|
-
| `/
|
|
479
|
+
| `/branch [--rev N] [prompt]` | Open a new branch tab from the current (or earlier) checkpoint |
|
|
480
|
+
| `/diff [--all]` | Show a side-by-side diff of changes in the worktree |
|
|
441
481
|
| `/clear [--config F]` | Clear conversation; `--config` reloads agent from a JSON/YAML file |
|
|
482
|
+
| `/tab [N]` | Jump to tab N |
|
|
442
483
|
| `/history [--raw]` | Show conversation transcript; `--raw` shows the raw API message log |
|
|
443
|
-
| `/diff [--all]` | Show a side-by-side diff of changes in the worktree |
|
|
444
|
-
| `/errors` | Show timestamped error log for the current tab |
|
|
445
484
|
| `/tools` | List active tools |
|
|
446
|
-
| `/branch [--rev N] [prompt]` | Open a new branch tab from the current (or earlier) checkpoint |
|
|
447
485
|
| `/abort` | Abort the running agent |
|
|
486
|
+
| `/errors` | Show timestamped error log for the current tab |
|
|
448
487
|
| `/export [path]` | Export session to JSON |
|
|
449
488
|
| `/exit [--all]` | Close branch tab, or exit the app |
|
|
450
|
-
|
|
|
451
|
-
|
|
|
489
|
+
| `/help` | Show command reference |
|
|
490
|
+
| `!command` | Run a shell command; output captured in the TUI (eg, `ls`, `cat hello.c`) |
|
|
491
|
+
| `$command` | Run an interactive command (eg, `vim`, `yazi`, `less hello.c`) |
|
|
452
492
|
|
|
453
493
|
### Key bindings
|
|
454
494
|
|
|
@@ -458,7 +498,7 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
458
498
|
| `Ctrl-J` | Insert newline |
|
|
459
499
|
| `Ctrl-1` … `Ctrl-9` | Jump to tab N |
|
|
460
500
|
| `Ctrl-,` / `Ctrl-.` | Cycle through tabs |
|
|
461
|
-
| `Ctrl-C` |
|
|
501
|
+
| `Ctrl-C` | Clear input, or abort running agent |
|
|
462
502
|
| `Ctrl-D` | Close branch tab, or exit app |
|
|
463
503
|
| `Ctrl-R` | Recall last prompt into editor |
|
|
464
504
|
|
|
@@ -476,7 +516,7 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
476
516
|
| `Skill` | Retrieve named skill instructions from config |
|
|
477
517
|
| `Agent` | Spawn an autonomous sub-agent for delegated work |
|
|
478
518
|
|
|
479
|
-
All file tools enforce path sandboxing
|
|
519
|
+
All file tools enforce path sandboxing. The agent cannot read or write outside the worktree.
|
|
480
520
|
|
|
481
521
|
### Backends
|
|
482
522
|
|
|
@@ -11,7 +11,7 @@ A Git-native coding agent that can run entirely on your Mac. No API keys, no clo
|
|
|
11
11
|
```
|
|
12
12
|
Conversation tree (nodes = git commits with embedded chat history)
|
|
13
13
|
|
|
14
|
-
main
|
|
14
|
+
main ──●──●──●──●──●──●──●──●──●──●──●──●──●──●
|
|
15
15
|
│ │
|
|
16
16
|
│ └── branch-1 ──●──●──●
|
|
17
17
|
│ │ ┌────────────┐
|
|
@@ -30,21 +30,21 @@ REPL tabs (each tab = a git branch + agent) │
|
|
|
30
30
|
│ └──────┘ └────┬─────┘ └──────────┘ └────────────┘ │
|
|
31
31
|
└─────────────────┼──────────────────────────────────────┘
|
|
32
32
|
│
|
|
33
|
-
|
|
33
|
+
├─────────────────────────────────────────► Each tab is an independent Agent
|
|
34
34
|
│
|
|
35
|
-
|
|
36
|
-
│ Agent
|
|
37
|
-
│
|
|
38
|
-
│ │ API:
|
|
39
|
-
│ │
|
|
40
|
-
│ │ Claude
|
|
41
|
-
│ │ Gemini
|
|
42
|
-
│ │ OpenAI
|
|
43
|
-
│
|
|
44
|
-
│
|
|
45
|
-
│ Git worktree
|
|
46
|
-
│ (isolation + session state)
|
|
47
|
-
|
|
35
|
+
┌────┴─────────────────────────────────────┐
|
|
36
|
+
│ Agent │
|
|
37
|
+
│ ┌────────────────┐ ┌────────────────┐ │
|
|
38
|
+
│ │ API: │ │ Tools: │ │
|
|
39
|
+
│ │ Local (mlx-lm) │ │ Read Write │ │
|
|
40
|
+
│ │ Claude │ │ Edit Bash │ │
|
|
41
|
+
│ │ Gemini │ │ Grep Find │ │
|
|
42
|
+
│ │ OpenAI │ │ Ls Skill │ │
|
|
43
|
+
│ └────────────────┘ │ Agent ─────────┼──┼───► Spawns child Agent
|
|
44
|
+
│ └────────────────┘ │ (each with own tools + worktree + etc)
|
|
45
|
+
│ Git worktree │
|
|
46
|
+
│ (isolation + session state) │
|
|
47
|
+
└──────────────────────────────────────────┘
|
|
48
48
|
```
|
|
49
49
|
|
|
50
50
|
Each layer is importable and composable on its own. A commit records state, a branch records an alternative path, and a tab is just a live view over an `Agent`.
|
|
@@ -68,9 +68,9 @@ uvx --from mlx-code mlc
|
|
|
68
68
|
# or install into the current environment
|
|
69
69
|
pip install mlx-code
|
|
70
70
|
|
|
71
|
-
|
|
71
|
+
# launch
|
|
72
|
+
mlc # with a local MLX model
|
|
72
73
|
mlc-run --api gemini # or use a remote provider
|
|
73
|
-
mlc-run --api deepseek --model deepseek-v4-flash
|
|
74
74
|
```
|
|
75
75
|
|
|
76
76
|
That's it. The first run starts a local inference server and drops you into the REPL.
|
|
@@ -92,12 +92,12 @@ That's it. The first run starts a local inference server and drops you into the
|
|
|
92
92
|
|
|
93
93
|
**Git is the database.** When the agent makes file changes, they’re committed to a git worktree with the full conversation embedded in the commit message. Resume any past session by hash, branch from any checkpoint, and inspect the agent timeline with `git log`. No proprietary state files, just Git.
|
|
94
94
|
|
|
95
|
-
**Your working directory is never at risk
|
|
96
|
-
|
|
97
|
-
**Built-in safety nets.** Subprocess environment variables go through an explicit allowlist, so secrets in your shell are never leaked to agent-spawned processes.
|
|
95
|
+
**Built-in safety nets.** Your working directory is never at risk. The agent operates inside a `git worktree`, not your checkout. It can make a mess, and you can inspect or discard it without ever touching `main`. Subprocess environment variables go through an explicit allowlist, so secrets in your shell are never leaked to agent-spawned processes.
|
|
98
96
|
|
|
99
97
|
**Batteries included.** Everything ships in one pip install: the MLX inference engine, the multi-protocol API server, the agent loop, the tools, and the TUI. No llama.cpp, no ollama, no vLLM bridge to find and configure. And the server natively speaks OpenAI, Anthropic, Gemini, and Codex wire formats simultaneously, so `claude`, `codex`, and `gemini` CLIs can all work against your local model without a translation layer.
|
|
100
98
|
|
|
99
|
+
**Continuous batching.** When started with `mlc --engine batch`, the local inference server runs a continuous batching engine that processes multiple sequences concurrently. When you spawn parallel agents (e.g., multiple tabs, `asyncio.gather` pipelines, or delegated sub-tasks), they all share the same GPU context and are stepped together each tick. A prefix cache persists KV snapshots to disk, so repeated system prompts and conversation prefixes are prefilled once and reused across sessions. No request queueing, no waiting for the previous agent to finish.
|
|
100
|
+
|
|
101
101
|
---
|
|
102
102
|
|
|
103
103
|
## Agent primitive
|
|
@@ -135,12 +135,12 @@ agent.messages = messages
|
|
|
135
135
|
await agent.run("now add unit tests")
|
|
136
136
|
```
|
|
137
137
|
|
|
138
|
-
Branch from any point in the conversation
|
|
138
|
+
Branch from any point in the conversation. Each branch gets its own worktree:
|
|
139
139
|
|
|
140
140
|
```
|
|
141
141
|
/branch # branch from current state
|
|
142
142
|
/branch --rev 2 # branch from the 2nd user turn
|
|
143
|
-
/branch --rev 3
|
|
143
|
+
/branch --rev 3 make it use httpx instead
|
|
144
144
|
```
|
|
145
145
|
|
|
146
146
|
Since it's just git, you can inspect the timeline outside the REPL:
|
|
@@ -205,6 +205,43 @@ Reliability comes from specialization plus constraint. A read-only reviewer can'
|
|
|
205
205
|
|
|
206
206
|
---
|
|
207
207
|
|
|
208
|
+
## Continuous batching
|
|
209
|
+
|
|
210
|
+
The local server can run multiple inference sequences concurrently inside a single batch step. Instead of a global lock that serialises one request at a time, the batching engine maintains a live set of active sequences and yields tokens for all of them on every step.
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
mlc --engine batch # continuous batching + built-in REPL
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
This unlocks true parallelism for multi-agent workloads:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
import asyncio
|
|
220
|
+
from mlx_code.repl import Agent
|
|
221
|
+
|
|
222
|
+
async def main():
|
|
223
|
+
agents = [Agent() for _ in range(4)]
|
|
224
|
+
await asyncio.gather(*[
|
|
225
|
+
a.run(f"Research topic: {t}")
|
|
226
|
+
for a, t in zip(agents, ["consensus", "cryptography", "networking", "storage"])
|
|
227
|
+
])
|
|
228
|
+
|
|
229
|
+
asyncio.run(main())
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
All four agents generate simultaneously inside the same batch. No sequential blocking.
|
|
233
|
+
|
|
234
|
+
### Health endpoint
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
curl http://127.0.0.1:8000/health
|
|
238
|
+
# {"status":"ok","model":"mlx-community/Qwen3.5-4B-OptiQ-4bit","active_sequences":2,"prefix_cache_files":5}
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
`active_sequences` shows how many agents are generating right now; `prefix_cache_files` shows how many prefix KV snapshots are stored on disk.
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
208
245
|
## Command Line
|
|
209
246
|
|
|
210
247
|
### `mlc`: local server + harness
|
|
@@ -212,20 +249,20 @@ Reliability comes from specialization plus constraint. A read-only reviewer can'
|
|
|
212
249
|
Starts the MLX inference server and launches the built-in TUI harness against it.
|
|
213
250
|
|
|
214
251
|
```bash
|
|
215
|
-
# Default: local server + default
|
|
252
|
+
# Default: local server + default harness
|
|
216
253
|
mlc
|
|
217
254
|
|
|
218
|
-
#
|
|
219
|
-
mlc --
|
|
255
|
+
# Continuous batching mode (default is sequential caching mode)
|
|
256
|
+
mlc --engine batch
|
|
257
|
+
|
|
258
|
+
# Server only, no harness
|
|
259
|
+
mlc --leash none
|
|
220
260
|
|
|
221
261
|
# Use a different harness (routes traffic through the local server)
|
|
222
262
|
mlc --leash claude
|
|
223
263
|
mlc --leash gemini
|
|
224
264
|
mlc --leash codex
|
|
225
265
|
|
|
226
|
-
# Server only, no harness
|
|
227
|
-
mlc --leash none
|
|
228
|
-
|
|
229
266
|
# Specify a model
|
|
230
267
|
mlc --model mlx-community/Qwen3.5-4B-OptiQ-4bit
|
|
231
268
|
|
|
@@ -276,7 +313,7 @@ mlc-run --api codex
|
|
|
276
313
|
echo "explain lsp.py" | mlc-run -a deepseek | cat - PLAN.md | mlc-run --url http://localhost:9000
|
|
277
314
|
|
|
278
315
|
# Simple terminal REPL (no TUI)
|
|
279
|
-
mlc-run --
|
|
316
|
+
mlc-run --bare
|
|
280
317
|
```
|
|
281
318
|
|
|
282
319
|
---
|
|
@@ -401,18 +438,19 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
401
438
|
|
|
402
439
|
| Command | Description |
|
|
403
440
|
|---|---|
|
|
404
|
-
| `/
|
|
441
|
+
| `/branch [--rev N] [prompt]` | Open a new branch tab from the current (or earlier) checkpoint |
|
|
442
|
+
| `/diff [--all]` | Show a side-by-side diff of changes in the worktree |
|
|
405
443
|
| `/clear [--config F]` | Clear conversation; `--config` reloads agent from a JSON/YAML file |
|
|
444
|
+
| `/tab [N]` | Jump to tab N |
|
|
406
445
|
| `/history [--raw]` | Show conversation transcript; `--raw` shows the raw API message log |
|
|
407
|
-
| `/diff [--all]` | Show a side-by-side diff of changes in the worktree |
|
|
408
|
-
| `/errors` | Show timestamped error log for the current tab |
|
|
409
446
|
| `/tools` | List active tools |
|
|
410
|
-
| `/branch [--rev N] [prompt]` | Open a new branch tab from the current (or earlier) checkpoint |
|
|
411
447
|
| `/abort` | Abort the running agent |
|
|
448
|
+
| `/errors` | Show timestamped error log for the current tab |
|
|
412
449
|
| `/export [path]` | Export session to JSON |
|
|
413
450
|
| `/exit [--all]` | Close branch tab, or exit the app |
|
|
414
|
-
|
|
|
415
|
-
|
|
|
451
|
+
| `/help` | Show command reference |
|
|
452
|
+
| `!command` | Run a shell command; output captured in the TUI (eg, `ls`, `cat hello.c`) |
|
|
453
|
+
| `$command` | Run an interactive command (eg, `vim`, `yazi`, `less hello.c`) |
|
|
416
454
|
|
|
417
455
|
### Key bindings
|
|
418
456
|
|
|
@@ -422,7 +460,7 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
422
460
|
| `Ctrl-J` | Insert newline |
|
|
423
461
|
| `Ctrl-1` … `Ctrl-9` | Jump to tab N |
|
|
424
462
|
| `Ctrl-,` / `Ctrl-.` | Cycle through tabs |
|
|
425
|
-
| `Ctrl-C` |
|
|
463
|
+
| `Ctrl-C` | Clear input, or abort running agent |
|
|
426
464
|
| `Ctrl-D` | Close branch tab, or exit app |
|
|
427
465
|
| `Ctrl-R` | Recall last prompt into editor |
|
|
428
466
|
|
|
@@ -440,7 +478,7 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
440
478
|
| `Skill` | Retrieve named skill instructions from config |
|
|
441
479
|
| `Agent` | Spawn an autonomous sub-agent for delegated work |
|
|
442
480
|
|
|
443
|
-
All file tools enforce path sandboxing
|
|
481
|
+
All file tools enforce path sandboxing. The agent cannot read or write outside the worktree.
|
|
444
482
|
|
|
445
483
|
### Backends
|
|
446
484
|
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import queue as _queue
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
import threading
|
|
7
|
+
import hashlib
|
|
8
|
+
from array import array
|
|
9
|
+
from contextlib import asynccontextmanager
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
import mlx.core as mx
|
|
12
|
+
from starlette.applications import Starlette
|
|
13
|
+
from starlette.requests import Request
|
|
14
|
+
from starlette.responses import StreamingResponse, JSONResponse
|
|
15
|
+
from starlette.routing import Route
|
|
16
|
+
import logging
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
MIN_PREFIX_TOKENS = 256
|
|
19
|
+
|
|
20
|
+
def _hash_tokens(tokens):
    """Return a short hex fingerprint for a sequence of token ids.

    The ids are packed into an unsigned-int ``array`` and the raw bytes are
    hashed with BLAKE2b using an 8-byte digest, so the result is a
    16-character hex string.
    """
    packed = array('I', tokens).tobytes()
    digest = hashlib.blake2b(packed, digest_size=8)
    return digest.hexdigest()
|
|
23
|
+
|
|
24
|
+
class PrefixCache:
    """Disk-backed store of prompt-prefix KV caches, one file per prefix.

    Each snapshot lives under ``cache_dir`` as
    ``<model>_<prefix length>_<token hash>.safetensors``, where ``<model>``
    is ``model_name`` reduced to its alphanumeric characters.
    """

    def __init__(self, model_name, cache_dir):
        """Remember the model name and create ``cache_dir`` if it is missing."""
        self.model_name = model_name
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _cacheable(prefix_tokens):
        """Return True when the prefix is non-empty and long enough to cache."""
        return bool(prefix_tokens) and len(prefix_tokens) >= MIN_PREFIX_TOKENS

    @staticmethod
    def _discard(path):
        """Best-effort removal of an unusable snapshot file."""
        try:
            path.unlink(missing_ok=True)
        except OSError as exc:
            logger.info(f'[batch] failed to remove prefix cache {path.name}: {exc}')

    def _path(self, prefix_tokens):
        """Return the snapshot file path for ``prefix_tokens``."""
        safe = ''.join(c for c in self.model_name if c.isalnum())
        h = _hash_tokens(prefix_tokens)
        return self.cache_dir / f'{safe}_{len(prefix_tokens)}_{h}.safetensors'

    def lookup(self, prefix_tokens):
        """Load the stored KV cache for ``prefix_tokens``.

        Returns:
            The loaded prompt cache, or ``None`` when the prefix is too short,
            no snapshot exists, or loading fails. A snapshot that cannot be
            loaded is deleted so a later ``store`` can write a fresh one;
            otherwise ``store`` would keep skipping it because the file exists.
        """
        if not self._cacheable(prefix_tokens):
            return None
        path = self._path(prefix_tokens)
        if not path.exists():
            return None
        try:
            from mlx_lm.models.cache import load_prompt_cache
            try:
                cache, _ = load_prompt_cache(str(path), return_metadata=True)
            except Exception:
                # Only a failed load marks the file itself as bad; an import
                # or evaluation failure must not delete a valid snapshot.
                self._discard(path)
                raise
            # NOTE(review): confirm mx.async_eval accepts the cache objects
            # directly rather than their ``.state`` arrays.
            mx.async_eval(cache)
            return cache
        except Exception as exc:
            logger.info(f'[batch] failed to load prefix cache {path.name}: {exc}')
            return None

    def store(self, prefix_tokens, kv_cache):
        """Persist ``kv_cache`` for ``prefix_tokens`` unless already stored.

        Does nothing for prefixes that are too short or already on disk.
        Failures are logged rather than raised, and any partially written
        file is removed so it cannot shadow a later successful save.
        """
        if not self._cacheable(prefix_tokens):
            return
        path = self._path(prefix_tokens)
        if path.exists():
            return
        try:
            from mlx_lm.models.cache import save_prompt_cache
            save_prompt_cache(str(path), kv_cache)
            logger.info(f'[batch] saved prefix cache len={len(prefix_tokens)} file={path.name}')
        except Exception as exc:
            logger.info(f'[batch] failed to save prefix cache: {exc}')
            self._discard(path)
|
|
63
|
+
|
|
64
|
+
def _prefill_prefix(model, tokens, prefill_step_size=2048):
    """Build a fresh prompt cache by running ``model`` over ``tokens`` in chunks.

    Args:
        model: Model passed to ``make_prompt_cache`` and called on each chunk.
        tokens: Token ids to prefill.
        prefill_step_size: Maximum number of tokens fed to the model per call.

    Returns:
        The prompt cache populated by the forward passes.
    """
    from mlx_lm.models.cache import make_prompt_cache

    kv = make_prompt_cache(model)
    remaining = mx.array(tokens)
    while remaining.shape[0] > 0:
        step = min(prefill_step_size, remaining.shape[0])
        chunk, remaining = remaining[:step], remaining[step:]
        # ``[None]`` adds a leading axis (presumably the batch dimension).
        model(chunk[None], cache=kv)
        # Force the cache state to be computed before moving to the next chunk.
        mx.eval([layer.state for layer in kv])
        mx.clear_cache()
    return kv
|
|
75
|
+
|
|
76
|
+
def _get_prefix(tokens, ckpts, min_prefix_tokens=None):
    """Split off the cacheable prefix that ends at the earliest checkpoint.

    Args:
        tokens: Prompt token ids (any sliceable sequence).
        ckpts: Checkpoint offsets into ``tokens``; only the smallest is used.
        min_prefix_tokens: Shortest prefix worth returning. Defaults to the
            module-level ``MIN_PREFIX_TOKENS``.

    Returns:
        ``(prefix_tokens, prefix_len)``, or ``(None, 0)`` when there are no
        checkpoints or the prefix would be shorter than the minimum.
        ``prefix_len`` always equals ``len(prefix_tokens)``.
    """
    if not ckpts:
        return (None, 0)
    if min_prefix_tokens is None:
        min_prefix_tokens = MIN_PREFIX_TOKENS
    first_ckpt = min(ckpts)
    if first_ckpt < min_prefix_tokens:
        return (None, 0)
    prefix = tokens[:first_ckpt]
    # A checkpoint past the end of the prompt yields a shorter slice than
    # requested. Report the real length so callers that slice
    # ``tokens[prefix_len:]`` stay consistent with the returned prefix.
    actual_len = len(prefix)
    if actual_len < min_prefix_tokens:
        return (None, 0)
    return (prefix, actual_len)
|
|
83
|
+
|
|
84
|
+
def make_batch_app(model_name: str, cache_dir: str='.cache'):
|
|
85
|
+
state = {'model': None, 'tokenizer': None, 'batch_gen': None, 'request_queue': _queue.Queue(), 'active': {}, 'loop': None, 'prefix_cache': None}
|
|
86
|
+
|
|
87
|
+
def _engine():
|
|
88
|
+
rq = state['request_queue']
|
|
89
|
+
active = state['active']
|
|
90
|
+
bg = state['batch_gen']
|
|
91
|
+
tok = state['tokenizer']
|
|
92
|
+
loop = state['loop']
|
|
93
|
+
model = state['model']
|
|
94
|
+
pcache = state['prefix_cache']
|
|
95
|
+
while True:
|
|
96
|
+
while not rq.empty():
|
|
97
|
+
try:
|
|
98
|
+
tokens, max_tokens, token_queue, ckpts = rq.get_nowait()
|
|
99
|
+
_insert(bg, active, pcache, model, tok, loop, tokens, max_tokens, token_queue, ckpts)
|
|
100
|
+
except _queue.Empty:
|
|
101
|
+
break
|
|
102
|
+
if not active:
|
|
103
|
+
tokens, max_tokens, token_queue, ckpts = rq.get()
|
|
104
|
+
_insert(bg, active, pcache, model, tok, loop, tokens, max_tokens, token_queue, ckpts)
|
|
105
|
+
try:
|
|
106
|
+
results = bg.next_generated()
|
|
107
|
+
except Exception:
|
|
108
|
+
for uid, meta in list(active.items()):
|
|
109
|
+
loop.call_soon_threadsafe(meta['q'].put_nowait, None)
|
|
110
|
+
active.clear()
|
|
111
|
+
continue
|
|
112
|
+
for r in results:
|
|
113
|
+
meta = active.get(r.uid)
|
|
114
|
+
if meta is None:
|
|
115
|
+
continue
|
|
116
|
+
detok = meta['detok']
|
|
117
|
+
detok.add_token(r.token)
|
|
118
|
+
seg = detok.last_segment
|
|
119
|
+
if r.finish_reason is not None:
|
|
120
|
+
detok.finalize()
|
|
121
|
+
if (final := detok.last_segment):
|
|
122
|
+
loop.call_soon_threadsafe(meta['q'].put_nowait, final)
|
|
123
|
+
loop.call_soon_threadsafe(meta['q'].put_nowait, None)
|
|
124
|
+
del active[r.uid]
|
|
125
|
+
elif seg:
|
|
126
|
+
loop.call_soon_threadsafe(meta['q'].put_nowait, seg)
|
|
127
|
+
|
|
128
|
+
def _insert(bg, active, pcache, model, tok, loop, tokens, max_tokens, token_queue, ckpts):
|
|
129
|
+
prefix_tokens, prefix_len = _get_prefix(tokens, ckpts)
|
|
130
|
+
if prefix_tokens is not None:
|
|
131
|
+
cached_kv = pcache.lookup(prefix_tokens)
|
|
132
|
+
if cached_kv is not None:
|
|
133
|
+
suffix = tokens[prefix_len:]
|
|
134
|
+
try:
|
|
135
|
+
uids = bg.insert([suffix], [max_tokens], caches=[cached_kv])
|
|
136
|
+
except Exception as exc:
|
|
137
|
+
logger.info(f'[batch] cache insert failed ({exc}), falling back to full prompt')
|
|
138
|
+
uids = bg.insert([tokens], [max_tokens])
|
|
139
|
+
prefix_len = 0
|
|
140
|
+
else:
|
|
141
|
+
logger.info(f'[batch] cache HIT prefix={prefix_len} suffix={len(suffix)}')
|
|
142
|
+
del cached_kv
|
|
143
|
+
mx.clear_cache()
|
|
144
|
+
else:
|
|
145
|
+
logger.info(f'[batch] prefilling prefix prefix={prefix_len} suffix={len(tokens) - prefix_len}')
|
|
146
|
+
prefix_kv = _prefill_prefix(model, prefix_tokens)
|
|
147
|
+
pcache.store(prefix_tokens, prefix_kv)
|
|
148
|
+
suffix = tokens[prefix_len:]
|
|
149
|
+
try:
|
|
150
|
+
uids = bg.insert([suffix], [max_tokens], caches=[prefix_kv])
|
|
151
|
+
except Exception as exc:
|
|
152
|
+
logger.info(f'[batch] cache insert failed ({exc}), falling back to full prompt')
|
|
153
|
+
uids = bg.insert([tokens], [max_tokens])
|
|
154
|
+
prefix_len = 0
|
|
155
|
+
del prefix_kv
|
|
156
|
+
mx.clear_cache()
|
|
157
|
+
active[uids[0]] = {'q': token_queue, 'detok': tok.detokenizer}
|
|
158
|
+
else:
|
|
159
|
+
uids = bg.insert([tokens], [max_tokens])
|
|
160
|
+
logger.info(f'[batch] no cache prompt={len(tokens)}')
|
|
161
|
+
active[uids[0]] = {'q': token_queue, 'detok': tok.detokenizer}
|
|
162
|
+
|
|
163
|
+
@asynccontextmanager
|
|
164
|
+
async def lifespan(_app):
|
|
165
|
+
from mlx_lm import load
|
|
166
|
+
from mlx_lm.generate import BatchGenerator
|
|
167
|
+
from mlx_lm.tokenizer_utils import TokenizerWrapper
|
|
168
|
+
logger.info(f'[batch] Loading model {model_name!r} …')
|
|
169
|
+
model, tokenizer = load(model_name)
|
|
170
|
+
if not isinstance(tokenizer, TokenizerWrapper):
|
|
171
|
+
tokenizer = TokenizerWrapper(tokenizer)
|
|
172
|
+
eos = set(tokenizer.eos_token_ids) | {tokenizer.eos_token_id}
|
|
173
|
+
stop_tokens = [[t] for t in eos]
|
|
174
|
+
batch_gen = BatchGenerator(model, stop_tokens=stop_tokens)
|
|
175
|
+
state.update(model=model, tokenizer=tokenizer, batch_gen=batch_gen, loop=asyncio.get_running_loop(), prefix_cache=PrefixCache(model_name, cache_dir))
|
|
176
|
+
logger.info('[batch] Model ready. Starting engine thread.')
|
|
177
|
+
threading.Thread(target=_engine, daemon=True).start()
|
|
178
|
+
yield
|
|
179
|
+
batch_gen.close()
|
|
180
|
+
|
|
181
|
+
@staticmethod
|
|
182
|
+
def _detect_api(path: str) -> str:
|
|
183
|
+
if path.startswith('/v1beta/models/'):
|
|
184
|
+
return 'gemini'
|
|
185
|
+
if path.startswith('/v1/messages'):
|
|
186
|
+
return 'claude'
|
|
187
|
+
if path.startswith('/v1/responses'):
|
|
188
|
+
return 'codex'
|
|
189
|
+
return 'noapi'
|
|
190
|
+
|
|
191
|
+
async def _stream_sse(token_queue, api, msg_id, in_tokens):
    """Turn decoded text chunks from ``token_queue`` into adapter-formatted events.

    Yields ``adapter.start()`` first, then one ``adapter.text(kind, text)`` per
    piece of output, where ``kind`` is ``'thinking'`` or ``'text'`` depending on
    the ``<think>`` / ``</think>`` tags seen so far.  A ``None`` item on the queue
    ends the stream; the accumulated output is then scanned with
    ``_parse_tools_xml`` and any tool calls are emitted before ``adapter.end``.

    Args:
        token_queue: ``asyncio.Queue`` of text chunks terminated by ``None``.
        api: Protocol name selecting the adapter; unknown names use the default.
        msg_id: Message identifier handed to the adapter.
        in_tokens: Prompt token count handed to the adapter.
    """
    from . import main as _m
    adapters = {'claude': _m.ClaudeAdapter, 'codex': _m.CodexAdapter, 'gemini': _m.GeminiAdapter, 'noapi': _m.DefaultAdapter}
    adapter = adapters.get(api, _m.DefaultAdapter)(msg_id, in_tokens)
    yield adapter.start()
    # NOTE(review): the stream starts in 'thinking' state, presumably because the
    # generation prompt already opens a think block -- confirm against encode().
    st = 'thinking'
    buf = ''
    think_tags = ['<think>', '</think>']
    while True:
        text = await token_queue.get()
        if text is None:
            break
        buf += text
        seg = text
        while any(t in seg for t in think_tags):
            progressed = False
            if st == 'text' and think_tags[0] in seg:
                before, _, seg = seg.partition(think_tags[0])
                if before:
                    yield adapter.text('text', before)
                st = 'thinking'
                progressed = True
            if st == 'thinking' and think_tags[1] in seg:
                before, _, seg = seg.partition(think_tags[1])
                if before:
                    yield adapter.text('thinking', before)
                st = 'text'
                progressed = True
            if not progressed:
                # The only tag left is redundant for the current state (an
                # opening tag while already thinking, or a closing tag while in
                # text).  Drop it; otherwise this loop would spin forever and
                # block the event loop.
                stray = think_tags[0] if st == 'thinking' else think_tags[1]
                before, _, seg = seg.partition(stray)
                if before:
                    yield adapter.text(st, before)
        if seg:
            yield adapter.text(st, seg)
    if (tools := _m._parse_tools_xml(buf)):
        for tool in tools:
            yield adapter.tool(tool)
        yield adapter.end(True)
    else:
        yield adapter.end(False)
|
|
224
|
+
|
|
225
|
+
async def generate_endpoint(request: Request):
    """Handle a chat/generation request for any of the supported wire protocols.

    The protocol is derived from the request path.  The request body is encoded
    into prompt tokens with ``main.encode``, queued for the engine thread, and
    the generated text is streamed back as server-sent events.

    Returns:
        A ``StreamingResponse`` on success, or a ``JSONResponse`` with status
        503 (model not loaded), 400 (malformed body or empty prompt) or
        500 (encoding failed).
    """
    from . import main as _m
    if state['batch_gen'] is None:
        return JSONResponse({'error': 'model not loaded'}, status_code=503)
    path = request.url.path.split('?')[0].rstrip('/')
    api = _detect_api(path)
    if api == 'gemini':
        q = str(request.url.query) or ''
        # Non-streaming Gemini calls get a fixed canned reply instead of
        # running the model.
        if 'alt=sse' not in q and 'streamGenerateContent' not in path:
            return JSONResponse({'candidates': [{'content': {'role': 'model', 'parts': [{'text': '{"complexity_reasoning":"local","complexity_score":50}'}]}, 'finishReason': 'STOP'}], 'usageMetadata': {'promptTokenCount': 0, 'candidatesTokenCount': 0}})
    # Malformed input is a client error, not a server crash.
    try:
        body = await request.json()
    except ValueError as exc:
        return JSONResponse({'error': f'invalid JSON body: {exc}'}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse({'error': 'request body must be a JSON object'}, status_code=400)
    # An explicit null is treated like a missing key.
    requested = body.get('max_tokens')
    if requested is None:
        requested = body.get('max_completion_tokens')
    try:
        max_tokens = 8192 if requested is None else int(requested)
    except (TypeError, ValueError):
        return JSONResponse({'error': 'max_tokens must be an integer'}, status_code=400)
    try:
        prompt, ckpts = _m.encode(body, api, state['tokenizer'], None, None, None)
    except Exception as exc:
        return JSONResponse({'error': f'encode: {exc}'}, status_code=500)
    if ckpts is None or not prompt:
        return JSONResponse({'error': 'empty prompt'}, status_code=400)
    msg_id = f'msg_{uuid.uuid4().hex}'
    token_queue = asyncio.Queue()
    state['request_queue'].put((prompt, max_tokens, token_queue, ckpts))
    return StreamingResponse(_stream_sse(token_queue, api, msg_id, len(prompt)), media_type='text/event-stream')
|
|
251
|
+
|
|
252
|
+
async def simple_generate(request: Request):
    """Minimal generation endpoint taking ``messages`` or a raw ``prompt``.

    The body may contain ``messages`` (rendered through the tokenizer's chat
    template) or ``prompt``, plus optional ``max_tokens`` (default 256) and
    ``stream`` (default true).

    Returns:
        A plain-text ``StreamingResponse`` when streaming, otherwise a
        ``JSONResponse`` ``{'text': ...}`` with the full output.  Errors are
        reported as 503 (model not loaded) or 400 (malformed body, empty prompt).
    """
    if state['batch_gen'] is None:
        return JSONResponse({'error': 'model not loaded'}, status_code=503)
    # Malformed input is a client error, not a server crash.
    try:
        body = await request.json()
    except ValueError as exc:
        return JSONResponse({'error': f'invalid JSON body: {exc}'}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse({'error': 'request body must be a JSON object'}, status_code=400)
    tok = state['tokenizer']
    # Coerce to int like generate_endpoint does, so the engine always receives
    # an integer budget.
    requested = body.get('max_tokens')
    try:
        max_tokens = 256 if requested is None else int(requested)
    except (TypeError, ValueError):
        return JSONResponse({'error': 'max_tokens must be an integer'}, status_code=400)
    if 'messages' in body:
        text = tok.apply_chat_template(body['messages'], tokenize=False, add_generation_prompt=True)
    else:
        text = body.get('prompt', '')
    tokens = tok.encode(text)
    if not tokens:
        return JSONResponse({'error': 'empty prompt'}, status_code=400)
    token_queue = asyncio.Queue()
    state['request_queue'].put((tokens, max_tokens, token_queue, []))

    async def _chunks():
        # The engine signals completion with a None sentinel.
        while (chunk := await token_queue.get()) is not None:
            yield chunk

    if body.get('stream', True):
        return StreamingResponse(_chunks(), media_type='text/plain')
    return JSONResponse({'text': ''.join([chunk async for chunk in _chunks()])})
|
|
283
|
+
|
|
284
|
+
async def list_models(_req):
    """Report the single model entry, ``'local'``, that this server exposes."""
    entry = {
        'id': 'local',
        'object': 'model',
        'created': int(time.time()),
        'owned_by': 'local',
    }
    return JSONResponse({'data': [entry]})
|
|
286
|
+
|
|
287
|
+
async def count_tokens(_req):
    """Placeholder token counter: always answers with zero input tokens."""
    payload = {'input_tokens': 0}
    return JSONResponse(payload)
|
|
289
|
+
|
|
290
|
+
async def health(_req):
    """Return server status, active sequence count and prefix-cache file count."""
    cache = state['prefix_cache']
    if cache and cache.cache_dir.exists():
        # One .safetensors file per stored prefix snapshot.
        snapshot_count = len(list(cache.cache_dir.glob('*.safetensors')))
    else:
        snapshot_count = 0
    payload = {
        'status': 'ok',
        'model': model_name,
        'active_sequences': len(state['active']),
        'prefix_cache_files': snapshot_count,
    }
    return JSONResponse(payload)
|
|
296
|
+
return Starlette(routes=[Route('/v1/models', list_models, methods=['GET']), Route('/v1/messages/count_tokens', count_tokens, methods=['POST']), Route('/v1/chat/completions', generate_endpoint, methods=['POST']), Route('/v1/messages', generate_endpoint, methods=['POST']), Route('/v1/responses', generate_endpoint, methods=['POST']), Route('/v1beta/models/{rest:path}', generate_endpoint, methods=['POST']), Route('/generate', simple_generate, methods=['POST']), Route('/health', health, methods=['GET'])], lifespan=lifespan)
|
|
297
|
+
if __name__ == '__main__':
    # Stand-alone entry point: serve the default model on all interfaces.
    import uvicorn
    _app = make_batch_app('mlx-community/Qwen3.5-4B-OptiQ-4bit')
    uvicorn.run(_app, host='0.0.0.0', port=8000)
|
|
@@ -871,13 +871,13 @@ def make_handler(model_name, cache_dir, system, names, skips, gwt=None, parse_th
|
|
|
871
871
|
raise
|
|
872
872
|
return Handler
|
|
873
873
|
|
|
874
|
-
def
|
|
874
|
+
def _serve_cache(host, port, model, cache, system, tools, skips, *, fixed_port=False, gwt=None):
|
|
875
875
|
handler = make_handler(model, cache, system, tools, skips, gwt)
|
|
876
876
|
while True:
|
|
877
877
|
try:
|
|
878
878
|
server = HTTPServer((host, port), handler)
|
|
879
879
|
url = f'http://{host}:{port}'
|
|
880
|
-
logger.debug(f'
|
|
880
|
+
logger.debug(f'Cache server bound to {url}')
|
|
881
881
|
return (server, url)
|
|
882
882
|
except OSError as e:
|
|
883
883
|
if e.errno in (48, 98):
|
|
@@ -888,12 +888,52 @@ def serve(host: str, port: int, model: str, cache: str, system: str | None, tool
|
|
|
888
888
|
else:
|
|
889
889
|
raise
|
|
890
890
|
|
|
891
|
+
def _serve_batch(host, port, model, cache_dir='.cache', *, fixed_port=False):
    """Start the batching server on a background thread and wait until it accepts connections.

    Args:
        host: Interface to bind.
        port: First port to try.
        model: Model path or repo id passed to ``make_batch_app``.
        cache_dir: Directory passed to ``make_batch_app`` as ``cache_dir``.
        fixed_port: When true, exit the process if ``port`` is taken instead of
            trying the next one.

    Returns:
        ``(uv_server, url)``: the ``uvicorn.Server`` instance and its base URL.

    Raises:
        OSError: Binding failed for a reason other than the port being in use.
        RuntimeError: The server thread exited before it started listening.
    """
    import errno
    import socket
    import time
    import uvicorn
    from .bats import make_batch_app
    app = make_batch_app(model, cache_dir=cache_dir)
    # Probe for a free port by binding a throwaway socket.
    while True:
        try:
            with socket.socket() as s:
                s.bind((host, port))
        except OSError as e:
            if e.errno != errno.EADDRINUSE:
                raise
            if fixed_port:
                logger.error(f'Port {port} is already in use.')
                sys.exit(1)
            port += 1
        else:
            break
    config = uvicorn.Config(app, host=host, port=port, loop='asyncio', log_level='warning')
    uv_server = uvicorn.Server(config)
    t = threading.Thread(target=uv_server.run, daemon=True)
    t.start()
    # Monotonic clock: unaffected by wall-clock adjustments while waiting.
    start_time = time.monotonic()
    notified = False
    while True:
        try:
            with socket.create_connection((host, port), timeout=0.1):
                break
        except OSError:
            if not t.is_alive():
                # The server thread has ended (e.g. startup failed), so the port
                # will never open; fail instead of polling forever.
                raise RuntimeError('Batch server exited before it started listening') from None
            if not notified and time.monotonic() - start_time > 3.0:
                logger.info('Waiting for batch server to start (model may be downloading)...')
                notified = True
            time.sleep(0.2)
    url = f'http://{host}:{port}'
    logger.debug(f'Batch server bound to {url}')
    return (uv_server, url)
|
|
929
|
+
|
|
891
930
|
def main():
|
|
892
931
|
parser = argparse.ArgumentParser(description='mlx-code MAIN')
|
|
893
932
|
parser.add_argument('-p', '--prompt', default=None, help='Initial prompt sent automatically when the REPL starts')
|
|
894
933
|
parser.add_argument('-r', '--resume', default=None, metavar='COMMIT', help='Resume a previous session from the given git commit hash')
|
|
895
934
|
parser.add_argument('-m', '--model', default='mlx-community/Qwen3.5-4B-OptiQ-4bit', help='MLX model path or HuggingFace repo ID (default: Qwen3.5-4B-OptiQ-4bit)')
|
|
896
935
|
parser.add_argument('-l', '--leash', choices=['claude', 'codex', 'gemini', 'noapi', 'none'], default='noapi', help="AI harness to launch against the server; 'noapi' starts the built-in REPL, 'none' runs the server only")
|
|
936
|
+
parser.add_argument('--engine', choices=['cache', 'batch'], default='cache', help="'cache' uses PromptCache + single-sequence (default); 'batch' uses BatchGenerator for concurrent sequences (only compatible with --leash none or noapi)")
|
|
897
937
|
parser.add_argument('--skill', default=None, help='Directory to scan recursively for SKILL.md files')
|
|
898
938
|
parser.add_argument('--tools', nargs='+', default=None, help='Whitelist of tool names to enable; allows all tools when omitted')
|
|
899
939
|
parser.add_argument('--system', type=str, default=None, help='System prompt override passed to the model')
|
|
@@ -903,10 +943,14 @@ def main():
|
|
|
903
943
|
parser.add_argument('--port', type=int, default=None, help='Port to listen on; auto-increments if already in use (default: 8000)')
|
|
904
944
|
parser.add_argument('--skips', nargs='+', default=['(?m)^\\[SUGGESTION MODE[\\s\\S]*', '(?m)^<system-reminder>[\\s\\S]*?^</system-reminder>\\s*'], help='Regex patterns stripped from model output before it is returned to the client')
|
|
905
945
|
parser.add_argument('--stream', default=None, help='File to stream log into')
|
|
906
|
-
parser.add_argument('--
|
|
946
|
+
parser.add_argument('--bare', action='store_true', help='Use simple terminal REPL instead of TUI')
|
|
907
947
|
args, leash_args = parser.parse_known_args()
|
|
908
948
|
logger.debug(f'args={args!r} leash_args={leash_args!r}')
|
|
949
|
+
if args.engine == 'batch' and args.leash not in ('none', 'noapi'):
|
|
950
|
+
parser.error('--engine batch only supports --leash none or --leash noapi for now')
|
|
909
951
|
cache = os.path.abspath(args.cache)
|
|
952
|
+
port = args.port if args.port is not None else 8000
|
|
953
|
+
fixed_port = args.port is not None
|
|
910
954
|
with tempfile.TemporaryDirectory(dir='/tmp') as _home:
|
|
911
955
|
env = os.environ.copy()
|
|
912
956
|
home = Path(_home)
|
|
@@ -915,18 +959,28 @@ def main():
|
|
|
915
959
|
env['HOME'] = _home
|
|
916
960
|
env['SHELL'] = '/bin/bash'
|
|
917
961
|
env['PWD'] = cwd
|
|
918
|
-
|
|
962
|
+
if args.engine == 'batch':
|
|
963
|
+
server, url = _serve_batch(args.host, port, args.model, cache_dir=cache, fixed_port=fixed_port)
|
|
964
|
+
else:
|
|
965
|
+
server, url = _serve_cache(host=args.host, port=port, model=args.model, cache=cache, system=None if args.leash in ('none', 'noapi') else args.system, tools=args.tools, skips=args.skips, fixed_port=fixed_port, gwt=gwt)
|
|
919
966
|
if args.leash == 'none':
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
967
|
+
if args.engine == 'batch':
|
|
968
|
+
try:
|
|
969
|
+
threading.Event().wait()
|
|
970
|
+
except KeyboardInterrupt:
|
|
971
|
+
print('\nShutting down server...')
|
|
972
|
+
else:
|
|
973
|
+
try:
|
|
974
|
+
server.serve_forever()
|
|
975
|
+
except KeyboardInterrupt:
|
|
976
|
+
print('\nShutting down server...')
|
|
977
|
+
server.server_close()
|
|
925
978
|
else:
|
|
926
|
-
|
|
979
|
+
if args.engine == 'cache':
|
|
980
|
+
threading.Thread(target=server.serve_forever, daemon=True).start()
|
|
927
981
|
if args.leash == 'noapi':
|
|
928
982
|
from .repl import run_repl
|
|
929
|
-
run_repl(base_url=url, api=args.leash, repo=cwd, env=env, system=args.system, tool_names=args.tools, sdir=args.skill, init_prompt=args.prompt, resume=args.resume, stream=args.stream,
|
|
983
|
+
run_repl(base_url=url, api=args.leash, repo=cwd, env=env, system=args.system, tool_names=args.tools, sdir=args.skill, init_prompt=args.prompt, resume=args.resume, stream=args.stream, bare=args.bare)
|
|
930
984
|
else:
|
|
931
985
|
env['GOOGLE_GEMINI_BASE_URL'] = url
|
|
932
986
|
env['GEMINI_API_KEY'] = 'mc'
|
|
@@ -980,10 +980,10 @@ async def _stream_to_stdout(agent: Agent, user_input: str) -> None:
|
|
|
980
980
|
if text:
|
|
981
981
|
print(text)
|
|
982
982
|
|
|
983
|
-
async def repl(agent, init_prompt=None,
|
|
983
|
+
async def repl(agent, init_prompt=None, bare=False):
|
|
984
984
|
is_tty = sys.stdin.isatty() and sys.stdout.isatty()
|
|
985
|
-
if
|
|
986
|
-
from .
|
|
985
|
+
if bare and is_tty:
|
|
986
|
+
from .bare import SimpleRepl
|
|
987
987
|
sr = SimpleRepl(agent, init_prompt=init_prompt)
|
|
988
988
|
await sr.run()
|
|
989
989
|
return None
|
|
@@ -1025,7 +1025,7 @@ _AGENT_ENV_ALLOWLIST: re.Pattern = re.compile('\n ^(\n # ── Execution
|
|
|
1025
1025
|
def _make_agent_env(base: dict[str, str]) -> dict[str, str]:
|
|
1026
1026
|
return {k: v for k, v in base.items() if _AGENT_ENV_ALLOWLIST.match(k)}
|
|
1027
1027
|
|
|
1028
|
-
def run_repl(*, base_url=None, model=None, api: Literal['claude', 'codex', 'gemini', 'deepseek', 'noapi']='noapi', system='', sdir=None, skills=None, env=None, tool_names=None, extra_tool_classes=None, api_key=None, gwt=None, ctx=None, init_prompt=None, resume_messages=None, repo=None, resume=None, stream=None, verbose_transcript=False,
|
|
1028
|
+
def run_repl(*, base_url=None, model=None, api: Literal['claude', 'codex', 'gemini', 'deepseek', 'noapi']='noapi', system='', sdir=None, skills=None, env=None, tool_names=None, extra_tool_classes=None, api_key=None, gwt=None, ctx=None, init_prompt=None, resume_messages=None, repo=None, resume=None, stream=None, verbose_transcript=False, bare=False):
|
|
1029
1029
|
repo = os.path.abspath(repo or os.getcwd())
|
|
1030
1030
|
with tempfile.TemporaryDirectory(dir=tempfile.gettempdir()) as _home:
|
|
1031
1031
|
if gwt is None:
|
|
@@ -1064,7 +1064,7 @@ def run_repl(*, base_url=None, model=None, api: Literal['claude', 'codex', 'gemi
|
|
|
1064
1064
|
print(f'[resumed {len(resume_messages)} messages from checkpoint]')
|
|
1065
1065
|
app_instance = None
|
|
1066
1066
|
try:
|
|
1067
|
-
app_instance = asyncio.run(repl(agent, init_prompt=init_prompt,
|
|
1067
|
+
app_instance = asyncio.run(repl(agent, init_prompt=init_prompt, bare=bare))
|
|
1068
1068
|
finally:
|
|
1069
1069
|
if log_fp:
|
|
1070
1070
|
log_fp.close()
|
|
@@ -1103,7 +1103,7 @@ def main():
|
|
|
1103
1103
|
parser.add_argument('--key', default=None, help='API key')
|
|
1104
1104
|
parser.add_argument('--stream', default=None, help='File to stream log into')
|
|
1105
1105
|
parser.add_argument('--verbose-transcript', action='store_true', help='Reserved; not yet implemented')
|
|
1106
|
-
parser.add_argument('--
|
|
1106
|
+
parser.add_argument('--bare', action='store_true', help='Use simple terminal REPL instead of TUI')
|
|
1107
1107
|
args = parser.parse_args()
|
|
1108
1108
|
logger.debug(args)
|
|
1109
1109
|
url, model, tool_names, api_key = (args.url, args.model, args.tools, args.key)
|
|
@@ -1117,6 +1117,6 @@ def main():
|
|
|
1117
1117
|
url = 'https://generativelanguage.googleapis.com' if api_key else url
|
|
1118
1118
|
model = 'gemini-3.1-flash-lite' if model is None else model
|
|
1119
1119
|
tool_names = [] if tool_names is None else tool_names
|
|
1120
|
-
run_repl(api=args.api, system=args.system, repo=args.cwd, model=model, base_url=url, tool_names=tool_names, sdir=args.skill, api_key=api_key, init_prompt=args.prompt, resume=args.resume, stream=args.stream,
|
|
1120
|
+
run_repl(api=args.api, system=args.system, repo=args.cwd, model=model, base_url=url, tool_names=tool_names, sdir=args.skill, api_key=api_key, init_prompt=args.prompt, resume=args.resume, stream=args.stream, bare=args.bare)
|
|
1121
1121
|
if __name__ == '__main__':
|
|
1122
1122
|
main()
|
|
@@ -597,7 +597,7 @@ def tui(stdscr, entries, log_file, initial_filter='', initial_visible=None):
|
|
|
597
597
|
def main():
|
|
598
598
|
parser = argparse.ArgumentParser(description='TUI viewer for JSON log files')
|
|
599
599
|
parser.add_argument('logfile', nargs='?', default='.log.json', help='Path to log file (default: .log.json)')
|
|
600
|
-
parser.add_argument('-f', '--filter', default=f'lvl:10;file:main,repl,gits,apis,tools', help='Initial filter string (same syntax as in UI)')
|
|
600
|
+
parser.add_argument('-f', '--filter', default=f'lvl:10;file:main,bats,repl,bare,gits,apis,tools', help='Initial filter string (same syntax as in UI)')
|
|
601
601
|
parser.add_argument('-o', '--out', dest='out', metavar='FILE', help='Write marked entries to FILE (JSON lines format) instead of stdout')
|
|
602
602
|
args = parser.parse_args()
|
|
603
603
|
log_path = args.logfile
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mlx-code
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.26
|
|
4
4
|
Summary: Coding Agent for Mac
|
|
5
5
|
Home-page: https://josefalbers.github.io/mlx-code/
|
|
6
6
|
Author: J Joe
|
|
@@ -17,6 +17,8 @@ Requires-Dist: httpx
|
|
|
17
17
|
Requires-Dist: pydantic
|
|
18
18
|
Requires-Dist: textual>=8.2.7
|
|
19
19
|
Requires-Dist: rich>=15.0.0
|
|
20
|
+
Requires-Dist: starlette
|
|
21
|
+
Requires-Dist: uvicorn
|
|
20
22
|
Provides-Extra: all
|
|
21
23
|
Requires-Dist: python-lsp-server[all]; extra == "all"
|
|
22
24
|
Requires-Dist: GitPython; extra == "all"
|
|
@@ -47,7 +49,7 @@ A Git-native coding agent that can run entirely on your Mac. No API keys, no clo
|
|
|
47
49
|
```
|
|
48
50
|
Conversation tree (nodes = git commits with embedded chat history)
|
|
49
51
|
|
|
50
|
-
main
|
|
52
|
+
main ──●──●──●──●──●──●──●──●──●──●──●──●──●──●
|
|
51
53
|
│ │
|
|
52
54
|
│ └── branch-1 ──●──●──●
|
|
53
55
|
│ │ ┌────────────┐
|
|
@@ -66,21 +68,21 @@ REPL tabs (each tab = a git branch + agent) │
|
|
|
66
68
|
│ └──────┘ └────┬─────┘ └──────────┘ └────────────┘ │
|
|
67
69
|
└─────────────────┼──────────────────────────────────────┘
|
|
68
70
|
│
|
|
69
|
-
|
|
71
|
+
├─────────────────────────────────────────► Each tab is an independent Agent
|
|
70
72
|
│
|
|
71
|
-
|
|
72
|
-
│ Agent
|
|
73
|
-
│
|
|
74
|
-
│ │ API:
|
|
75
|
-
│ │
|
|
76
|
-
│ │ Claude
|
|
77
|
-
│ │ Gemini
|
|
78
|
-
│ │ OpenAI
|
|
79
|
-
│
|
|
80
|
-
│
|
|
81
|
-
│ Git worktree
|
|
82
|
-
│ (isolation + session state)
|
|
83
|
-
|
|
73
|
+
┌────┴─────────────────────────────────────┐
|
|
74
|
+
│ Agent │
|
|
75
|
+
│ ┌────────────────┐ ┌────────────────┐ │
|
|
76
|
+
│ │ API: │ │ Tools: │ │
|
|
77
|
+
│ │ Local (mlx-lm) │ │ Read Write │ │
|
|
78
|
+
│ │ Claude │ │ Edit Bash │ │
|
|
79
|
+
│ │ Gemini │ │ Grep Find │ │
|
|
80
|
+
│ │ OpenAI │ │ Ls Skill │ │
|
|
81
|
+
│ └────────────────┘ │ Agent ─────────┼──┼───► Spawns child Agent
|
|
82
|
+
│ └────────────────┘ │ (each with own tools + worktree + etc)
|
|
83
|
+
│ Git worktree │
|
|
84
|
+
│ (isolation + session state) │
|
|
85
|
+
└──────────────────────────────────────────┘
|
|
84
86
|
```
|
|
85
87
|
|
|
86
88
|
Each layer is importable and composable on its own. A commit records state, a branch records an alternative path, and a tab is just a live view over an `Agent`.
|
|
@@ -104,9 +106,9 @@ uvx --from mlx-code mlc
|
|
|
104
106
|
# or install into the current environment
|
|
105
107
|
pip install mlx-code
|
|
106
108
|
|
|
107
|
-
|
|
109
|
+
# launch
|
|
110
|
+
mlc # with a local MLX model
|
|
108
111
|
mlc-run --api gemini # or use a remote provider
|
|
109
|
-
mlc-run --api deepseek --model deepseek-v4-flash
|
|
110
112
|
```
|
|
111
113
|
|
|
112
114
|
That's it. The first run starts a local inference server and drops you into the REPL.
|
|
@@ -128,12 +130,12 @@ That's it. The first run starts a local inference server and drops you into the
|
|
|
128
130
|
|
|
129
131
|
**Git is the database.** When the agent makes file changes, they’re committed to a git worktree with the full conversation embedded in the commit message. Resume any past session by hash, branch from any checkpoint, and inspect the agent timeline with `git log`. No proprietary state files, just Git.
|
|
130
132
|
|
|
131
|
-
**Your working directory is never at risk
|
|
132
|
-
|
|
133
|
-
**Built-in safety nets.** Subprocess environment variables go through an explicit allowlist, so secrets in your shell are never leaked to agent-spawned processes.
|
|
133
|
+
**Built-in safety nets.** Your working directory is never at risk. The agent operates inside a `git worktree`, not your checkout. It can make a mess, and you can inspect or discard it without ever touching `main`. Subprocess environment variables go through an explicit allowlist, so secrets in your shell are never leaked to agent-spawned processes.
|
|
134
134
|
|
|
135
135
|
**Batteries included.** Everything ships in one pip install: the MLX inference engine, the multi-protocol API server, the agent loop, the tools, and the TUI. No llama.cpp, no ollama, no vLLM bridge to find and configure. And the server natively speaks OpenAI, Anthropic, Gemini, and Codex wire formats simultaneously, so `claude`, `codex`, and `gemini` CLIs can all work against your local model without a translation layer.
|
|
136
136
|
|
|
137
|
+
**Continuous batching.** The local inference server can run a continuous batching engine (`--engine batch`) that processes multiple sequences concurrently. When you spawn parallel agents (e.g., multiple tabs, `asyncio.gather` pipelines, or delegated sub-tasks), they all share the same GPU context and are stepped together each tick. A prefix cache persists KV snapshots to disk, so repeated system prompts and conversation prefixes are prefilled once and reused across sessions. No request queueing, no waiting for the previous agent to finish.
|
|
138
|
+
|
|
137
139
|
---
|
|
138
140
|
|
|
139
141
|
## Agent primitive
|
|
@@ -171,12 +173,12 @@ agent.messages = messages
|
|
|
171
173
|
await agent.run("now add unit tests")
|
|
172
174
|
```
|
|
173
175
|
|
|
174
|
-
Branch from any point in the conversation
|
|
176
|
+
Branch from any point in the conversation. Each branch gets its own worktree:
|
|
175
177
|
|
|
176
178
|
```
|
|
177
179
|
/branch # branch from current state
|
|
178
180
|
/branch --rev 2 # branch from the 2nd user turn
|
|
179
|
-
/branch --rev 3
|
|
181
|
+
/branch --rev 3 make it use httpx instead
|
|
180
182
|
```
|
|
181
183
|
|
|
182
184
|
Since it's just git, you can inspect the timeline outside the REPL:
|
|
@@ -241,6 +243,43 @@ Reliability comes from specialization plus constraint. A read-only reviewer can'
|
|
|
241
243
|
|
|
242
244
|
---
|
|
243
245
|
|
|
246
|
+
## Continuous batching
|
|
247
|
+
|
|
248
|
+
The local server can run multiple inference sequences concurrently inside a single batch step. Instead of a global lock that serialises one request at a time, the batching engine maintains a live set of active sequences and yields tokens for all of them on every step.
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
mlc --engine batch # continuous batching + built-in REPL
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
This unlocks true parallelism for multi-agent workloads:
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
import asyncio
|
|
258
|
+
from mlx_code.repl import Agent
|
|
259
|
+
|
|
260
|
+
async def main():
|
|
261
|
+
agents = [Agent() for _ in range(4)]
|
|
262
|
+
await asyncio.gather(*[
|
|
263
|
+
a.run(f"Research topic: {t}")
|
|
264
|
+
for a, t in zip(agents, ["consensus", "cryptography", "networking", "storage"])
|
|
265
|
+
])
|
|
266
|
+
|
|
267
|
+
asyncio.run(main())
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
All four agents generate simultaneously inside the same batch. No sequential blocking.
|
|
271
|
+
|
|
272
|
+
### Health endpoint
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
curl http://127.0.0.1:8000/health
|
|
276
|
+
# {"status":"ok","model":"mlx-community/Qwen3.5-4B-OptiQ-4bit","active_sequences":2,"prefix_cache_files":5}
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
`active_sequences` shows how many agents are generating right now; `prefix_cache_files` shows how many prefix KV snapshots are stored on disk.
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
244
283
|
## Command Line
|
|
245
284
|
|
|
246
285
|
### `mlc`: local server + harness
|
|
@@ -248,20 +287,20 @@ Reliability comes from specialization plus constraint. A read-only reviewer can'
|
|
|
248
287
|
Starts the MLX inference server and launches the built-in TUI harness against it.
|
|
249
288
|
|
|
250
289
|
```bash
|
|
251
|
-
# Default: local server + default
|
|
290
|
+
# Default: local server + default harness
|
|
252
291
|
mlc
|
|
253
292
|
|
|
254
|
-
#
|
|
255
|
-
mlc --
|
|
293
|
+
# Continuous batching mode (default is sequential caching mode)
|
|
294
|
+
mlc --engine batch
|
|
295
|
+
|
|
296
|
+
# Server only, no harness
|
|
297
|
+
mlc --leash none
|
|
256
298
|
|
|
257
299
|
# Use a different harness (routes traffic through the local server)
|
|
258
300
|
mlc --leash claude
|
|
259
301
|
mlc --leash gemini
|
|
260
302
|
mlc --leash codex
|
|
261
303
|
|
|
262
|
-
# Server only, no harness
|
|
263
|
-
mlc --leash none
|
|
264
|
-
|
|
265
304
|
# Specify a model
|
|
266
305
|
mlc --model mlx-community/Qwen3.5-4B-OptiQ-4bit
|
|
267
306
|
|
|
@@ -312,7 +351,7 @@ mlc-run --api codex
|
|
|
312
351
|
echo "explain lsp.py" | mlc-run -a deepseek | cat - PLAN.md | mlc-run --url http://localhost:9000
|
|
313
352
|
|
|
314
353
|
# Simple terminal REPL (no TUI)
|
|
315
|
-
mlc-run --
|
|
354
|
+
mlc-run --bare
|
|
316
355
|
```
|
|
317
356
|
|
|
318
357
|
---
|
|
@@ -437,18 +476,19 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
437
476
|
|
|
438
477
|
| Command | Description |
|
|
439
478
|
|---|---|
|
|
440
|
-
| `/
|
|
479
|
+
| `/branch [--rev N] [prompt]` | Open a new branch tab from the current (or earlier) checkpoint |
|
|
480
|
+
| `/diff [--all]` | Show a side-by-side diff of changes in the worktree |
|
|
441
481
|
| `/clear [--config F]` | Clear conversation; `--config` reloads agent from a JSON/YAML file |
|
|
482
|
+
| `/tab [N]` | Jump to tab N |
|
|
442
483
|
| `/history [--raw]` | Show conversation transcript; `--raw` shows the raw API message log |
|
|
443
|
-
| `/diff [--all]` | Show a side-by-side diff of changes in the worktree |
|
|
444
|
-
| `/errors` | Show timestamped error log for the current tab |
|
|
445
484
|
| `/tools` | List active tools |
|
|
446
|
-
| `/branch [--rev N] [prompt]` | Open a new branch tab from the current (or earlier) checkpoint |
|
|
447
485
|
| `/abort` | Abort the running agent |
|
|
486
|
+
| `/errors` | Show timestamped error log for the current tab |
|
|
448
487
|
| `/export [path]` | Export session to JSON |
|
|
449
488
|
| `/exit [--all]` | Close branch tab, or exit the app |
|
|
450
|
-
|
|
|
451
|
-
|
|
|
489
|
+
| `/help` | Show command reference |
|
|
490
|
+
| `!command` | Run a shell command; output captured in the TUI (e.g., `ls`, `cat hello.c`) |
|
|
491
|
+
| `$command` | Run an interactive command (e.g., `vim`, `yazi`, `less hello.c`) |
|
|
452
492
|
|
|
453
493
|
### Key bindings
|
|
454
494
|
|
|
@@ -458,7 +498,7 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
458
498
|
| `Ctrl-J` | Insert newline |
|
|
459
499
|
| `Ctrl-1` … `Ctrl-9` | Jump to tab N |
|
|
460
500
|
| `Ctrl-,` / `Ctrl-.` | Cycle through tabs |
|
|
461
|
-
| `Ctrl-C` |
|
|
501
|
+
| `Ctrl-C` | Clear input, or abort running agent |
|
|
462
502
|
| `Ctrl-D` | Close branch tab, or exit app |
|
|
463
503
|
| `Ctrl-R` | Recall last prompt into editor |
|
|
464
504
|
|
|
@@ -476,7 +516,7 @@ agent = Agent(extra_tool_classes=[LiveDBTool], tool_names=["QueryDB"])
|
|
|
476
516
|
| `Skill` | Retrieve named skill instructions from config |
|
|
477
517
|
| `Agent` | Spawn an autonomous sub-agent for delegated work |
|
|
478
518
|
|
|
479
|
-
All file tools enforce path sandboxing
|
|
519
|
+
All file tools enforce path sandboxing. The agent cannot read or write outside the worktree.
|
|
480
520
|
|
|
481
521
|
### Backends
|
|
482
522
|
|
|
@@ -3,12 +3,13 @@ README.md
|
|
|
3
3
|
setup.py
|
|
4
4
|
mlx_code/__init__.py
|
|
5
5
|
mlx_code/apis.py
|
|
6
|
+
mlx_code/bare.py
|
|
7
|
+
mlx_code/bats.py
|
|
6
8
|
mlx_code/gits.py
|
|
7
9
|
mlx_code/lsp_tool.py
|
|
8
10
|
mlx_code/main.py
|
|
9
11
|
mlx_code/mcb.py
|
|
10
12
|
mlx_code/mcb_tool.py
|
|
11
|
-
mlx_code/ntui.py
|
|
12
13
|
mlx_code/repl.py
|
|
13
14
|
mlx_code/stream_log.py
|
|
14
15
|
mlx_code/tools.py
|
|
@@ -11,7 +11,7 @@ setup(
|
|
|
11
11
|
author_email="albersj66@gmail.com",
|
|
12
12
|
author="J Joe",
|
|
13
13
|
license="Apache-2.0",
|
|
14
|
-
version="0.0.
|
|
14
|
+
version="0.0.26",
|
|
15
15
|
readme="README.md",
|
|
16
16
|
description="Coding Agent for Mac",
|
|
17
17
|
long_description=open("README.md").read(),
|
|
@@ -24,6 +24,9 @@ setup(
|
|
|
24
24
|
|
|
25
25
|
"textual>=8.2.7",
|
|
26
26
|
"rich>=15.0.0",
|
|
27
|
+
|
|
28
|
+
"starlette",
|
|
29
|
+
"uvicorn",
|
|
27
30
|
],
|
|
28
31
|
extras_require={"all": [
|
|
29
32
|
"python-lsp-server[all]",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|