mlx-code 0.0.2a3__tar.gz → 0.0.2a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
+Metadata-Version: 2.4
+Name: mlx-code
+Version: 0.0.2a6
+Summary: Local Claude Code for Mac
+Home-page: https://github.com/JosefAlbers/mlx-code
+Author: J Joe
+Author-email: albersj66@gmail.com
+License: Apache-2.0
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: mlx-lm>=0.19.0
+Requires-Dist: PyYAML
+Dynamic: author
+Dynamic: author-email
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+# mlx-code: Local Coding Agent
+
+[![Link](https://raw.githubusercontent.com/JosefAlbers/mlx-code/main/assets/mlx-code.gif)](https://youtu.be/Rba-uTsYuXg)
+
+### Quick Start
+
+Install via pip and launch the standalone agent immediately:
+
+```bash
+pip install mlx-code
+mlxcode # Runs standalone mode by default
+```
+
+### Options
+
+You can customize the model, server, and behavior using command-line flags:
+
+```bash
+mlxcode [options] [-- harness options]
+```
+
+- **Standalone (Default):** Uses the built-in `pie` agent (a Python port of [pi](https://github.com/badlogic/pi-mono)).
+- **Harness Mode:** Use `--harness <command>` to proxy an external agent (e.g., `claude`).
+
+### Credits
+
+- `main`: Built on [MLX](https://github.com/ml-explore/mlx) and [MLX LM](https://github.com/ml-explore/mlx-lm) by Apple.
+- `pie`: Adapted from [pi](https://github.com/badlogic/pi-mono) by Mario Zechner (MIT License).
+
+### License
+
+Apache License 2.0 — see LICENSE for details.
+
+
@@ -0,0 +1,34 @@
+# mlx-code: Local Coding Agent
+
+[![Link](https://raw.githubusercontent.com/JosefAlbers/mlx-code/main/assets/mlx-code.gif)](https://youtu.be/Rba-uTsYuXg)
+
+### Quick Start
+
+Install via pip and launch the standalone agent immediately:
+
+```bash
+pip install mlx-code
+mlxcode # Runs standalone mode by default
+```
+
+### Options
+
+You can customize the model, server, and behavior using command-line flags:
+
+```bash
+mlxcode [options] [-- harness options]
+```
+
+- **Standalone (Default):** Uses the built-in `pie` agent (a Python port of [pi](https://github.com/badlogic/pi-mono)).
+- **Harness Mode:** Use `--harness <command>` to proxy an external agent (e.g., `claude`).
+
+### Credits
+
+- `main`: Built on [MLX](https://github.com/ml-explore/mlx) and [MLX LM](https://github.com/ml-explore/mlx-lm) by Apple.
+- `pie`: Adapted from [pi](https://github.com/badlogic/pi-mono) by Mario Zechner (MIT License).
+
+### License
+
+Apache License 2.0 — see LICENSE for details.
+
+
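
As a concrete illustration of the harness-mode syntax documented above (only `--harness <command>` and the `--` separator come from the README; the forwarded flag is illustrative):

```bash
# Harness mode (sketch): proxy the external `claude` agent through the
# local server; anything after `--` is forwarded to the harness command,
# not parsed by mlxcode itself.
mlxcode --harness claude -- --help
```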
@@ -125,6 +125,7 @@ def parse_tool(tools, names):
     return qwen_tools
 
 def encode(body, tokenizer, system, names, skips):
+    trace_logger.debug(body)
     msgs = []
     sys_parts = []
     if isinstance(system, str):
@@ -182,15 +183,20 @@ def encode(body, tokenizer, system, names, skips):
         if parts:
             msgs.append({"role": role}|parts)
     if not msgs[-1].get('content', '').strip():
-        return None, ''
+        return None, -1
     apply_chat_template = lambda x: tokenizer.apply_chat_template(x, tools = parse_tool(body.get("tools", []), names), tokenize=False, add_generation_prompt=True)
-    full = apply_chat_template(msgs)
+    full_s = apply_chat_template(msgs)
     last_user_idx = max((i for i, m in enumerate(msgs) if m.get("role") == "user"), default=None)
     if last_user_idx is None:
-        return full, ''
+        return None, -1
     p_msgs = msgs[:last_user_idx] + [dict(role='user', content='h' if msgs[last_user_idx]['content'][0] != 'h' else 'i')]
-    pref = apply_chat_template(p_msgs)
-    return full, pref
+    prfx_s = apply_chat_template(p_msgs)
+    add_special_tokens = tokenizer.bos_token is None or not full_s.startswith(tokenizer.bos_token)
+    full = tokenizer.encode(full_s, add_special_tokens=add_special_tokens)
+    prfx = tokenizer.encode(prfx_s, add_special_tokens=add_special_tokens)
+    save_at = get_common_len(full, prfx)
+    stream_logger.debug(f'{save_at}\n{dmca(tokenizer.decode(full[:save_at]))}\n---\n{dmca(tokenizer.decode(full[save_at:]))}')
+    return full, save_at
 
 def decode(raw_text, tokenizer, parse_think, single_think=False):
     def escape(text):
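
The new `save_at` value comes from `get_common_len(full, prfx)`: `encode` renders the prompt twice, once for real and once with the last user turn replaced by a one-character dummy, and the length of the shared token prefix bounds how much of the KV cache can be reused across requests. `get_common_len` itself is not shown in this diff; a minimal sketch of the behavior the call sites assume:

```python
def get_common_len(a, b):
    # Length of the shared leading run of two token-id sequences.
    # Sketch only: the real helper lives elsewhere in main.py and may differ.
    n = 0
    for x, y in zip(a, b):
        if x != y:
            break
        n += 1
    return n
```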
@@ -286,7 +292,10 @@ def dmca(p_str):
     p_str = re.sub(pattern, lambda m: mask_text(m.group(0)), p_str)
     return p_str
 
-def make_handler(model, tokenizer, system, names, skips, parse_think=True):
+def make_handler(model_name, system, names, skips, parse_think=True):
+    model, tokenizer = mlx_lm.load(model_name)
+    if not isinstance(tokenizer, mlx_lm.tokenizer_utils.TokenizerWrapper):
+        tokenizer = mlx_lm.tokenizer_utils.TokenizerWrapper(tokenizer)
     class Handler(BaseHTTPRequestHandler):
         def log_message(self, fmt, *args):
             pass
@@ -316,9 +325,9 @@ def make_handler(model, tokenizer, system, names, skips, parse_think=True):
                 return
             n = int(self.headers.get("Content-Length", 0))
             body = json.loads(self.rfile.read(n))
-            prompt, pref = encode(body, tokenizer, system, names, skips)
+            prompt, save_at = encode(body, tokenizer, system, names, skips)
             with gen_lock:
-                raw, in_tokens, out_tokens = generate(model, tokenizer, pref=pref, prompt=prompt, max_tokens=body.get("max_tokens", 8192))
+                raw, in_tokens, out_tokens = generate(model, tokenizer, prompt=prompt, save_at=save_at, max_tokens=body.get("max_tokens", 8192))
             blocks, stop_reason = decode(raw, tokenizer, parse_think=parse_think)
             msg_id = f"msg_{uuid.uuid4().hex}"
             sse = blocks_to_sse(blocks, msg_id, in_tokens, out_tokens, stop_reason)
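
The handler above reads a JSON body with Anthropic-style fields (`max_tokens`, `tools`, messages) and streams SSE blocks back, so a request like the following should exercise it. The host, port, and path here are assumptions; the server's defaults and routing are not visible in this hunk:

```bash
# Hypothetical smoke test against the local server (port and path assumed):
curl -N http://localhost:8080/v1/messages \
  -H 'content-type: application/json' \
  -d '{"max_tokens": 256, "messages": [{"role": "user", "content": "hello"}]}'
```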
@@ -354,9 +363,9 @@ def main():
     parser.add_argument("--model", default="mlx-community/Qwen3.5-4B-OptiQ-4bit")
     # parser.add_argument("--model", default="mlx-community/Qwen3.5-2B-OptiQ-4bit")
     # parser.add_argument("--model", default="mlx-community/Qwen3.5-0.8B-MLX-bf16")
-    parser.add_argument("--system", type=str, default='')
+    # parser.add_argument("--system", type=str, default='')
     # parser.add_argument("--system", type=str, default='# Env\n{env}')
-    # parser.add_argument("--system", type=str, default=None)
+    parser.add_argument("--system", type=str, default=None)
     parser.add_argument("--cache", type=str, default='cache')
     # parser.add_argument("--names", nargs="+", default=[])
     parser.add_argument("--names", nargs="+", default=['Read','Edit','Write','Grep','Glob','Bash','Agent','Skill'])
@@ -374,8 +383,7 @@ def main():
     Path(args.cache).mkdir(parents=True, exist_ok=True)
     global dict_cache
     dict_cache = dict(model_name=args.model, cache_dir = args.cache)
-    model, tokenizer = mlx_lm.load(args.model)
-    server = HTTPServer((args.host, args.port), make_handler(model, tokenizer, args.system, args.names, args.skips))
+    server = HTTPServer((args.host, args.port), make_handler(args.model, args.system, args.names, args.skips))
    if args.nocc:
        try:
            server.serve_forever()
@@ -537,23 +545,11 @@ def generate_step(
         y, logprobs = next_y, next_logprobs
         n += 1
 
-def generate(model, tokenizer, prompt, pref, hook=None, max_tokens=256, helper_max_tokens=64, **kwargs):
-    global dict_cache
+def generate(model, tokenizer, prompt, save_at, hook=None, max_tokens=256, helper_max_tokens=64, **kwargs):
     if prompt is None:
         return '', 0, 0
-    if not isinstance(tokenizer, mlx_lm.tokenizer_utils.TokenizerWrapper):
-        tokenizer = mlx_lm.tokenizer_utils.TokenizerWrapper(tokenizer)
+    global dict_cache
     detokenizer = tokenizer.detokenizer
-    if isinstance(prompt, str):
-        add_special_tokens = tokenizer.bos_token is None or not prompt.startswith(tokenizer.bos_token)
-        prompt_s = prompt
-        prompt = tokenizer.encode(prompt, add_special_tokens=add_special_tokens)
-        _pref = tokenizer.encode(pref, add_special_tokens=add_special_tokens)
-        save_at = get_common_len(prompt, _pref)
-    else:
-        prompt_s = tokenizer.decode(prompt)
-        save_at = -1 # □ for now
-    stream_logger.debug(dmca(prompt_s))
     text = ''
     gens = []
     common_len = 0
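
The net effect of this hunk: the string-encoding branch moved out of `generate` and into `encode`, so `generate` now always receives token ids plus the precomputed `save_at`. The resulting call contract, as the handler hunk above uses it (a sketch; names and values are taken from the hunks, not a verbatim excerpt):

```python
# encode() returns (token_ids, save_at), or (None, -1) when there is
# nothing to generate; generate() short-circuits a None prompt to ('', 0, 0).
prompt, save_at = encode(body, tokenizer, system, names, skips)
raw, in_tokens, out_tokens = generate(
    model, tokenizer, prompt=prompt, save_at=save_at,
    max_tokens=body.get("max_tokens", 8192),
)
```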
@@ -626,7 +622,7 @@ def generate(model, tokenizer, prompt, pref, hook=None, max_tokens=256, helper_m
     detokenizer.finalize()
     text += detokenizer.last_segment
     dict_cache['hx'] = prompt+gens
-    trace_logger.debug(f'G {hx_len} {len(prompt)} {common_len} {trim_len} {len(gens)}\n=== TPS ===\n- Processed {len(prompt)} input tokens in {tic_inp-tic_non:.0f} seconds ({len(prompt)/(tic_inp-tic_non):.0f} tokens per second)\n- Generated {len(gens)} new tokens in {tic_out-tic_inp:.0f} seconds ({len(gens)/(tic_out-tic_inp):.0f} tokens per second)\n\n=== INP ===\n{dmca(prompt_s)}\n=== OUT ===\n{text}')
+    trace_logger.debug(f'G {hx_len} {len(prompt)} {common_len} {trim_len} {len(gens)}\n=== TPS ===\n- Processed {len(prompt)} input tokens in {tic_inp-tic_non:.0f} seconds ({len(prompt)/(tic_inp-tic_non):.0f} tokens per second)\n- Generated {len(gens)} new tokens in {tic_out-tic_inp:.0f} seconds ({len(gens)/(tic_out-tic_inp):.0f} tokens per second)\n\n=== INP ===\n{dmca(tokenizer.decode(prompt))}\n=== OUT ===\n{text}')
     return text, len(prompt), len(gens)
 
 if __name__ == "__main__":
@@ -0,0 +1,2 @@
+[console_scripts]
+mlxcode = main:main