mlx-code 0.0.35__tar.gz → 0.0.36__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (38)
  1. {mlx_code-0.0.35 → mlx_code-0.0.36}/PKG-INFO +2 -2
  2. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bats.py +46 -69
  3. mlx_code-0.0.36/mlx_code/main.py +533 -0
  4. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/mlxs.py +1 -0
  5. mlx_code-0.0.36/mlx_code/tfms.py +33 -0
  6. mlx_code-0.0.36/mlx_code/vlls.py +65 -0
  7. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/PKG-INFO +2 -2
  8. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/SOURCES.txt +1 -0
  9. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/requires.txt +1 -1
  10. {mlx_code-0.0.35 → mlx_code-0.0.36}/setup.py +2 -2
  11. mlx_code-0.0.35/mlx_code/main.py +0 -513
  12. mlx_code-0.0.35/mlx_code/vlls.py +0 -88
  13. {mlx_code-0.0.35 → mlx_code-0.0.36}/LICENSE +0 -0
  14. {mlx_code-0.0.35 → mlx_code-0.0.36}/README.md +0 -0
  15. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/__init__.py +0 -0
  16. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/apis.py +0 -0
  17. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bare.py +0 -0
  18. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bats_plan.py +0 -0
  19. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bench_bats.py +0 -0
  20. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/gits.py +0 -0
  21. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/lsp_tool.py +0 -0
  22. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/mcb.py +0 -0
  23. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/mcb_tool.py +0 -0
  24. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/repl.py +0 -0
  25. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/stream_log.py +0 -0
  26. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/test_bats_plan.py +0 -0
  27. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/tools.py +0 -0
  28. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/tui.py +0 -0
  29. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/util.py +0 -0
  30. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/view_git.py +0 -0
  31. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/view_log.py +0 -0
  32. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/web.py +0 -0
  33. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/dependency_links.txt +0 -0
  34. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/entry_points.txt +0 -0
  35. {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/top_level.txt +0 -0
  36. {mlx_code-0.0.35 → mlx_code-0.0.36}/setup.cfg +0 -0
  37. {mlx_code-0.0.35 → mlx_code-0.0.36}/tests/__init__.py +0 -0
  38. {mlx_code-0.0.35 → mlx_code-0.0.36}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlx-code
3
- Version: 0.0.35
3
+ Version: 0.0.36
4
4
  Summary: Coding Agent for Mac
5
5
  Home-page: https://josefalbers.github.io/mlx-code/
6
6
  Author: J Joe
@@ -13,7 +13,7 @@ Requires-Python: >=3.12.8
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: mlx-lm>=0.31.3; platform_system == "Darwin"
16
- Requires-Dist: vllm; platform_system != "Darwin"
16
+ Requires-Dist: transformers; platform_system != "Darwin"
17
17
  Requires-Dist: httpx
18
18
  Requires-Dist: pydantic
19
19
  Provides-Extra: all
@@ -11,9 +11,9 @@ _M='inserted'
11
11
  _L='submit_ts'
12
12
  _K='cancel'
13
13
  _J='asyncio.Queue'
14
- _I=False
15
- _H='seed_none'
16
- _G='submitted'
14
+ _I='seed_none'
15
+ _H='submitted'
16
+ _G=False
17
17
  _F='insert_ts'
18
18
  _E='rid'
19
19
  _D='out'
@@ -36,7 +36,7 @@ DEFAULT_SKIPS=['(?m)^\\[SUGGESTION MODE[\\s\\S]*','(?m)^<system-reminder>[\\s\\S
36
36
  PREFILL_STEP=2048
37
37
  MAX_ENGINE_ERRORS=3
38
38
  class Tracer:
39
- _COUNTERS=_G,_M,_N,_O,_P,_Q,_R,_H,_S,_T
39
+ _COUNTERS=_H,_M,_N,_O,_P,_Q,_R,_I,_S,_T
40
40
  def __init__(A,path:Optional[str]=_A):
41
41
  B=path;A._f=open(B,'a',buffering=1)if B else _A;A._lock=threading.Lock();A._c={A:0 for A in A._COUNTERS}
42
42
  if B:logger.info('[bats] tracing to %s',B)
@@ -54,10 +54,10 @@ class CheckpointStore:
54
54
  def load(A,prefix_tokens):from mlx_lm.models.cache import load_prompt_cache as B;C,D=B(str(A.path(prefix_tokens)),return_metadata=_B);return C
55
55
  def save(C,prefix_tokens,cache)->bool:
56
56
  from mlx_lm.models.cache import save_prompt_cache as D;A=C.path(prefix_tokens)
57
- if A.exists():return _I
57
+ if A.exists():return _G
58
58
  B=A.with_name(A.stem+'.tmp.safetensors');D(str(B),cache);B.replace(A);return _B
59
59
  def _make_think_guard(te:int,max_tokens:int,prompt_len:int):
60
- F=prompt_len;D='closed';C='gen_tokens';from.main import is_stuck as H;I=int(max_tokens*.9);A={D:_I,C:[]}
60
+ F=prompt_len;D='closed';C='gen_tokens';from.main import is_stuck as H;I=int(max_tokens*.9);A={D:_G,C:[]}
61
61
  def B(tokens,logits):
62
62
  G=tokens;B=logits
63
63
  if A[D]:return B
@@ -176,67 +176,52 @@ class Engine:
176
176
  try:A.bg.remove(B)
177
177
  except Exception:pass
178
178
  def make_batch_app(model_name:str,cache_dir:str=_U,*,system:Optional[str]=_A,tool_names:Optional[list]=_A,skips:Optional[list]=_A,think_tags:Optional[list]=_A,ram_seed_slots:int=4,completion_batch_size:int=32,prefill_batch_size:int=8,trace_path:Optional[str]=_A)->Starlette:
179
- P='GET';V='submit';U='empty prompt';T='max_tokens';S='text';R='model not loaded';Q='disk';O=ram_seed_slots;N='model';M='none';L='store';K='tok';J=trace_path;I='ram';H='engine';F=skips;E=model_name;D='error';B='POST';C=think_tags;F=DEFAULT_SKIPS if F is _A else F;C=['<think>','</think>']if C is _A else C;J=J or os.environ.get('BATS_TRACE');A:dict[str,Any]={H:_A,K:_A,L:_A,I:_A}
179
+ M='submit';L='disk';K=ram_seed_slots;J='none';I='store';H='tok';G=trace_path;F='ram';E=skips;B='engine';D=model_name;C=think_tags;from.import main as N;E=DEFAULT_SKIPS if E is _A else E;C=['<think>','</think>']if C is _A else C;G=G or os.environ.get('BATS_TRACE');A:dict[str,Any]={B:_A,H:_A,I:_A,F:_A}
180
180
  @asynccontextmanager
181
- async def W(_app):
182
- from mlx_lm import load;from mlx_lm.tokenizer_utils import TokenizerWrapper as G;logger.info('[bats] loading model %r',E);L,B=load(E)
183
- if not isinstance(B,G):B=G(B)
181
+ async def O(_app):
182
+ from mlx_lm import load;from mlx_lm.tokenizer_utils import TokenizerWrapper as H;logger.info('[bats] loading model %r',D);M,B=load(D)
183
+ if not isinstance(B,H):B=H(B)
184
184
  if _new_detokenizer(B)is _new_detokenizer(B):logger.warning('[bats] detokenizer factory returned a shared instance!')
185
- D=B.convert_tokens_to_ids(C[1]);H=getattr(B,'unk_token_id',_A)
186
- if D is _A or H is not _A and D==H:D=_A;logger.info('[bats] no %s token; doom-loop guard disabled',C[1])
187
- I=CheckpointStore(E,cache_dir);K=RamSeedCache(O);M=Tracer(J);F=Engine(L,B,I,K,D,asyncio.get_running_loop(),M,completion_batch_size=completion_batch_size,prefill_batch_size=prefill_batch_size);F.start();A.update(engine=F,tok=B,store=I,ram=K);logger.info('[bats] ready (ram_seed_slots=%d)',O)
185
+ E=B.convert_tokens_to_ids(C[1]);I=getattr(B,'unk_token_id',_A)
186
+ if E is _A or I is not _A and E==I:E=_A;logger.info('[bats] no %s token; doom-loop guard disabled',C[1])
187
+ J=CheckpointStore(D,cache_dir);L=RamSeedCache(K);N=Tracer(G);F=Engine(M,B,J,L,E,asyncio.get_running_loop(),N,completion_batch_size=completion_batch_size,prefill_batch_size=prefill_batch_size);F.start();A.update(engine=F,tok=B,store=J,ram=L);logger.info('[bats] ready (ram_seed_slots=%d)',K)
188
188
  try:yield
189
189
  finally:F.stop()
190
- def d(prompt,ckpts):
191
- C=prompt;D,E=A[L],A[I]
192
- def H(c):A=C[:c];return E.get(hash_tokens(A))is not _A or D.path(A).exists()
193
- B=choose_seed_ckpt(ckpts,H)
194
- if B==0:return _A,0,M
195
- F=C[:B];G=E.get(hash_tokens(F))
196
- if G is not _A:return G,B,I
197
- try:return D.load(F),B,Q
198
- except Exception as J:logger.info('[bats] checkpoint load failed (%s); falling back to recompute',J);return _A,0,M
199
- async def e(api,q,cancel,msg_id,in_tokens,think_tags,initial_state='thinking'):
200
- from.import main as A;B=A.select_adapter(api,msg_id,in_tokens);E=initial_state;F=''
190
+ def P(prompt,ckpts):
191
+ C=prompt;D,E=A[I],A[F]
192
+ def K(c):A=C[:c];return E.get(hash_tokens(A))is not _A or D.path(A).exists()
193
+ B=choose_seed_ckpt(ckpts,K)
194
+ if B==0:return _A,0,J
195
+ G=C[:B];H=E.get(hash_tokens(G))
196
+ if H is not _A:return H,B,F
197
+ try:return D.load(G),B,L
198
+ except Exception as M:logger.info('[bats] checkpoint load failed (%s); falling back to recompute',M);return _A,0,J
199
+ async def Q(q,cancel):
201
200
  try:
202
- yield B.start()
203
201
  while _B:
204
- C=await q.get()
205
- if C is _A:break
206
- F+=C;G,E=A.apply_think_state(B,E,C,think_tags)
207
- for D in G:yield D
208
- for D in A.finish_sse(B,F):yield D
202
+ A=await q.get()
203
+ if A is _A:break
204
+ yield A
209
205
  finally:cancel.set()
210
- async def G(request:Request):
211
- L=request;from.import main as W;B=A[H]
212
- if B is _A:return JSONResponse({D:R},status_code=503)
213
- X=L.url.path.split('?')[0].rstrip('/');G=W.detect_api_from_path(X)
214
- if G=='gemini':
215
- f=str(L.url.query)or''
216
- if'alt=sse'not in f and'streamGenerateContent'not in X:return JSONResponse({'candidates':[{'content':{'role':N,'parts':[{S:'{"complexity_reasoning":"local","complexity_score":50}'}]},'finishReason':'STOP'}],'usageMetadata':{'promptTokenCount':0,'candidatesTokenCount':0}})
217
- O=await L.json();Y=int(O.get(T,O.get('max_completion_tokens',8192)))
218
- try:E,J,g=W.encode(O,G,A[K],system,tool_names,F,think_tags=C)
219
- except Exception as h:logger.exception('[bats] encode failed');return JSONResponse({D:f"encode: {h}"},status_code=500)
220
- if J is _A or not E:return JSONResponse({D:U},status_code=400)
221
- P=uuid.uuid4().hex[:8];i=time.perf_counter();B.tracer.count(_G);B.tracer.event(V,rid=P,api=G,prompt_tokens=len(E),n_ckpts=len(J),max_tokens=Y);j=asyncio.get_running_loop();k=time.perf_counter();l,Z,a=await j.run_in_executor(_A,d,E,J);B.tracer.count({I:_Q,Q:_R,M:_H}[a]);B.tracer.event('seed',rid=P,seed_len=Z,source=a,lookup_ms=round((time.perf_counter()-k)*1000,1));b:_J=asyncio.Queue();c=threading.Event();B.submit(_Req(E,J,l,Z,Y,b,c,P,i));m=f"msg_{uuid.uuid4().hex}";return StreamingResponse(e(G,b,c,m,len(E),C,initial_state=g),media_type='text/event-stream')
222
- async def X(request:Request):
223
- P='messages';B=A[H]
224
- if B is _A:return JSONResponse({D:R},status_code=503)
225
- C=await request.json();I=A[K];J=int(C.get(T,256))
226
- if P in C:L=I.apply_chat_template(C[P],tokenize=_I,add_generation_prompt=_B)
227
- else:L=C.get('prompt','')
228
- E=I.encode(L)
229
- if not E:return JSONResponse({D:U},status_code=400)
230
- M=uuid.uuid4().hex[:8];Q=time.perf_counter();B.tracer.count(_G);B.tracer.count(_H);B.tracer.event(V,rid=M,api='generate',prompt_tokens=len(E),n_ckpts=0,max_tokens=J);F:_J=asyncio.Queue();G=threading.Event();B.submit(_Req(list(E),[],_A,0,J,F,G,M,Q))
231
- if C.get('stream',_B):
232
- async def W():
206
+ async def R(api,prompt,ckpts,*,max_tokens,temperature,top_p,rid):H=max_tokens;G=rid;E=ckpts;D=prompt;C=A[B];R=time.perf_counter();C.tracer.count(_H);C.tracer.event(M,rid=G,api=api,prompt_tokens=len(D),n_ckpts=len(E),max_tokens=H);S=asyncio.get_running_loop();T=time.perf_counter();U,I,K=await S.run_in_executor(_A,P,D,E);C.tracer.count({F:_Q,L:_R,J:_I}[K]);C.tracer.event('seed',rid=G,seed_len=I,source=K,lookup_ms=round((time.perf_counter()-T)*1000,1));N:_J=asyncio.Queue();O=threading.Event();C.submit(_Req(D,E,U,I,H,N,O,G,R));return Q(N,O)
207
+ async def S(request:Request):
208
+ Q='messages';P='error';C=A[B]
209
+ if C is _A:return JSONResponse({P:'model not loaded'},status_code=503)
210
+ D=await request.json();I=A[H];J=int(D.get('max_tokens',256))
211
+ if Q in D:K=I.apply_chat_template(D[Q],tokenize=_G,add_generation_prompt=_B)
212
+ else:K=D.get('prompt','')
213
+ E=I.encode(K)
214
+ if not E:return JSONResponse({P:'empty prompt'},status_code=400)
215
+ L=uuid.uuid4().hex[:8];R=time.perf_counter();C.tracer.count(_H);C.tracer.count(_I);C.tracer.event(M,rid=L,api='generate',prompt_tokens=len(E),n_ckpts=0,max_tokens=J);F:_J=asyncio.Queue();G=threading.Event();C.submit(_Req(list(E),[],_A,0,J,F,G,L,R))
216
+ if D.get('stream',_B):
217
+ async def S():
233
218
  try:
234
219
  while _B:
235
220
  A=await F.get()
236
221
  if A is _A:break
237
222
  yield A
238
223
  finally:G.set()
239
- return StreamingResponse(W(),media_type='text/plain')
224
+ return StreamingResponse(S(),media_type='text/plain')
240
225
  N=[]
241
226
  try:
242
227
  while _B:
@@ -244,19 +229,11 @@ def make_batch_app(model_name:str,cache_dir:str=_U,*,system:Optional[str]=_A,too
244
229
  if O is _A:break
245
230
  N.append(O)
246
231
  finally:G.set()
247
- return JSONResponse({S:''.join(N)})
248
- async def Y(_req):A='local';return JSONResponse({'data':[{'id':A,'object':N,'created':int(time.time()),'owned_by':A}]})
249
- async def Z(_req):return JSONResponse({'input_tokens':0})
250
- async def a(_req):
251
- C,B=A[L],A[H];F=sum(1 for A in C.cache_dir.glob('*.safetensors'))if C else 0;D={'status':'ok',N:E,'active_sequences':len(B.active)if B else 0,'checkpoint_files':F}
252
- if B:D['counters']=B.tracer.snapshot()
253
- return JSONResponse(D)
254
- return Starlette(routes=[Route('/v1/models',Y,methods=[P]),Route('/v1/messages/count_tokens',Z,methods=[B]),Route('/v1/chat/completions',G,methods=[B]),Route('/v1/messages',G,methods=[B]),Route('/v1/responses',G,methods=[B]),Route('/v1beta/models/{rest:path}',G,methods=[B]),Route('/generate',X,methods=[B]),Route('/health',a,methods=[P])],lifespan=W)
255
- class BatchServer:
256
- def __init__(A,app,host:str,port:int):A._server=uvicorn.Server(uvicorn.Config(app,host=host,port=port,loop='asyncio',log_level='warning'));A.host=host;A.port=port
257
- def serve_forever(A):A._server.run()
258
- @property
259
- def started(self)->bool:return self._server.started
260
- def stop(A):A._server.should_exit=_B
261
- def make_batch_server(host:str,port:int,model,cache_dir:str=_U,*,system:Optional[str]=_A,tool_names:Optional[list]=_A,skips:Optional[list]=_A,think_tags:Optional[list]=_A,ram_seed_slots:int=4,trace_path:Optional[str]=_A)->BatchServer:A=make_batch_app(model,cache_dir=cache_dir,system=system,tool_names=tool_names,skips=skips,think_tags=think_tags,ram_seed_slots=ram_seed_slots,trace_path=trace_path);return BatchServer(A,host,port)
232
+ return JSONResponse({'text':''.join(N)})
233
+ async def T(_req):
234
+ E,C=A[I],A[B];G=sum(1 for A in E.cache_dir.glob('*.safetensors'))if E else 0;F={'status':'ok','model':D,'active_sequences':len(C.active)if C else 0,'checkpoint_files':G}
235
+ if C:F['counters']=C.tracer.snapshot()
236
+ return JSONResponse(F)
237
+ return N.make_async_app(model_name=D,ready=lambda:A[B]is not _A,encode_tok=lambda:A[H],submit=R,system=system,tool_names=tool_names,skips=E,think_tags=C,lifespan=O,health=T,extra_routes=[('/generate',['POST'],S)])
238
+ def make_server(host,port,model,*,cache_dir=_U,think_tags=_A,ram_seed_slots=4,trace_path=_A,fixed_port=_G):B=host;A=port;from.import main as C;A=C._find_port(B,A,fixed_port);E=make_batch_app(model,cache_dir=cache_dir,think_tags=think_tags,ram_seed_slots=ram_seed_slots,trace_path=trace_path);F=C.UvicornServer(E,B,A);D=f"http://{B}:{A}";logger.debug('batch server bound to %s',D);return F,D
262
239
  if __name__=='__main__':uvicorn.run(make_batch_app('mlx-community/Qwen3.5-4B-OptiQ-4bit'),host='0.0.0.0',port=8000)