mlx-code 0.0.35__tar.gz → 0.0.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mlx_code-0.0.35 → mlx_code-0.0.36}/PKG-INFO +2 -2
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bats.py +46 -69
- mlx_code-0.0.36/mlx_code/main.py +533 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/mlxs.py +1 -0
- mlx_code-0.0.36/mlx_code/tfms.py +33 -0
- mlx_code-0.0.36/mlx_code/vlls.py +65 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/PKG-INFO +2 -2
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/SOURCES.txt +1 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/requires.txt +1 -1
- {mlx_code-0.0.35 → mlx_code-0.0.36}/setup.py +2 -2
- mlx_code-0.0.35/mlx_code/main.py +0 -513
- mlx_code-0.0.35/mlx_code/vlls.py +0 -88
- {mlx_code-0.0.35 → mlx_code-0.0.36}/LICENSE +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/README.md +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/__init__.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/apis.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bare.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bats_plan.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/bench_bats.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/gits.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/lsp_tool.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/mcb.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/mcb_tool.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/repl.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/stream_log.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/test_bats_plan.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/tools.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/tui.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/util.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/view_git.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/view_log.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code/web.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/dependency_links.txt +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/entry_points.txt +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/mlx_code.egg-info/top_level.txt +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/setup.cfg +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/tests/__init__.py +0 -0
- {mlx_code-0.0.35 → mlx_code-0.0.36}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mlx-code
|
|
3
|
-
Version: 0.0.35
|
|
3
|
+
Version: 0.0.36
|
|
4
4
|
Summary: Coding Agent for Mac
|
|
5
5
|
Home-page: https://josefalbers.github.io/mlx-code/
|
|
6
6
|
Author: J Joe
|
|
@@ -13,7 +13,7 @@ Requires-Python: >=3.12.8
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Requires-Dist: mlx-lm>=0.31.3; platform_system == "Darwin"
|
|
16
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: transformers; platform_system != "Darwin"
|
|
17
17
|
Requires-Dist: httpx
|
|
18
18
|
Requires-Dist: pydantic
|
|
19
19
|
Provides-Extra: all
|
|
@@ -11,9 +11,9 @@ _M='inserted'
|
|
|
11
11
|
_L='submit_ts'
|
|
12
12
|
_K='cancel'
|
|
13
13
|
_J='asyncio.Queue'
|
|
14
|
-
_I=
|
|
15
|
-
_H='
|
|
16
|
-
_G=
|
|
14
|
+
_I='seed_none'
|
|
15
|
+
_H='submitted'
|
|
16
|
+
_G=False
|
|
17
17
|
_F='insert_ts'
|
|
18
18
|
_E='rid'
|
|
19
19
|
_D='out'
|
|
@@ -36,7 +36,7 @@ DEFAULT_SKIPS=['(?m)^\\[SUGGESTION MODE[\\s\\S]*','(?m)^<system-reminder>[\\s\\S
|
|
|
36
36
|
PREFILL_STEP=2048
|
|
37
37
|
MAX_ENGINE_ERRORS=3
|
|
38
38
|
class Tracer:
|
|
39
|
-
_COUNTERS=
|
|
39
|
+
_COUNTERS=_H,_M,_N,_O,_P,_Q,_R,_I,_S,_T
|
|
40
40
|
def __init__(A,path:Optional[str]=_A):
|
|
41
41
|
B=path;A._f=open(B,'a',buffering=1)if B else _A;A._lock=threading.Lock();A._c={A:0 for A in A._COUNTERS}
|
|
42
42
|
if B:logger.info('[bats] tracing to %s',B)
|
|
@@ -54,10 +54,10 @@ class CheckpointStore:
|
|
|
54
54
|
def load(A,prefix_tokens):from mlx_lm.models.cache import load_prompt_cache as B;C,D=B(str(A.path(prefix_tokens)),return_metadata=_B);return C
|
|
55
55
|
def save(C,prefix_tokens,cache)->bool:
|
|
56
56
|
from mlx_lm.models.cache import save_prompt_cache as D;A=C.path(prefix_tokens)
|
|
57
|
-
if A.exists():return
|
|
57
|
+
if A.exists():return _G
|
|
58
58
|
B=A.with_name(A.stem+'.tmp.safetensors');D(str(B),cache);B.replace(A);return _B
|
|
59
59
|
def _make_think_guard(te:int,max_tokens:int,prompt_len:int):
|
|
60
|
-
F=prompt_len;D='closed';C='gen_tokens';from.main import is_stuck as H;I=int(max_tokens*.9);A={D:
|
|
60
|
+
F=prompt_len;D='closed';C='gen_tokens';from.main import is_stuck as H;I=int(max_tokens*.9);A={D:_G,C:[]}
|
|
61
61
|
def B(tokens,logits):
|
|
62
62
|
G=tokens;B=logits
|
|
63
63
|
if A[D]:return B
|
|
@@ -176,67 +176,52 @@ class Engine:
|
|
|
176
176
|
try:A.bg.remove(B)
|
|
177
177
|
except Exception:pass
|
|
178
178
|
def make_batch_app(model_name:str,cache_dir:str=_U,*,system:Optional[str]=_A,tool_names:Optional[list]=_A,skips:Optional[list]=_A,think_tags:Optional[list]=_A,ram_seed_slots:int=4,completion_batch_size:int=32,prefill_batch_size:int=8,trace_path:Optional[str]=_A)->Starlette:
|
|
179
|
-
|
|
179
|
+
M='submit';L='disk';K=ram_seed_slots;J='none';I='store';H='tok';G=trace_path;F='ram';E=skips;B='engine';D=model_name;C=think_tags;from.import main as N;E=DEFAULT_SKIPS if E is _A else E;C=['<think>','</think>']if C is _A else C;G=G or os.environ.get('BATS_TRACE');A:dict[str,Any]={B:_A,H:_A,I:_A,F:_A}
|
|
180
180
|
@asynccontextmanager
|
|
181
|
-
async def
|
|
182
|
-
from mlx_lm import load;from mlx_lm.tokenizer_utils import TokenizerWrapper as
|
|
183
|
-
if not isinstance(B,
|
|
181
|
+
async def O(_app):
|
|
182
|
+
from mlx_lm import load;from mlx_lm.tokenizer_utils import TokenizerWrapper as H;logger.info('[bats] loading model %r',D);M,B=load(D)
|
|
183
|
+
if not isinstance(B,H):B=H(B)
|
|
184
184
|
if _new_detokenizer(B)is _new_detokenizer(B):logger.warning('[bats] detokenizer factory returned a shared instance!')
|
|
185
|
-
|
|
186
|
-
if
|
|
187
|
-
|
|
185
|
+
E=B.convert_tokens_to_ids(C[1]);I=getattr(B,'unk_token_id',_A)
|
|
186
|
+
if E is _A or I is not _A and E==I:E=_A;logger.info('[bats] no %s token; doom-loop guard disabled',C[1])
|
|
187
|
+
J=CheckpointStore(D,cache_dir);L=RamSeedCache(K);N=Tracer(G);F=Engine(M,B,J,L,E,asyncio.get_running_loop(),N,completion_batch_size=completion_batch_size,prefill_batch_size=prefill_batch_size);F.start();A.update(engine=F,tok=B,store=J,ram=L);logger.info('[bats] ready (ram_seed_slots=%d)',K)
|
|
188
188
|
try:yield
|
|
189
189
|
finally:F.stop()
|
|
190
|
-
def
|
|
191
|
-
C=prompt;D,E=A[
|
|
192
|
-
def
|
|
193
|
-
B=choose_seed_ckpt(ckpts,
|
|
194
|
-
if B==0:return _A,0,
|
|
195
|
-
|
|
196
|
-
if
|
|
197
|
-
try:return D.load(
|
|
198
|
-
except Exception as
|
|
199
|
-
async def
|
|
200
|
-
from.import main as A;B=A.select_adapter(api,msg_id,in_tokens);E=initial_state;F=''
|
|
190
|
+
def P(prompt,ckpts):
|
|
191
|
+
C=prompt;D,E=A[I],A[F]
|
|
192
|
+
def K(c):A=C[:c];return E.get(hash_tokens(A))is not _A or D.path(A).exists()
|
|
193
|
+
B=choose_seed_ckpt(ckpts,K)
|
|
194
|
+
if B==0:return _A,0,J
|
|
195
|
+
G=C[:B];H=E.get(hash_tokens(G))
|
|
196
|
+
if H is not _A:return H,B,F
|
|
197
|
+
try:return D.load(G),B,L
|
|
198
|
+
except Exception as M:logger.info('[bats] checkpoint load failed (%s); falling back to recompute',M);return _A,0,J
|
|
199
|
+
async def Q(q,cancel):
|
|
201
200
|
try:
|
|
202
|
-
yield B.start()
|
|
203
201
|
while _B:
|
|
204
|
-
|
|
205
|
-
if
|
|
206
|
-
|
|
207
|
-
for D in G:yield D
|
|
208
|
-
for D in A.finish_sse(B,F):yield D
|
|
202
|
+
A=await q.get()
|
|
203
|
+
if A is _A:break
|
|
204
|
+
yield A
|
|
209
205
|
finally:cancel.set()
|
|
210
|
-
async def G(
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
if
|
|
221
|
-
|
|
222
|
-
async def X(request:Request):
|
|
223
|
-
P='messages';B=A[H]
|
|
224
|
-
if B is _A:return JSONResponse({D:R},status_code=503)
|
|
225
|
-
C=await request.json();I=A[K];J=int(C.get(T,256))
|
|
226
|
-
if P in C:L=I.apply_chat_template(C[P],tokenize=_I,add_generation_prompt=_B)
|
|
227
|
-
else:L=C.get('prompt','')
|
|
228
|
-
E=I.encode(L)
|
|
229
|
-
if not E:return JSONResponse({D:U},status_code=400)
|
|
230
|
-
M=uuid.uuid4().hex[:8];Q=time.perf_counter();B.tracer.count(_G);B.tracer.count(_H);B.tracer.event(V,rid=M,api='generate',prompt_tokens=len(E),n_ckpts=0,max_tokens=J);F:_J=asyncio.Queue();G=threading.Event();B.submit(_Req(list(E),[],_A,0,J,F,G,M,Q))
|
|
231
|
-
if C.get('stream',_B):
|
|
232
|
-
async def W():
|
|
206
|
+
async def R(api,prompt,ckpts,*,max_tokens,temperature,top_p,rid):H=max_tokens;G=rid;E=ckpts;D=prompt;C=A[B];R=time.perf_counter();C.tracer.count(_H);C.tracer.event(M,rid=G,api=api,prompt_tokens=len(D),n_ckpts=len(E),max_tokens=H);S=asyncio.get_running_loop();T=time.perf_counter();U,I,K=await S.run_in_executor(_A,P,D,E);C.tracer.count({F:_Q,L:_R,J:_I}[K]);C.tracer.event('seed',rid=G,seed_len=I,source=K,lookup_ms=round((time.perf_counter()-T)*1000,1));N:_J=asyncio.Queue();O=threading.Event();C.submit(_Req(D,E,U,I,H,N,O,G,R));return Q(N,O)
|
|
207
|
+
async def S(request:Request):
|
|
208
|
+
Q='messages';P='error';C=A[B]
|
|
209
|
+
if C is _A:return JSONResponse({P:'model not loaded'},status_code=503)
|
|
210
|
+
D=await request.json();I=A[H];J=int(D.get('max_tokens',256))
|
|
211
|
+
if Q in D:K=I.apply_chat_template(D[Q],tokenize=_G,add_generation_prompt=_B)
|
|
212
|
+
else:K=D.get('prompt','')
|
|
213
|
+
E=I.encode(K)
|
|
214
|
+
if not E:return JSONResponse({P:'empty prompt'},status_code=400)
|
|
215
|
+
L=uuid.uuid4().hex[:8];R=time.perf_counter();C.tracer.count(_H);C.tracer.count(_I);C.tracer.event(M,rid=L,api='generate',prompt_tokens=len(E),n_ckpts=0,max_tokens=J);F:_J=asyncio.Queue();G=threading.Event();C.submit(_Req(list(E),[],_A,0,J,F,G,L,R))
|
|
216
|
+
if D.get('stream',_B):
|
|
217
|
+
async def S():
|
|
233
218
|
try:
|
|
234
219
|
while _B:
|
|
235
220
|
A=await F.get()
|
|
236
221
|
if A is _A:break
|
|
237
222
|
yield A
|
|
238
223
|
finally:G.set()
|
|
239
|
-
return StreamingResponse(
|
|
224
|
+
return StreamingResponse(S(),media_type='text/plain')
|
|
240
225
|
N=[]
|
|
241
226
|
try:
|
|
242
227
|
while _B:
|
|
@@ -244,19 +229,11 @@ def make_batch_app(model_name:str,cache_dir:str=_U,*,system:Optional[str]=_A,too
|
|
|
244
229
|
if O is _A:break
|
|
245
230
|
N.append(O)
|
|
246
231
|
finally:G.set()
|
|
247
|
-
return JSONResponse({
|
|
248
|
-
async def
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
return Starlette(routes=[Route('/v1/models',Y,methods=[P]),Route('/v1/messages/count_tokens',Z,methods=[B]),Route('/v1/chat/completions',G,methods=[B]),Route('/v1/messages',G,methods=[B]),Route('/v1/responses',G,methods=[B]),Route('/v1beta/models/{rest:path}',G,methods=[B]),Route('/generate',X,methods=[B]),Route('/health',a,methods=[P])],lifespan=W)
|
|
255
|
-
class BatchServer:
|
|
256
|
-
def __init__(A,app,host:str,port:int):A._server=uvicorn.Server(uvicorn.Config(app,host=host,port=port,loop='asyncio',log_level='warning'));A.host=host;A.port=port
|
|
257
|
-
def serve_forever(A):A._server.run()
|
|
258
|
-
@property
|
|
259
|
-
def started(self)->bool:return self._server.started
|
|
260
|
-
def stop(A):A._server.should_exit=_B
|
|
261
|
-
def make_batch_server(host:str,port:int,model,cache_dir:str=_U,*,system:Optional[str]=_A,tool_names:Optional[list]=_A,skips:Optional[list]=_A,think_tags:Optional[list]=_A,ram_seed_slots:int=4,trace_path:Optional[str]=_A)->BatchServer:A=make_batch_app(model,cache_dir=cache_dir,system=system,tool_names=tool_names,skips=skips,think_tags=think_tags,ram_seed_slots=ram_seed_slots,trace_path=trace_path);return BatchServer(A,host,port)
|
|
232
|
+
return JSONResponse({'text':''.join(N)})
|
|
233
|
+
async def T(_req):
|
|
234
|
+
E,C=A[I],A[B];G=sum(1 for A in E.cache_dir.glob('*.safetensors'))if E else 0;F={'status':'ok','model':D,'active_sequences':len(C.active)if C else 0,'checkpoint_files':G}
|
|
235
|
+
if C:F['counters']=C.tracer.snapshot()
|
|
236
|
+
return JSONResponse(F)
|
|
237
|
+
return N.make_async_app(model_name=D,ready=lambda:A[B]is not _A,encode_tok=lambda:A[H],submit=R,system=system,tool_names=tool_names,skips=E,think_tags=C,lifespan=O,health=T,extra_routes=[('/generate',['POST'],S)])
|
|
238
|
+
def make_server(host,port,model,*,cache_dir=_U,think_tags=_A,ram_seed_slots=4,trace_path=_A,fixed_port=_G):B=host;A=port;from.import main as C;A=C._find_port(B,A,fixed_port);E=make_batch_app(model,cache_dir=cache_dir,think_tags=think_tags,ram_seed_slots=ram_seed_slots,trace_path=trace_path);F=C.UvicornServer(E,B,A);D=f"http://{B}:{A}";logger.debug('batch server bound to %s',D);return F,D
|
|
262
239
|
if __name__=='__main__':uvicorn.run(make_batch_app('mlx-community/Qwen3.5-4B-OptiQ-4bit'),host='0.0.0.0',port=8000)
|