@modular-prompt/driver 0.11.14 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +25 -0
  2. package/dist/content-utils.d.ts +9 -0
  3. package/dist/content-utils.d.ts.map +1 -1
  4. package/dist/content-utils.js +27 -0
  5. package/dist/content-utils.js.map +1 -1
  6. package/dist/mlx-ml/mlx-driver.d.ts +1 -1
  7. package/dist/mlx-ml/mlx-driver.d.ts.map +1 -1
  8. package/dist/mlx-ml/mlx-driver.js +36 -45
  9. package/dist/mlx-ml/mlx-driver.js.map +1 -1
  10. package/dist/mlx-ml/process/harmony-parser.d.ts +3 -0
  11. package/dist/mlx-ml/process/harmony-parser.d.ts.map +1 -0
  12. package/dist/mlx-ml/process/harmony-parser.js +175 -0
  13. package/dist/mlx-ml/process/harmony-parser.js.map +1 -0
  14. package/dist/mlx-ml/process/index.d.ts +1 -1
  15. package/dist/mlx-ml/process/index.d.ts.map +1 -1
  16. package/dist/mlx-ml/process/index.js +2 -2
  17. package/dist/mlx-ml/process/index.js.map +1 -1
  18. package/dist/mlx-ml/process/model-handlers.d.ts +9 -1
  19. package/dist/mlx-ml/process/model-handlers.d.ts.map +1 -1
  20. package/dist/mlx-ml/process/model-handlers.js +17 -0
  21. package/dist/mlx-ml/process/model-handlers.js.map +1 -1
  22. package/dist/mlx-ml/process/parameter-validator.d.ts.map +1 -1
  23. package/dist/mlx-ml/process/parameter-validator.js +10 -3
  24. package/dist/mlx-ml/process/parameter-validator.js.map +1 -1
  25. package/dist/mlx-ml/process/queue.d.ts +1 -1
  26. package/dist/mlx-ml/process/queue.d.ts.map +1 -1
  27. package/dist/mlx-ml/process/queue.js +3 -2
  28. package/dist/mlx-ml/process/queue.js.map +1 -1
  29. package/dist/mlx-ml/process/response-processor.d.ts +8 -0
  30. package/dist/mlx-ml/process/response-processor.d.ts.map +1 -0
  31. package/dist/mlx-ml/process/response-processor.js +2 -0
  32. package/dist/mlx-ml/process/response-processor.js.map +1 -0
  33. package/dist/mlx-ml/process/types.d.ts +1 -0
  34. package/dist/mlx-ml/process/types.d.ts.map +1 -1
  35. package/dist/mlx-ml/tool-call-parser.d.ts.map +1 -1
  36. package/dist/mlx-ml/tool-call-parser.js +0 -43
  37. package/dist/mlx-ml/tool-call-parser.js.map +1 -1
  38. package/dist/mlx-ml/types.d.ts +1 -0
  39. package/dist/mlx-ml/types.d.ts.map +1 -1
  40. package/dist/openai/openai-driver.d.ts +0 -2
  41. package/dist/openai/openai-driver.d.ts.map +1 -1
  42. package/dist/openai/openai-driver.js.map +1 -1
  43. package/dist/types.d.ts +7 -0
  44. package/dist/types.d.ts.map +1 -1
  45. package/package.json +3 -3
  46. package/src/mlx-ml/python/__main__.py +33 -9
  47. package/src/mlx-ml/python/example_basic.py +93 -0
  48. package/src/mlx-ml/python/example_tool_call.py +165 -0
  49. package/src/mlx-ml/python/pyproject.toml +2 -2
  50. package/src/mlx-ml/python/token_utils.py +12 -3
  51. package/src/mlx-ml/python/uv.lock +55 -38
@@ -0,0 +1,93 @@
1
+ # This file contains code to use LLM-jp-4 models with mlx-lm on Apple Silicon.
2
+
3
+ from mlx_lm import load, stream_generate
4
+ from mlx_lm.sample_utils import make_sampler
5
+
6
+
7
def main():
    """Run one chat turn end to end and print every intermediate stage.

    Loads the model and tokenizer, renders the chat template into a prompt,
    samples up to 1024 tokens, and then prints the raw token IDs, the decoded
    text, the result of ``tokenizer.parse_response`` and the per-message
    breakdown returned by ``tokenizer.parse_harmony_message``.
    """
    # Alternative checkpoint: "llm-jp/llm-jp-4-8b-instruct"
    model, tokenizer = load(
        "llm-jp/llm-jp-4-8b-thinking",
        tokenizer_config={"trust_remote_code": True},
    )

    conversation = [{"role": "user", "content": "日本語で自己紹介してください。"}]
    prompt: str = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
        reasoning_effort="medium",
    )
    print("--- Prompt ---")
    print(prompt)

    input_ids = tokenizer.encode(prompt)
    print("--- Input IDs ---")
    print(input_ids)

    sampler = make_sampler(temp=0.7, top_p=0.9)
    # Drain the generation stream, keeping only the token ID of each step.
    generated_ids: list[int] = [
        step.token
        for step in stream_generate(
            model,
            tokenizer,
            prompt=input_ids,
            max_tokens=1024,
            sampler=sampler,
        )
    ]
    print("--- Generated IDs ---")
    print(generated_ids)

    response = tokenizer.decode(generated_ids)
    print("\n--- Response ---")
    print(response)

    parsed = tokenizer.parse_response(response)
    print("\n--- Parsed Response ---")
    for caption, key in (
        ("Role:", "role"),
        ("Thinking:", "thinking"),
        ("Content:", "content"),
    ):
        print(caption, parsed.get(key))

    # The Harmony parser is bundled as the tokenizer's parse_harmony_message
    # method. It accepts a list of token IDs (not strings) and returns a list
    # of Harmony message objects with the tokens split per section.
    #
    # To parse the response correctly, the prefill tokens that open the
    # assistant's turn must be prepended to the generated IDs.
    prefill_ids = tokenizer.encode("<|start|>assistant")
    harmony_messages = tokenizer.parse_harmony_message(prefill_ids + generated_ids)

    print("\n--- Parsed Harmony Messages ---")
    for index, message in enumerate(harmony_messages, start=1):
        print(f"Message {index}:")

        # The end type can be "END", "CALL", or "INCOMPLETE".
        print(" End Type:", message.end)

        # Each section is optional; report only the ones that are present.
        for section in ("Role", "Channel", "Constrain", "Content"):
            segment = getattr(message, section.lower())
            if not segment:
                continue
            print(f" {section} Tokens:", segment.token_ids)
            print(f" {section} Text:", repr(tokenizer.decode(segment.token_ids)))
            print(f" {section} Start Position:", segment.start)
90
+
91
+
92
+ if __name__ == "__main__":
93
+ main()
@@ -0,0 +1,165 @@
1
+ # Tool call example using LLM-jp-4 with mlx-lm on Apple Silicon.
2
+
3
+ from mlx_lm import load, stream_generate
4
+ from mlx_lm.sample_utils import make_sampler
5
+
6
+
7
def generate_response(model, tokenizer, input_ids, sampler):
    """Generate up to 1024 tokens for ``input_ids`` and return their IDs.

    Runs ``stream_generate`` to exhaustion and collects, in order, the
    ``token`` attribute of every response object it yields.
    """
    return [
        step.token
        for step in stream_generate(
            model,
            tokenizer,
            prompt=input_ids,
            max_tokens=1024,
            sampler=sampler,
        )
    ]
15
+
16
+
17
def main():
    """Ask the model for a tool call and print the parsed Harmony messages.

    Builds a prompt from two tool definitions and a few-shot conversation,
    generates a response with ``generate_response``, prints the decoded text,
    and then prints the role/channel/constrain/content sections of each
    message returned by ``tokenizer.parse_harmony_message``.
    """
    model, tokenizer = load(
        # "llm-jp/llm-jp-4-8b-thinking",
        # "llm-jp/llm-jp-4-8b-instruct",
        # "mlx-community/llm-jp-4-32b-a3b-thinking-4bit",
        "mlx-community/Qwen3.6-27B-4bit",
        # tokenizer_config={"trust_remote_code": True},
    )

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_time",
                "description": "現在の日時を取得する",
                "parameters": {
                    "type": "object",
                    "properties": {},
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "指定された都市の現在の天気を取得する",
                "parameters": {
                    "type": "object",
                    "required": ["city"],
                    "properties": {
                        "city": {
                            "type": "string",
                            "description": "都市名(例: 東京、大阪)",
                        },
                    },
                },
            },
        },
    ]

    messages = [
        # A disabled variant also appended the raw wire format of a call to
        # this developer prompt:
        #   <|start|>assistant to=functions.get_current_time<|channel|>commentary json<|message|>{"locate": "Asia/Tokyo"}<|call|>
        {"role": "developer", "content": "必要に応じて応答をTool Callに切り替えてください。functionsで定義されている機能を呼び出すことができます。"},

        # Few-shot example: a tool call followed by its tool response.
        {"role": "user", "content": "今何時?"},
        {
            "role": "assistant",
            "tool_calls": [{
                "function": {
                    "name": "get_current_time",
                    "arguments": {"locate": "Asia/Tokyo"},
                },
            }],
        },
        {
            "role": "tool",
            "content": {"datetime": "2026-04-24T15:30:00+09:00"},
        },
        {
            "role": "assistant",
            "content": "現在の時刻は15時30分です。",
        },
        # The actual request.
        {"role": "user", "content": "東京の天気を教えてください。"},
    ]

    sampler = make_sampler(temp=0.7, top_p=0.9)

    # --- Turn 1: generate the tool call ---
    # NOTE(review): trust_remote_code is forwarded to the chat template as an
    # extra keyword here; it looks like it was meant for the tokenizer config
    # passed to load() - confirm.
    prompt: str = tokenizer.apply_chat_template(
        messages,
        tools=tools,
        tokenize=False,
        add_generation_prompt=True,
        trust_remote_code=True,
        # "medium" matches the basic example; the previous value "middle" is
        # not one of the low/medium/high reasoning levels.
        reasoning_effort="medium",
    )

    print("=== Turn 1: Tool Call ===")
    print("--- Prompt ---")
    print(prompt)

    input_ids = tokenizer.encode(prompt)
    generated_ids = generate_response(model, tokenizer, input_ids, sampler)
    response = tokenizer.decode(generated_ids)

    print("\n--- Raw Response ---")
    print(response)

    # Parse the tool call with the Harmony parser. The prefill tokens that
    # open the assistant turn are prepended to the generated IDs before parsing.
    # NOTE(review): parse_harmony_message is presumably supplied by the
    # LLM-jp-4 tokenizer's remote code; verify it exists for the checkpoint
    # selected in load() above.
    response_prefill = tokenizer.encode("<|start|>assistant")
    parsed_harmony = tokenizer.parse_harmony_message(response_prefill + generated_ids)

    print("\n--- Parsed Harmony Messages ---")
    for i, message in enumerate(parsed_harmony, start=1):
        print(f"Message {i}:")
        print(" End Type:", message.end)
        # Each section is optional; print only the ones that are present.
        for section in ("Role", "Channel", "Constrain", "Content"):
            segment = getattr(message, section.lower())
            if segment:
                print(f" {section}:", repr(tokenizer.decode(segment.token_ids)))
123
+
124
+ # # --- Turn 2: tool resultを渡して最終応答 ---
125
+ # messages.append({
126
+ # "role": "assistant",
127
+ # "tool_calls": [{
128
+ # "function": {
129
+ # "name": "get_weather",
130
+ # "arguments": '{"city": "東京"}',
131
+ # },
132
+ # }],
133
+ # })
134
+ # messages.append({
135
+ # "role": "tool",
136
+ # "content": '{"city": "東京", "weather": "晴れ", "temperature": 22, "humidity": 45}',
137
+ # })
138
+
139
+ # prompt2: str = tokenizer.apply_chat_template(
140
+ # messages,
141
+ # tools=tools,
142
+ # tokenize=False,
143
+ # add_generation_prompt=True,
144
+ # )
145
+
146
+ # print("\n\n=== Turn 2: Final Response ===")
147
+ # print("--- Prompt ---")
148
+ # print(prompt2)
149
+
150
+ # input_ids2 = tokenizer.encode(prompt2)
151
+ # generated_ids2 = generate_response(model, tokenizer, input_ids2, sampler)
152
+ # response2 = tokenizer.decode(generated_ids2)
153
+
154
+ # print("\n--- Raw Response ---")
155
+ # print(response2)
156
+
157
+ # parsed = tokenizer.parse_response(response2)
158
+ # print("\n--- Parsed Response ---")
159
+ # print("Role:", parsed.get("role"))
160
+ # print("Thinking:", parsed.get("thinking"))
161
+ # print("Content:", parsed.get("content"))
162
+
163
+
164
+ if __name__ == "__main__":
165
+ main()
@@ -7,8 +7,8 @@ dependencies = [
7
7
  "flex==6.14.1",
8
8
  "hf-xet==1.2.0",
9
9
  "jinja2==3.1.6",
10
- "mlx==0.31.1; sys_platform == 'darwin'",
11
- "mlx-lm==0.31.2; sys_platform == 'darwin'",
10
+ "mlx>=0.31.2; sys_platform == 'darwin'",
11
+ "mlx-lm==0.31.3; sys_platform == 'darwin'",
12
12
  "mlx-vlm==0.4.4",
13
13
  "tokenizers==0.22.2",
14
14
  "torch==2.9.1",
@@ -152,7 +152,8 @@ def get_special_tokens(tokenizer):
152
152
 
153
153
  # ツール関連の単体トークン(追加)
154
154
  "tool_calls_marker": "[TOOL_CALLS]",
155
- "tool_call_end": "<|call|>",
155
+ # Harmony形式のcallトークン(tool_call_endとは異なる用途)
156
+ "harmony_call": "<|call|>",
156
157
  }
157
158
 
158
159
  # VLM processorではconvert_tokens_to_idsがない場合がある
@@ -254,8 +255,6 @@ def detect_tool_call_format(tokenizer):
254
255
  (r'<longcat_tool_call>', r'</longcat_tool_call>'),
255
256
  # <minimax:tool_call>...</minimax:tool_call>
256
257
  (r'<minimax:tool_call>', r'</minimax:tool_call>'),
257
- # context-1形式: to=functions.{name}...<|call|>
258
- (r'to=functions\.', r'<\|call\|>'),
259
258
  ]
260
259
 
261
260
  for start_pattern, end_pattern in tool_call_patterns:
@@ -266,6 +265,16 @@ def detect_tool_call_format(tokenizer):
266
265
  result["call_end"] = end_match.group(0)
267
266
  break
268
267
 
268
+ # Harmony形式の専用検出
269
+ # テンプレート内で "functions." と <|call|> が共存する場合
270
+ if "call_start" not in result:
271
+ has_functions = re.search(r'"functions\."', template)
272
+ has_call = re.search(r'<\|call\|>', template)
273
+ if has_functions and has_call:
274
+ result["tool_parser_type"] = "harmony"
275
+ result["call_start"] = "to=functions."
276
+ result["call_end"] = "<|call|>"
277
+
269
278
  # Mistral特殊ケース
270
279
  if "call_start" not in result:
271
280
  mistral_match = re.search(r'\[TOOL_CALLS\]', template)
@@ -761,30 +761,46 @@ wheels = [
761
761
  name = "mlx"
762
762
  version = "0.31.1"
763
763
  source = { registry = "https://pypi.org/simple" }
764
+ resolution-markers = [
765
+ "python_full_version >= '3.12' and sys_platform == 'win32'",
766
+ "python_full_version >= '3.12' and sys_platform == 'emscripten'",
767
+ "python_full_version == '3.11.*' and sys_platform == 'win32'",
768
+ "python_full_version == '3.11.*' and sys_platform == 'emscripten'",
769
+ ]
770
+
771
+ [[package]]
772
+ name = "mlx"
773
+ version = "0.31.2"
774
+ source = { registry = "https://pypi.org/simple" }
775
+ resolution-markers = [
776
+ "python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'",
777
+ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'",
778
+ "python_full_version < '3.11'",
779
+ ]
764
780
  dependencies = [
765
781
  { name = "mlx-metal", marker = "sys_platform == 'darwin'" },
766
782
  ]
767
783
  wheels = [
768
- { url = "https://files.pythonhosted.org/packages/9b/f9/f1663dafd45af02467f4f41777c13ec34b9104b2b0450d870c3f906285cd/mlx-0.31.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:bc46c911cc060d2eaf21b9e24a1712dc56763b660b53631b9057a32ab1c0271a", size = 574137, upload-time = "2026-03-12T02:15:54.996Z" },
769
- { url = "https://files.pythonhosted.org/packages/c6/26/1fd632f537a5160a21475a70aaef252090c62f9629f45ad20f5acfe810f3/mlx-0.31.1-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:fa132def5b3d959362077521c80f1fc80f64c45060d2940dc1d66a1aa19ce5f6", size = 574140, upload-time = "2026-03-12T02:15:56.709Z" },
770
- { url = "https://files.pythonhosted.org/packages/5c/c9/e790fa8ddc1b27fea7ba749699883f31c65e166b18e4598beab4574e4686/mlx-0.31.1-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:877ff2f98debd035b922825a0d7e7e1be0959fc5ca1d24cb5020a23e510ff16d", size = 574124, upload-time = "2026-03-12T02:15:58.323Z" },
771
- { url = "https://files.pythonhosted.org/packages/b4/da/f7375fc2be05d026640c5ced085a9e71066a33100638e5762347dae5d680/mlx-0.31.1-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:931c9316ec47b45ec0e737519f4f4c90eb69cbbdaaecadd6dd2ccdf1a85d4e61", size = 641428, upload-time = "2026-03-12T02:15:59.743Z" },
772
- { url = "https://files.pythonhosted.org/packages/1c/3f/ab060661d966d435e41212d4f6d6e9d1202da8b9043b1c18c343ab7d1b08/mlx-0.31.1-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:dec00ce7b094d6bc2876996291fd76c9e28326bc1a9853440903f2a06946ce1f", size = 674521, upload-time = "2026-03-12T02:16:01.057Z" },
773
- { url = "https://files.pythonhosted.org/packages/75/32/25dc2eae1d6f867224ef2bca2c644e3e913fe8067991f8394c090b720e3e/mlx-0.31.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:8863835fb36c7c4f65008b1426ddb9ff7931a13c975e0ef58a40002ae8048922", size = 574311, upload-time = "2026-03-12T02:16:02.651Z" },
774
- { url = "https://files.pythonhosted.org/packages/9b/bf/c5aa1d1154f5a216139c8162cd3e6568b7eb427390d655f7f5ae3a1a61e7/mlx-0.31.1-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:0de504c1f1fe73b32fc3cf457b8eac30d1f7ce22440ef075c1970f96712e6fff", size = 574312, upload-time = "2026-03-12T02:16:04.231Z" },
775
- { url = "https://files.pythonhosted.org/packages/3a/88/ef57747552c9e9da0c28465d9266c05a0009b698d90fb0bc63eb81840b8d/mlx-0.31.1-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:10715b895e1f3e984c2c54257b7db956ff8af1fa93255412794a3724fe2dd3b1", size = 574385, upload-time = "2026-03-12T02:16:05.528Z" },
776
- { url = "https://files.pythonhosted.org/packages/ac/51/dbea4bbe7a2e4cd05226965b34198d49459cfaef8b9b37b72f006a9811ab/mlx-0.31.1-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:d065625ab3101adcd7f5824297243fe40a0615099a06f5597ab67284483aa2f8", size = 641347, upload-time = "2026-03-12T02:16:07.013Z" },
777
- { url = "https://files.pythonhosted.org/packages/c5/86/3db98e8805637fb56f078311d622e9500f5c9088f6d79a6e304ec8235b47/mlx-0.31.1-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:b2cf8502d9d64dc6851034fcd4a656cbb26be20c36f190f2971f4ac0caed89cb", size = 674769, upload-time = "2026-03-12T02:16:08.51Z" },
778
- { url = "https://files.pythonhosted.org/packages/38/29/71fe1f68756f515856e6930973c23245810d4aa3cd22fddd719d86a709dc/mlx-0.31.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8a63b31a398c9519f2bb0c81cf3865d9baca4ff573ffc31ead465d18286184e8", size = 574308, upload-time = "2026-03-12T02:16:10.256Z" },
779
- { url = "https://files.pythonhosted.org/packages/21/be/70654a2cee0d71fd10bd237a50a79d06ae51679a194db6a3b16c0c84e6a5/mlx-0.31.1-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:a7a9347df4dcc41f0d16ff70b65650820af4879f686534b233b16826a22afa00", size = 574309, upload-time = "2026-03-12T02:16:11.577Z" },
780
- { url = "https://files.pythonhosted.org/packages/ad/69/c7bc7b04f76b0cbd678f328011d1634bd0bcfc2da45aba06e084cb031127/mlx-0.31.1-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:6cdb797ea31787d1ce9e5be77991c4bd5cbf129ab15f7253b78e09737f535fce", size = 574289, upload-time = "2026-03-12T02:16:13.146Z" },
781
- { url = "https://files.pythonhosted.org/packages/55/f7/dcc129228faab4d406041d91413c5999250ab79da6fe5417ac84f1616ff1/mlx-0.31.1-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:1ed1991c8e39f841d5756c0c543beb819763a2f80fba3f4b150bc6cad4d973de", size = 626439, upload-time = "2026-03-12T02:16:14.741Z" },
782
- { url = "https://files.pythonhosted.org/packages/90/1d/8b32e46ea98ab5c1c15cf1b37ac97af651977f84e72e1800412a700c51d9/mlx-0.31.1-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:195c5cb27328380287c0ffe9ef48f860ab75ec5d3dfce153d475dc2c99369708", size = 668679, upload-time = "2026-03-12T02:16:16.012Z" },
783
- { url = "https://files.pythonhosted.org/packages/44/45/04465da443634b23fb11670bbd2f7538b1ed43ffc5e0de44a95b3c29e9c1/mlx-0.31.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9a6d3410fc951bd28508fed9c1ab5d9903f6f6bb101c3a5d63d4191d49a384a1", size = 574268, upload-time = "2026-03-12T02:16:17.27Z" },
784
- { url = "https://files.pythonhosted.org/packages/85/7b/84956960356ff36e8c1bbed68fac96709e98e6a1adbc8e3d0ff71022d84e/mlx-0.31.1-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:20bd7ba19882603ac22711092d0e799f1ff7b5183c2c641d417dab4d2423d99e", size = 574265, upload-time = "2026-03-12T02:16:18.479Z" },
785
- { url = "https://files.pythonhosted.org/packages/86/01/d6f0ef5b8c0b390af08246d1301e9717dfb076b3920012b53105a888ed8c/mlx-0.31.1-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:4c4565d6f4f8ce295613ee342d313ee5ab0b0eab9a6272954450f8343f7876bc", size = 574172, upload-time = "2026-03-12T02:16:19.898Z" },
786
- { url = "https://files.pythonhosted.org/packages/df/05/eb29e9eb0cff9c7dfd872e26663e6e9512629730740e1db629086c80ac5a/mlx-0.31.1-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:9dc564a8b38b9aec279a1c7d34551068b1cc1f8e43b5ac044b56b2a9a4205195", size = 626558, upload-time = "2026-03-12T02:16:21.652Z" },
787
- { url = "https://files.pythonhosted.org/packages/25/45/ecb746fbb6acb75d03760e41cc7bd21c2e2b544528b3033f7d70402334ac/mlx-0.31.1-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:78f51ab929278366006ee7793dbb5c942b121542c793c33eb9b894a2ce8e27e1", size = 668625, upload-time = "2026-03-12T02:16:23.103Z" },
784
+ { url = "https://files.pythonhosted.org/packages/29/7c/c16d52494a1ba6d90443f31fa26bc810bf878d532dfa9a7a13f49ef9542d/mlx-0.31.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:b29cf940f34205f09bb552ac60465ae833c4ae640b52777c6d725ddbad8461ca", size = 586942, upload-time = "2026-04-22T03:14:21.97Z" },
785
+ { url = "https://files.pythonhosted.org/packages/74/da/1c7f3dc39b7bda65b0cafbaf1e58a35eea118622c6f4506c9a4294c9806e/mlx-0.31.2-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:ebdc47b87b4b0216ceab3b5961716804bba3107c16454b65ae51d0e0c059f298", size = 586942, upload-time = "2026-04-22T03:14:23.527Z" },
786
+ { url = "https://files.pythonhosted.org/packages/4c/e9/a8559389706d39f613620a8b6b42ed03cf3155a516b0762d355c5116fdab/mlx-0.31.2-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:2a64db61b2840f28bae08354e6f999698e30381af201cc12354290673c96213b", size = 586804, upload-time = "2026-04-22T03:14:24.882Z" },
787
+ { url = "https://files.pythonhosted.org/packages/4d/4a/274ebee3783a37560cddc8e781ec3eefadd17f3f85a7dcd5df6f07d200d6/mlx-0.31.2-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:e3e2818157371501de097887f371784227f9dd9c91e177f986db7b25319c55d7", size = 653252, upload-time = "2026-04-22T03:14:26.275Z" },
788
+ { url = "https://files.pythonhosted.org/packages/d6/c7/79283370001660102f5c5c772b649f69da02113609d927af35e747508320/mlx-0.31.2-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:c71dff00cc1b363d542f111d9e8b7b59dadb65b29d027f798b71ea34da75b665", size = 692109, upload-time = "2026-04-22T03:14:28.05Z" },
789
+ { url = "https://files.pythonhosted.org/packages/94/89/1e77ec3ff380e8fb9e7258047374d31452a0f9828a0e370f127b07dd8288/mlx-0.31.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4a3f181b367d404e44a6bd68ef5eb573930809ac60cacd51d0c851c629b1b651", size = 586911, upload-time = "2026-04-22T03:14:29.675Z" },
790
+ { url = "https://files.pythonhosted.org/packages/6a/41/c1907f05f8a3fc54025fb78ad68d3c4a4b931664d03c0a24f7f431cc4087/mlx-0.31.2-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:70297cbef7479429f69c966bfed10da20a6f0c2aa997eec2b4f6ba1a07caf2ef", size = 586915, upload-time = "2026-04-22T03:14:31.403Z" },
791
+ { url = "https://files.pythonhosted.org/packages/97/b0/61ac2c14773c786fecbda28067b0207a0c654cb4d10c548808c51284d700/mlx-0.31.2-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:c0ff158b7ac93a4b5659adbc70053498b30a5964fc45f78596398e056a96c36a", size = 587030, upload-time = "2026-04-22T03:14:32.961Z" },
792
+ { url = "https://files.pythonhosted.org/packages/de/53/e12feb7078ee472983555fcb1da4749a2bbbc8fc5b29b78c205b96d37d1e/mlx-0.31.2-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:cd5d42b0b2bee7efe1b0680a7e302943dd33b92c879cffa0358ffdb5a4a8d27b", size = 652994, upload-time = "2026-04-22T03:14:34.691Z" },
793
+ { url = "https://files.pythonhosted.org/packages/c5/40/f92c8cdc9595bf24c7e483a3156bfe0cc99a5cf5545d8dba8e7fe000c10b/mlx-0.31.2-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:b368f7ede4238cc44076e4843820338c453c21ee50bd3ee26d4b182c179fd8e1", size = 692086, upload-time = "2026-04-22T03:14:36.45Z" },
794
+ { url = "https://files.pythonhosted.org/packages/c3/47/5f33906cb03d6a378a697cd2d2641a26b37dea17ee3d9124d7e39e8eca01/mlx-0.31.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:e5067aaf2be1f3d7bba5be52348775804f111173c1ed04639618fd713b1a530f", size = 584863, upload-time = "2026-04-22T03:14:38.211Z" },
795
+ { url = "https://files.pythonhosted.org/packages/08/e7/a851a451b1327af9fb4df3991b9ae87d066b6f6630e854af55c288b0995a/mlx-0.31.2-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:edb9797db7d852477ca1c99708058654ee860d4148fe5765f0d55528e2b1aa22", size = 584860, upload-time = "2026-04-22T03:14:39.746Z" },
796
+ { url = "https://files.pythonhosted.org/packages/3b/15/0d1dc0597644e5e7b011ca954ba0c47e13cd880a3b909b0c3f1b4d8bf8f1/mlx-0.31.2-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:51ca102db641b01e7cb083ce8ecb580e281530a141a7ca12544bb370641630ae", size = 584887, upload-time = "2026-04-22T03:14:41.585Z" },
797
+ { url = "https://files.pythonhosted.org/packages/5d/c3/00664239a98e8bd614733c4182cd402d2bacad2d7f79eca66562ac406870/mlx-0.31.2-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:117c7583cae0ca107cd53c591cc34f8e75f97a505aa47088844b7dc0fc69dc67", size = 627863, upload-time = "2026-04-22T03:14:43.326Z" },
798
+ { url = "https://files.pythonhosted.org/packages/53/7b/af6cd73a79772af6f19eab2cb4c48eda23a9294d1650a4c1269a9996e532/mlx-0.31.2-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:99572133181481640a8bf8d449daf083816d0af3ee050c8adfc5bf45ceca91c6", size = 685090, upload-time = "2026-04-22T03:14:45.058Z" },
799
+ { url = "https://files.pythonhosted.org/packages/a3/3f/888f8664d4f8e23a1363a5f50024be5216e199ab7ad0ba20988c7ed6d729/mlx-0.31.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:1b3fb0dda955b0d552ce57bdd6f42b3309ab21b067e40587d6848443d307e91f", size = 584796, upload-time = "2026-04-22T03:14:47.215Z" },
800
+ { url = "https://files.pythonhosted.org/packages/dd/14/e9cd18b51f9e1dbcb060eec0fafc2d2428c8e1eacd9b0a02d7c5ce75b661/mlx-0.31.2-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:34b0171cd9eb5c43fdd82091f6135d6ccc5a065363a4a3e68fac64fb4e53d37c", size = 584790, upload-time = "2026-04-22T03:14:48.519Z" },
801
+ { url = "https://files.pythonhosted.org/packages/ca/20/c6c5fb998c7834d094b2bfb9f003b5246cb270f0266da055c55546c34999/mlx-0.31.2-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:c05981684279a8935d58b0dde3ea5b02d210c3bad3319aa0e9934ec2df165752", size = 584795, upload-time = "2026-04-22T03:14:49.904Z" },
802
+ { url = "https://files.pythonhosted.org/packages/0b/19/aca251d4c5f3532ce9c2c1e95ad76740d9c6c298f406f62d992f465b9be0/mlx-0.31.2-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:cd1f4189e5f1bc68735f44eb63ce98ae09d66ac75d7ab5b15a41afae7e9f0513", size = 627843, upload-time = "2026-04-22T03:14:51.351Z" },
803
+ { url = "https://files.pythonhosted.org/packages/3e/2b/b89364883b98f21c2fe29e52d4ac8bc2fa2fe0d79293b36ec421efc1854a/mlx-0.31.2-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:53c8d57ffa9ce77f8355663be05014c0dd37280e57f19126fb0a24389a30684b", size = 685064, upload-time = "2026-04-22T03:14:52.75Z" },
788
804
  ]
789
805
 
790
806
  [[package]]
@@ -795,7 +811,7 @@ dependencies = [
795
811
  { name = "flex" },
796
812
  { name = "hf-xet" },
797
813
  { name = "jinja2" },
798
- { name = "mlx", marker = "sys_platform == 'darwin'" },
814
+ { name = "mlx", version = "0.31.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
799
815
  { name = "mlx-lm", marker = "sys_platform == 'darwin'" },
800
816
  { name = "mlx-vlm" },
801
817
  { name = "tokenizers" },
@@ -809,8 +825,8 @@ requires-dist = [
809
825
  { name = "flex", specifier = "==6.14.1" },
810
826
  { name = "hf-xet", specifier = "==1.2.0" },
811
827
  { name = "jinja2", specifier = "==3.1.6" },
812
- { name = "mlx", marker = "sys_platform == 'darwin'", specifier = "==0.31.1" },
813
- { name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = "==0.31.2" },
828
+ { name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.31.2" },
829
+ { name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = "==0.31.3" },
814
830
  { name = "mlx-vlm", specifier = "==0.4.4" },
815
831
  { name = "tokenizers", specifier = "==0.22.2" },
816
832
  { name = "torch", specifier = "==2.9.1" },
@@ -820,11 +836,11 @@ requires-dist = [
820
836
 
821
837
  [[package]]
822
838
  name = "mlx-lm"
823
- version = "0.31.2"
839
+ version = "0.31.3"
824
840
  source = { registry = "https://pypi.org/simple" }
825
841
  dependencies = [
826
842
  { name = "jinja2" },
827
- { name = "mlx", marker = "sys_platform == 'darwin'" },
843
+ { name = "mlx", version = "0.31.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
828
844
  { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
829
845
  { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
830
846
  { name = "protobuf" },
@@ -832,19 +848,19 @@ dependencies = [
832
848
  { name = "sentencepiece" },
833
849
  { name = "transformers" },
834
850
  ]
835
- sdist = { url = "https://files.pythonhosted.org/packages/51/d7/66ac623c87f9ca834e49f8c79ff3be69179a6b5f23fc7b257fcd4fbb15bd/mlx_lm-0.31.2.tar.gz", hash = "sha256:2681b7652546d36128f43d6cc7698ec43529a1845a27350394d1843be63c652b", size = 301181, upload-time = "2026-04-07T21:36:51.229Z" }
851
+ sdist = { url = "https://files.pythonhosted.org/packages/84/94/9a38d6b0c6fcca995b9136c94eb7da1e9c5165652edf228b96b29960fa7a/mlx_lm-0.31.3.tar.gz", hash = "sha256:61eb0e3ba09444f77f874aff295401d7ccd20b39495cbbce0c782a15474ce733", size = 304318, upload-time = "2026-04-22T07:37:27.922Z" }
836
852
  wheels = [
837
- { url = "https://files.pythonhosted.org/packages/d5/67/63b032a4f11d1a634f26cedf4c5e0e33ea027bf189b1a10e1cac2653db92/mlx_lm-0.31.2-py3-none-any.whl", hash = "sha256:a90a7905031b2580b4923f92e0fb156ca287f452bb1a9e41a60dcbc971532d46", size = 407972, upload-time = "2026-04-07T21:36:49.859Z" },
853
+ { url = "https://files.pythonhosted.org/packages/90/02/9a67b8e4f87e3e2e5cd7b1ad79304b93c09a0db6af34bee75e6551c06c60/mlx_lm-0.31.3-py3-none-any.whl", hash = "sha256:758cfddf1180053b7613db76fad3d246a331a2a905808e1164a275621fc983b8", size = 408890, upload-time = "2026-04-22T07:37:25.965Z" },
838
854
  ]
839
855
 
840
856
  [[package]]
841
857
  name = "mlx-metal"
842
- version = "0.31.1"
858
+ version = "0.31.2"
843
859
  source = { registry = "https://pypi.org/simple" }
844
860
  wheels = [
845
- { url = "https://files.pythonhosted.org/packages/39/66/2313497fdbc7fbadf8e026c09366e3f049f9114e65ca4edc23cdb8699186/mlx_metal-0.31.1-py3-none-macosx_14_0_arm64.whl", hash = "sha256:70741174131dbf7fdd479cb730e06e08c358eac3bf7905d9e884e7960cfdd5b8", size = 38624074, upload-time = "2026-03-12T02:15:48.036Z" },
846
- { url = "https://files.pythonhosted.org/packages/c7/34/4c3c6890ce6095b2ab2ba2f5f15c9a7ba17208d47f8cacb572885a2dc0eb/mlx_metal-0.31.1-py3-none-macosx_15_0_arm64.whl", hash = "sha256:6c56bd8cd27743e635f5a90a22535af7c31bd22b4b126d46b6da2da52d72e413", size = 38618950, upload-time = "2026-03-12T02:15:51.908Z" },
847
- { url = "https://files.pythonhosted.org/packages/51/bc/987cb99e3aafb296aa11ce5133838a10eae8447edd53168d0804d4fb3a14/mlx_metal-0.31.1-py3-none-macosx_26_0_arm64.whl", hash = "sha256:e7324b7c56b519ae67c025d3ced07e5d35bc3a9f19d4c45fe4927f385148c59e", size = 49256543, upload-time = "2026-03-12T02:15:54.851Z" },
861
+ { url = "https://files.pythonhosted.org/packages/3f/69/fe3b783ebe999f3118234e1e940feb622518bfb1dea6ac5d13b1d36a8449/mlx_metal-0.31.2-py3-none-macosx_14_0_arm64.whl", hash = "sha256:b25385bcee18fc194092255b8b53b9a3d8489eb650e59160f1b57aadd07aa2dc", size = 40055588, upload-time = "2026-04-22T03:14:14.43Z" },
862
+ { url = "https://files.pythonhosted.org/packages/4f/5d/4c690d5b93c30ba002656c37363159d978705bf8eb801b8481840fb942c2/mlx_metal-0.31.2-py3-none-macosx_15_0_arm64.whl", hash = "sha256:e9d4e5fce6ca10a87a0e388597f99519ad594d09e674708b5312bd8bd4f5997d", size = 40053220, upload-time = "2026-04-22T03:14:18.048Z" },
863
+ { url = "https://files.pythonhosted.org/packages/99/82/11fd62a8d7a3e96e5c43220b17de0151e3f10101f8bb3b865f5bd9cdd074/mlx_metal-0.31.2-py3-none-macosx_26_0_arm64.whl", hash = "sha256:84ffb60ee503f03eb684f5fb168d5cff31e2a16b7f27c1731eaf7662bd6e9b46", size = 55792151, upload-time = "2026-04-22T03:14:22.059Z" },
848
864
  ]
849
865
 
850
866
  [[package]]
@@ -855,7 +871,8 @@ dependencies = [
855
871
  { name = "datasets" },
856
872
  { name = "fastapi" },
857
873
  { name = "miniaudio" },
858
- { name = "mlx" },
874
+ { name = "mlx", version = "0.31.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" },
875
+ { name = "mlx", version = "0.31.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
859
876
  { name = "mlx-lm" },
860
877
  { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
861
878
  { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -1201,7 +1218,7 @@ name = "nvidia-cudnn-cu12"
1201
1218
  version = "9.10.2.21"
1202
1219
  source = { registry = "https://pypi.org/simple" }
1203
1220
  dependencies = [
1204
- { name = "nvidia-cublas-cu12" },
1221
+ { name = "nvidia-cublas-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
1205
1222
  ]
1206
1223
  wheels = [
1207
1224
  { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
@@ -1212,7 +1229,7 @@ name = "nvidia-cufft-cu12"
1212
1229
  version = "11.3.3.83"
1213
1230
  source = { registry = "https://pypi.org/simple" }
1214
1231
  dependencies = [
1215
- { name = "nvidia-nvjitlink-cu12" },
1232
+ { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
1216
1233
  ]
1217
1234
  wheels = [
1218
1235
  { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
@@ -1239,9 +1256,9 @@ name = "nvidia-cusolver-cu12"
1239
1256
  version = "11.7.3.90"
1240
1257
  source = { registry = "https://pypi.org/simple" }
1241
1258
  dependencies = [
1242
- { name = "nvidia-cublas-cu12" },
1243
- { name = "nvidia-cusparse-cu12" },
1244
- { name = "nvidia-nvjitlink-cu12" },
1259
+ { name = "nvidia-cublas-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
1260
+ { name = "nvidia-cusparse-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
1261
+ { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
1245
1262
  ]
1246
1263
  wheels = [
1247
1264
  { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
@@ -1252,7 +1269,7 @@ name = "nvidia-cusparse-cu12"
1252
1269
  version = "12.5.8.93"
1253
1270
  source = { registry = "https://pypi.org/simple" }
1254
1271
  dependencies = [
1255
- { name = "nvidia-nvjitlink-cu12" },
1272
+ { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" },
1256
1273
  ]
1257
1274
  wheels = [
1258
1275
  { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },