mlx-ruby-lm 0.30.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +83 -0
- data/exe/mlx_lm +7 -0
- data/lib/mlx_lm/benchmark.rb +67 -0
- data/lib/mlx_lm/chat_template.rb +41 -0
- data/lib/mlx_lm/cli.rb +113 -0
- data/lib/mlx_lm/config.rb +30 -0
- data/lib/mlx_lm/convert_utils.rb +51 -0
- data/lib/mlx_lm/generate.rb +204 -0
- data/lib/mlx_lm/load_utils.rb +87 -0
- data/lib/mlx_lm/model_args.rb +54 -0
- data/lib/mlx_lm/models/activations.rb +46 -0
- data/lib/mlx_lm/models/afm7.rb +131 -0
- data/lib/mlx_lm/models/afmoe.rb +421 -0
- data/lib/mlx_lm/models/apertus.rb +179 -0
- data/lib/mlx_lm/models/baichuan_m1.rb +306 -0
- data/lib/mlx_lm/models/bailing_moe.rb +399 -0
- data/lib/mlx_lm/models/bailing_moe_linear.rb +91 -0
- data/lib/mlx_lm/models/bitlinear_layers.rb +108 -0
- data/lib/mlx_lm/models/bitnet.rb +176 -0
- data/lib/mlx_lm/models/cache.rb +792 -0
- data/lib/mlx_lm/models/cohere.rb +150 -0
- data/lib/mlx_lm/models/cohere2.rb +224 -0
- data/lib/mlx_lm/models/dbrx.rb +286 -0
- data/lib/mlx_lm/models/deepseek.rb +239 -0
- data/lib/mlx_lm/models/deepseek_v2.rb +108 -0
- data/lib/mlx_lm/models/deepseek_v3.rb +34 -0
- data/lib/mlx_lm/models/deepseek_v32.rb +45 -0
- data/lib/mlx_lm/models/dots1.rb +292 -0
- data/lib/mlx_lm/models/ernie4_5.rb +165 -0
- data/lib/mlx_lm/models/ernie4_5_moe.rb +97 -0
- data/lib/mlx_lm/models/exaone.rb +169 -0
- data/lib/mlx_lm/models/exaone4.rb +233 -0
- data/lib/mlx_lm/models/exaone_moe.rb +421 -0
- data/lib/mlx_lm/models/falcon_h1.rb +102 -0
- data/lib/mlx_lm/models/gated_delta.rb +136 -0
- data/lib/mlx_lm/models/gemma.rb +159 -0
- data/lib/mlx_lm/models/gemma2.rb +198 -0
- data/lib/mlx_lm/models/gemma3.rb +85 -0
- data/lib/mlx_lm/models/gemma3_text.rb +270 -0
- data/lib/mlx_lm/models/gemma3n.rb +79 -0
- data/lib/mlx_lm/models/glm.rb +164 -0
- data/lib/mlx_lm/models/glm4.rb +180 -0
- data/lib/mlx_lm/models/glm4_moe.rb +343 -0
- data/lib/mlx_lm/models/glm4_moe_lite.rb +131 -0
- data/lib/mlx_lm/models/glm_moe_dsa.rb +26 -0
- data/lib/mlx_lm/models/gpt2.rb +166 -0
- data/lib/mlx_lm/models/gpt_bigcode.rb +154 -0
- data/lib/mlx_lm/models/gpt_neox.rb +178 -0
- data/lib/mlx_lm/models/gpt_oss.rb +319 -0
- data/lib/mlx_lm/models/granite.rb +170 -0
- data/lib/mlx_lm/models/granitemoe.rb +58 -0
- data/lib/mlx_lm/models/granitemoehybrid.rb +178 -0
- data/lib/mlx_lm/models/helium.rb +158 -0
- data/lib/mlx_lm/models/hunyuan.rb +378 -0
- data/lib/mlx_lm/models/hunyuan_v1_dense.rb +235 -0
- data/lib/mlx_lm/models/internlm2.rb +160 -0
- data/lib/mlx_lm/models/internlm3.rb +237 -0
- data/lib/mlx_lm/models/iquestloopcoder.rb +261 -0
- data/lib/mlx_lm/models/jamba.rb +158 -0
- data/lib/mlx_lm/models/kimi_k25.rb +98 -0
- data/lib/mlx_lm/models/kimi_linear.rb +124 -0
- data/lib/mlx_lm/models/kimi_vl.rb +93 -0
- data/lib/mlx_lm/models/klear.rb +283 -0
- data/lib/mlx_lm/models/lfm2.rb +120 -0
- data/lib/mlx_lm/models/lfm2_moe.rb +421 -0
- data/lib/mlx_lm/models/lfm2_vl.rb +67 -0
- data/lib/mlx_lm/models/lille_130m.rb +148 -0
- data/lib/mlx_lm/models/llama.rb +183 -0
- data/lib/mlx_lm/models/llama4.rb +357 -0
- data/lib/mlx_lm/models/llama4_text.rb +195 -0
- data/lib/mlx_lm/models/longcat_flash.rb +153 -0
- data/lib/mlx_lm/models/longcat_flash_ngram.rb +137 -0
- data/lib/mlx_lm/models/mamba.rb +301 -0
- data/lib/mlx_lm/models/mamba2.rb +292 -0
- data/lib/mlx_lm/models/mimo.rb +174 -0
- data/lib/mlx_lm/models/mimo_v2_flash.rb +491 -0
- data/lib/mlx_lm/models/minicpm.rb +169 -0
- data/lib/mlx_lm/models/minicpm3.rb +237 -0
- data/lib/mlx_lm/models/minimax.rb +282 -0
- data/lib/mlx_lm/models/ministral3.rb +304 -0
- data/lib/mlx_lm/models/mistral3.rb +84 -0
- data/lib/mlx_lm/models/mixtral.rb +192 -0
- data/lib/mlx_lm/models/mla.rb +75 -0
- data/lib/mlx_lm/models/nanochat.rb +167 -0
- data/lib/mlx_lm/models/nemotron.rb +202 -0
- data/lib/mlx_lm/models/nemotron_h.rb +212 -0
- data/lib/mlx_lm/models/nemotron_nas.rb +404 -0
- data/lib/mlx_lm/models/olmo.rb +165 -0
- data/lib/mlx_lm/models/olmo2.rb +169 -0
- data/lib/mlx_lm/models/olmo3.rb +254 -0
- data/lib/mlx_lm/models/olmoe.rb +64 -0
- data/lib/mlx_lm/models/openelm.rb +208 -0
- data/lib/mlx_lm/models/phi.rb +156 -0
- data/lib/mlx_lm/models/phi3.rb +171 -0
- data/lib/mlx_lm/models/phi3small.rb +196 -0
- data/lib/mlx_lm/models/phimoe.rb +206 -0
- data/lib/mlx_lm/models/phixtral.rb +208 -0
- data/lib/mlx_lm/models/pipeline.rb +37 -0
- data/lib/mlx_lm/models/pixtral.rb +47 -0
- data/lib/mlx_lm/models/plamo.rb +169 -0
- data/lib/mlx_lm/models/plamo2.rb +173 -0
- data/lib/mlx_lm/models/qwen.rb +175 -0
- data/lib/mlx_lm/models/qwen2.rb +162 -0
- data/lib/mlx_lm/models/qwen2_moe.rb +189 -0
- data/lib/mlx_lm/models/qwen2_vl.rb +48 -0
- data/lib/mlx_lm/models/qwen3.rb +167 -0
- data/lib/mlx_lm/models/qwen3_5.rb +69 -0
- data/lib/mlx_lm/models/qwen3_5_moe.rb +54 -0
- data/lib/mlx_lm/models/qwen3_moe.rb +166 -0
- data/lib/mlx_lm/models/qwen3_next.rb +147 -0
- data/lib/mlx_lm/models/qwen3_vl.rb +48 -0
- data/lib/mlx_lm/models/qwen3_vl_moe.rb +92 -0
- data/lib/mlx_lm/models/recurrent_gemma.rb +444 -0
- data/lib/mlx_lm/models/rope_utils.rb +316 -0
- data/lib/mlx_lm/models/rwkv7.rb +101 -0
- data/lib/mlx_lm/models/seed_oss.rb +167 -0
- data/lib/mlx_lm/models/smollm3.rb +89 -0
- data/lib/mlx_lm/models/solar_open.rb +79 -0
- data/lib/mlx_lm/models/ssm.rb +162 -0
- data/lib/mlx_lm/models/stablelm.rb +160 -0
- data/lib/mlx_lm/models/starcoder2.rb +161 -0
- data/lib/mlx_lm/models/step3p5.rb +479 -0
- data/lib/mlx_lm/models/switch_layers.rb +221 -0
- data/lib/mlx_lm/models/telechat3.rb +192 -0
- data/lib/mlx_lm/models/youtu_llm.rb +230 -0
- data/lib/mlx_lm/models.rb +33 -0
- data/lib/mlx_lm/perplexity.rb +48 -0
- data/lib/mlx_lm/quantize.rb +131 -0
- data/lib/mlx_lm/sample_utils.rb +159 -0
- data/lib/mlx_lm/server.rb +190 -0
- data/lib/mlx_lm/tokenizer_utils.rb +158 -0
- data/lib/mlx_lm/tuner/lora.rb +165 -0
- data/lib/mlx_lm/version.rb +3 -0
- data/lib/mlx_lm/weight_utils.rb +170 -0
- data/lib/mlx_lm.rb +135 -0
- metadata +272 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
require "safetensors"
|
|
2
|
+
|
|
3
|
+
module MlxLm
  # Helpers for reading safetensors weight files into MLX arrays and for
  # rebuilding a nested parameter tree from flat, dot-separated weight names.
  module WeightUtils
    module_function

    # Safetensors dtype code => [String#unpack directive, MLX dtype name].
    #
    # The integer rows are consumed directly by _tensor_to_mlx. The floating
    # point rows are informational: F16/BF16 have no native unpack directive and
    # are decoded from raw 16-bit words, and F64 values end up in an array built
    # the same way as F32 (recorded here as float32).
    DTYPE_UNPACK = {
      "F32" => ["e*", :float32],
      "F16" => ["e*", :float16],   # handled specially
      "BF16" => ["e*", :bfloat16], # handled specially
      "F64" => ["E*", :float32],
      "I8" => ["c*", :int8],
      "I16" => ["s<*", :int16],
      "I32" => ["l<*", :int32],
      "I64" => ["q<*", :int64],
      "U8" => ["C*", :uint8],
      "U16" => ["S<*", :uint16],
      "U32" => ["L<*", :uint32],
    }.freeze

    # Long-form dtype names accepted as synonyms for the safetensors codes.
    # Previously only "float32" and "int32" were recognised.
    DTYPE_ALIASES = {
      "float32" => "F32",
      "float16" => "F16",
      "bfloat16" => "BF16",
      "float64" => "F64",
      "int8" => "I8",
      "int16" => "I16",
      "int32" => "I32",
      "int64" => "I64",
      "uint8" => "U8",
      "uint16" => "U16",
      "uint32" => "U32",
    }.freeze
    private_constant :DTYPE_ALIASES

    # Load a single safetensors file.
    #
    # The whole file is read into memory with File.binread and handed to
    # Safetensors.deserialize; every (name, info) entry it yields is converted
    # with _tensor_to_mlx.
    #
    # @param path [String] path of the .safetensors file
    # @return [Hash] { tensor name => MLX::Core array }
    def load_safetensors(path)
      mx = MLX::Core
      raw = File.binread(path)

      Safetensors.deserialize(raw).each_with_object({}) do |(name, info), result|
        result[name] = _tensor_to_mlx(info, mx)
      end
    end

    # Load every model*.safetensors shard found directly inside a directory and
    # merge them into one hash. Shards are processed in sorted filename order,
    # so a tensor name present in several shards keeps the value from the last.
    #
    # @param directory [String] directory holding the shard files
    # @return [Hash] { tensor name => MLX::Core array }
    # @raise [RuntimeError] when the directory contains no matching file
    def load_sharded_safetensors(directory)
      # Glob relative to `base:` so that glob metacharacters in the directory
      # name itself ("[", "{", "*", "?") are not interpreted as a pattern.
      files = Dir.glob("model*.safetensors", base: directory)
                 .sort
                 .map { |name| File.join(directory, name) }

      raise "No safetensors found in #{directory}" if files.empty?

      files.each_with_object({}) do |file, weights|
        weights.merge!(load_safetensors(file))
      end
    end

    # Convert one deserialized tensor description into an MLX array.
    #
    # @param info [Hash] reads the "shape", "dtype" and "data" keys; "data" is
    #   the packed little-endian byte string
    # @param mx [Module] the MLX core module (callers pass MLX::Core)
    # @return [Object] the array returned by mx.array, reshaped to info["shape"]
    # @raise [ArgumentError] for a dtype that cannot be decoded. The old
    #   behaviour was to reinterpret the bytes as F32, which produced either a
    #   reshape failure or silently wrong values.
    def _tensor_to_mlx(info, mx)
      shape = info["shape"]
      data = info["data"]
      dtype_name = info["dtype"]
      dtype_code = DTYPE_ALIASES.fetch(dtype_name, dtype_name)

      flat =
        case dtype_code
        when "F32"
          # Little-endian single precision; dtype is left to mx.array's default.
          mx.array(data.unpack("e*"))
        when "F64"
          # Little-endian double precision, built the same way as F32.
          # NOTE(review): assumes mx.array's default float dtype is float32,
          # as DTYPE_UNPACK declares - confirm against the mlx gem.
          mx.array(data.unpack("E*"))
        when "F16"
          # Ruby cannot unpack half floats: read raw 16-bit words into a uint16
          # array and reinterpret the bits as float16.
          mx.array(data.unpack("S<*"), dtype: mx.uint16).view(mx.float16)
        when "BF16"
          # Same bit-reinterpretation trick, viewed as bfloat16.
          mx.array(data.unpack("S<*"), dtype: mx.uint16).view(mx.bfloat16)
        when "I8", "I16", "I32", "I64", "U8", "U16", "U32"
          directive, mlx_dtype = DTYPE_UNPACK.fetch(dtype_code)
          mx.array(data.unpack(directive), dtype: mx.public_send(mlx_dtype))
        else
          raise ArgumentError, "Unsupported safetensors dtype: #{dtype_name.inspect}"
        end

      flat.reshape(shape)
    end

    # Convert a flat weight dict like:
    #   { "model.layers.0.weight" => tensor, ... }
    # into a nested hash/array structure like:
    #   { "model" => { "layers" => [{ "weight" => tensor }] } }
    #
    # Numeric path segments become array indices. An empty input yields an
    # empty hash.
    #
    # @param flat [Hash] { dotted String key => value }
    # @return [Hash, Array] the nested structure
    def tree_unflatten(flat)
      root = {}

      flat.each do |dotted_key, value|
        parts = dotted_key.split(".")
        last = parts.length - 1
        node = root

        parts.each_with_index do |part, i|
          if i == last
            _set_in_node(node, part, value)
          else
            # Descend, creating the intermediate container on first visit.
            node = _ensure_child(node, part, parts[i + 1])
          end
        end
      end

      _finalize_arrays(root)
    end

    # Map a path segment to the key used while building the tree: all-digit
    # segments become Integers (later turned into array indices), everything
    # else stays a String.
    def _path_key(segment)
      segment.match?(/\A\d+\z/) ? segment.to_i : segment
    end

    # Store a leaf value under +key+ in +node+.
    # NOTE(review): when +node+ is not a Hash (two keys disagreeing on whether
    # a path is a leaf or a container) the value is dropped without error, as
    # in the original implementation.
    def _set_in_node(node, key, value)
      return unless node.is_a?(Hash)

      node[_path_key(key)] = value
    end

    # Return the child stored under +key+, creating an empty Hash when absent.
    # Intermediate containers are always Hashes at this stage, whatever the
    # following segment looks like; _finalize_arrays converts them afterwards.
    # +next_key+ is therefore unused and kept only for call compatibility.
    def _ensure_child(node, key, next_key)
      slot = _path_key(key)
      existing = node[slot]
      return existing unless existing.nil?

      node[slot] = {}
    end

    # Recursively convert hashes with all-integer keys into arrays.
    # Hashes are updated in place; an index that never received a value is left
    # as nil in the resulting array.
    def _finalize_arrays(node)
      return node unless node.is_a?(Hash)

      node.transform_values! { |child| _finalize_arrays(child) }

      # An empty hash has no index to size an array from; keep it as a hash
      # (this previously raised NoMethodError on nil + 1).
      return node if node.empty?
      return node unless node.keys.all?(Integer)

      node.each_with_object(Array.new(node.keys.max + 1)) do |(index, child), arr|
        arr[index] = child
      end
    end

    private_class_method :_path_key, :_set_in_node, :_ensure_child, :_finalize_arrays, :_tensor_to_mlx
  end
end
|
data/lib/mlx_lm.rb
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
require_relative "mlx_lm/version"
|
|
2
|
+
require_relative "mlx_lm/model_args"
|
|
3
|
+
require_relative "mlx_lm/weight_utils"
|
|
4
|
+
require_relative "mlx_lm/config"
|
|
5
|
+
require_relative "mlx_lm/tokenizer_utils"
|
|
6
|
+
require_relative "mlx_lm/sample_utils"
|
|
7
|
+
require_relative "mlx_lm/models"
|
|
8
|
+
require_relative "mlx_lm/models/cache"
|
|
9
|
+
require_relative "mlx_lm/models/activations"
|
|
10
|
+
require_relative "mlx_lm/models/bitlinear_layers"
|
|
11
|
+
require_relative "mlx_lm/models/bitnet"
|
|
12
|
+
require_relative "mlx_lm/models/gated_delta"
|
|
13
|
+
require_relative "mlx_lm/models/pipeline"
|
|
14
|
+
require_relative "mlx_lm/models/rope_utils"
|
|
15
|
+
require_relative "mlx_lm/models/ssm"
|
|
16
|
+
require_relative "mlx_lm/models/mla"
|
|
17
|
+
require_relative "mlx_lm/models/llama"
|
|
18
|
+
require_relative "mlx_lm/models/gemma"
|
|
19
|
+
require_relative "mlx_lm/models/qwen2"
|
|
20
|
+
require_relative "mlx_lm/models/qwen2_vl"
|
|
21
|
+
require_relative "mlx_lm/models/qwen"
|
|
22
|
+
require_relative "mlx_lm/models/qwen3"
|
|
23
|
+
require_relative "mlx_lm/models/qwen3_vl"
|
|
24
|
+
require_relative "mlx_lm/models/qwen3_5"
|
|
25
|
+
require_relative "mlx_lm/models/qwen3_5_moe"
|
|
26
|
+
require_relative "mlx_lm/models/phi"
|
|
27
|
+
require_relative "mlx_lm/models/phi3"
|
|
28
|
+
require_relative "mlx_lm/models/exaone"
|
|
29
|
+
require_relative "mlx_lm/models/exaone4"
|
|
30
|
+
require_relative "mlx_lm/models/glm"
|
|
31
|
+
require_relative "mlx_lm/models/glm4"
|
|
32
|
+
require_relative "mlx_lm/models/helium"
|
|
33
|
+
require_relative "mlx_lm/models/olmo"
|
|
34
|
+
require_relative "mlx_lm/models/seed_oss"
|
|
35
|
+
require_relative "mlx_lm/models/starcoder2"
|
|
36
|
+
require_relative "mlx_lm/models/stablelm"
|
|
37
|
+
require_relative "mlx_lm/models/cohere"
|
|
38
|
+
require_relative "mlx_lm/models/cohere2"
|
|
39
|
+
require_relative "mlx_lm/models/pixtral"
|
|
40
|
+
require_relative "mlx_lm/models/gemma2"
|
|
41
|
+
require_relative "mlx_lm/models/gemma3_text"
|
|
42
|
+
require_relative "mlx_lm/models/gemma3"
|
|
43
|
+
require_relative "mlx_lm/models/gemma3n"
|
|
44
|
+
require_relative "mlx_lm/models/granite"
|
|
45
|
+
require_relative "mlx_lm/models/granitemoe"
|
|
46
|
+
require_relative "mlx_lm/models/olmo2"
|
|
47
|
+
require_relative "mlx_lm/models/olmoe"
|
|
48
|
+
require_relative "mlx_lm/models/openelm"
|
|
49
|
+
require_relative "mlx_lm/models/gpt_neox"
|
|
50
|
+
require_relative "mlx_lm/models/switch_layers"
|
|
51
|
+
require_relative "mlx_lm/models/qwen3_moe"
|
|
52
|
+
require_relative "mlx_lm/models/qwen3_vl_moe"
|
|
53
|
+
require_relative "mlx_lm/models/mixtral"
|
|
54
|
+
require_relative "mlx_lm/models/phixtral"
|
|
55
|
+
require_relative "mlx_lm/models/mistral3"
|
|
56
|
+
require_relative "mlx_lm/models/minicpm"
|
|
57
|
+
require_relative "mlx_lm/models/minicpm3"
|
|
58
|
+
require_relative "mlx_lm/models/nanochat"
|
|
59
|
+
require_relative "mlx_lm/models/smollm3"
|
|
60
|
+
require_relative "mlx_lm/models/lfm2"
|
|
61
|
+
require_relative "mlx_lm/models/lfm2_vl"
|
|
62
|
+
require_relative "mlx_lm/models/deepseek"
|
|
63
|
+
require_relative "mlx_lm/models/deepseek_v2"
|
|
64
|
+
require_relative "mlx_lm/models/deepseek_v3"
|
|
65
|
+
require_relative "mlx_lm/models/deepseek_v32"
|
|
66
|
+
require_relative "mlx_lm/models/glm_moe_dsa"
|
|
67
|
+
require_relative "mlx_lm/models/kimi_k25"
|
|
68
|
+
require_relative "mlx_lm/models/kimi_vl"
|
|
69
|
+
require_relative "mlx_lm/models/internlm2"
|
|
70
|
+
require_relative "mlx_lm/models/internlm3"
|
|
71
|
+
require_relative "mlx_lm/models/telechat3"
|
|
72
|
+
require_relative "mlx_lm/models/olmo3"
|
|
73
|
+
require_relative "mlx_lm/models/gpt2"
|
|
74
|
+
require_relative "mlx_lm/models/gpt_bigcode"
|
|
75
|
+
require_relative "mlx_lm/models/nemotron"
|
|
76
|
+
require_relative "mlx_lm/models/apertus"
|
|
77
|
+
require_relative "mlx_lm/models/youtu_llm"
|
|
78
|
+
require_relative "mlx_lm/models/ernie4_5"
|
|
79
|
+
require_relative "mlx_lm/models/ernie4_5_moe"
|
|
80
|
+
require_relative "mlx_lm/models/baichuan_m1"
|
|
81
|
+
require_relative "mlx_lm/models/solar_open"
|
|
82
|
+
require_relative "mlx_lm/models/lille_130m"
|
|
83
|
+
require_relative "mlx_lm/models/mimo"
|
|
84
|
+
require_relative "mlx_lm/models/qwen2_moe"
|
|
85
|
+
require_relative "mlx_lm/models/phimoe"
|
|
86
|
+
require_relative "mlx_lm/models/llama4_text"
|
|
87
|
+
require_relative "mlx_lm/models/plamo"
|
|
88
|
+
require_relative "mlx_lm/models/mamba"
|
|
89
|
+
require_relative "mlx_lm/models/mamba2"
|
|
90
|
+
require_relative "mlx_lm/models/hunyuan_v1_dense"
|
|
91
|
+
require_relative "mlx_lm/models/dbrx"
|
|
92
|
+
require_relative "mlx_lm/models/klear"
|
|
93
|
+
require_relative "mlx_lm/models/iquestloopcoder"
|
|
94
|
+
require_relative "mlx_lm/models/phi3small"
|
|
95
|
+
require_relative "mlx_lm/models/dots1"
|
|
96
|
+
require_relative "mlx_lm/models/llama4"
|
|
97
|
+
require_relative "mlx_lm/models/ministral3"
|
|
98
|
+
require_relative "mlx_lm/models/hunyuan"
|
|
99
|
+
require_relative "mlx_lm/models/gpt_oss"
|
|
100
|
+
require_relative "mlx_lm/models/mimo_v2_flash"
|
|
101
|
+
require_relative "mlx_lm/models/lfm2_moe"
|
|
102
|
+
require_relative "mlx_lm/models/afmoe"
|
|
103
|
+
require_relative "mlx_lm/models/bailing_moe"
|
|
104
|
+
require_relative "mlx_lm/models/exaone_moe"
|
|
105
|
+
require_relative "mlx_lm/models/glm4_moe"
|
|
106
|
+
require_relative "mlx_lm/models/minimax"
|
|
107
|
+
require_relative "mlx_lm/models/nemotron_nas"
|
|
108
|
+
require_relative "mlx_lm/models/recurrent_gemma"
|
|
109
|
+
require_relative "mlx_lm/models/step3p5"
|
|
110
|
+
require_relative "mlx_lm/models/afm7"
|
|
111
|
+
require_relative "mlx_lm/models/bailing_moe_linear"
|
|
112
|
+
require_relative "mlx_lm/models/falcon_h1"
|
|
113
|
+
require_relative "mlx_lm/models/glm4_moe_lite"
|
|
114
|
+
require_relative "mlx_lm/models/granitemoehybrid"
|
|
115
|
+
require_relative "mlx_lm/models/jamba"
|
|
116
|
+
require_relative "mlx_lm/models/kimi_linear"
|
|
117
|
+
require_relative "mlx_lm/models/longcat_flash"
|
|
118
|
+
require_relative "mlx_lm/models/longcat_flash_ngram"
|
|
119
|
+
require_relative "mlx_lm/models/nemotron_h"
|
|
120
|
+
require_relative "mlx_lm/models/plamo2"
|
|
121
|
+
require_relative "mlx_lm/models/qwen3_next"
|
|
122
|
+
require_relative "mlx_lm/models/rwkv7"
|
|
123
|
+
require_relative "mlx_lm/generate"
|
|
124
|
+
require_relative "mlx_lm/quantize"
|
|
125
|
+
require_relative "mlx_lm/load_utils"
|
|
126
|
+
require_relative "mlx_lm/tuner/lora"
|
|
127
|
+
require_relative "mlx_lm/chat_template"
|
|
128
|
+
require_relative "mlx_lm/cli"
|
|
129
|
+
require_relative "mlx_lm/server"
|
|
130
|
+
require_relative "mlx_lm/perplexity"
|
|
131
|
+
require_relative "mlx_lm/benchmark"
|
|
132
|
+
require_relative "mlx_lm/convert_utils"
|
|
133
|
+
|
|
134
|
+
# Root namespace of the mlx-ruby-lm gem. The body is empty here; constants such
# as MlxLm::WeightUtils are defined by the files required above.
module MlxLm; end
|
metadata
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: mlx-ruby-lm
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.30.7.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Alex Skryl
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: mlx
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: 0.30.7.5
|
|
19
|
+
- - "<"
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '1.0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
+
requirements:
|
|
26
|
+
- - ">="
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
version: 0.30.7.5
|
|
29
|
+
- - "<"
|
|
30
|
+
- !ruby/object:Gem::Version
|
|
31
|
+
version: '1.0'
|
|
32
|
+
- !ruby/object:Gem::Dependency
|
|
33
|
+
name: safetensors
|
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
|
35
|
+
requirements:
|
|
36
|
+
- - "~>"
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: '0.2'
|
|
39
|
+
type: :runtime
|
|
40
|
+
prerelease: false
|
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
42
|
+
requirements:
|
|
43
|
+
- - "~>"
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: '0.2'
|
|
46
|
+
- !ruby/object:Gem::Dependency
|
|
47
|
+
name: tokenizers
|
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
|
49
|
+
requirements:
|
|
50
|
+
- - "~>"
|
|
51
|
+
- !ruby/object:Gem::Version
|
|
52
|
+
version: '0.6'
|
|
53
|
+
type: :runtime
|
|
54
|
+
prerelease: false
|
|
55
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
56
|
+
requirements:
|
|
57
|
+
- - "~>"
|
|
58
|
+
- !ruby/object:Gem::Version
|
|
59
|
+
version: '0.6'
|
|
60
|
+
- !ruby/object:Gem::Dependency
|
|
61
|
+
name: minitest
|
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
|
63
|
+
requirements:
|
|
64
|
+
- - "~>"
|
|
65
|
+
- !ruby/object:Gem::Version
|
|
66
|
+
version: '5.20'
|
|
67
|
+
type: :development
|
|
68
|
+
prerelease: false
|
|
69
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
70
|
+
requirements:
|
|
71
|
+
- - "~>"
|
|
72
|
+
- !ruby/object:Gem::Version
|
|
73
|
+
version: '5.20'
|
|
74
|
+
- !ruby/object:Gem::Dependency
|
|
75
|
+
name: ostruct
|
|
76
|
+
requirement: !ruby/object:Gem::Requirement
|
|
77
|
+
requirements:
|
|
78
|
+
- - ">="
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
version: '0'
|
|
81
|
+
type: :development
|
|
82
|
+
prerelease: false
|
|
83
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
84
|
+
requirements:
|
|
85
|
+
- - ">="
|
|
86
|
+
- !ruby/object:Gem::Version
|
|
87
|
+
version: '0'
|
|
88
|
+
- !ruby/object:Gem::Dependency
|
|
89
|
+
name: rake
|
|
90
|
+
requirement: !ruby/object:Gem::Requirement
|
|
91
|
+
requirements:
|
|
92
|
+
- - "~>"
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
version: '13.0'
|
|
95
|
+
type: :development
|
|
96
|
+
prerelease: false
|
|
97
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
98
|
+
requirements:
|
|
99
|
+
- - "~>"
|
|
100
|
+
- !ruby/object:Gem::Version
|
|
101
|
+
version: '13.0'
|
|
102
|
+
description: A Ruby port of mlx-lm providing large language model inference, quantization,
|
|
103
|
+
LoRA fine-tuning, and an OpenAI-compatible server built on the mlx gem. Supports
|
|
104
|
+
Llama, Gemma, Qwen2, Phi3, Mixtral, DeepSeek, and many more architectures.
|
|
105
|
+
email:
|
|
106
|
+
- rut216@gmail.com
|
|
107
|
+
executables:
|
|
108
|
+
- mlx_lm
|
|
109
|
+
extensions: []
|
|
110
|
+
extra_rdoc_files: []
|
|
111
|
+
files:
|
|
112
|
+
- LICENSE.txt
|
|
113
|
+
- README.md
|
|
114
|
+
- exe/mlx_lm
|
|
115
|
+
- lib/mlx_lm.rb
|
|
116
|
+
- lib/mlx_lm/benchmark.rb
|
|
117
|
+
- lib/mlx_lm/chat_template.rb
|
|
118
|
+
- lib/mlx_lm/cli.rb
|
|
119
|
+
- lib/mlx_lm/config.rb
|
|
120
|
+
- lib/mlx_lm/convert_utils.rb
|
|
121
|
+
- lib/mlx_lm/generate.rb
|
|
122
|
+
- lib/mlx_lm/load_utils.rb
|
|
123
|
+
- lib/mlx_lm/model_args.rb
|
|
124
|
+
- lib/mlx_lm/models.rb
|
|
125
|
+
- lib/mlx_lm/models/activations.rb
|
|
126
|
+
- lib/mlx_lm/models/afm7.rb
|
|
127
|
+
- lib/mlx_lm/models/afmoe.rb
|
|
128
|
+
- lib/mlx_lm/models/apertus.rb
|
|
129
|
+
- lib/mlx_lm/models/baichuan_m1.rb
|
|
130
|
+
- lib/mlx_lm/models/bailing_moe.rb
|
|
131
|
+
- lib/mlx_lm/models/bailing_moe_linear.rb
|
|
132
|
+
- lib/mlx_lm/models/bitlinear_layers.rb
|
|
133
|
+
- lib/mlx_lm/models/bitnet.rb
|
|
134
|
+
- lib/mlx_lm/models/cache.rb
|
|
135
|
+
- lib/mlx_lm/models/cohere.rb
|
|
136
|
+
- lib/mlx_lm/models/cohere2.rb
|
|
137
|
+
- lib/mlx_lm/models/dbrx.rb
|
|
138
|
+
- lib/mlx_lm/models/deepseek.rb
|
|
139
|
+
- lib/mlx_lm/models/deepseek_v2.rb
|
|
140
|
+
- lib/mlx_lm/models/deepseek_v3.rb
|
|
141
|
+
- lib/mlx_lm/models/deepseek_v32.rb
|
|
142
|
+
- lib/mlx_lm/models/dots1.rb
|
|
143
|
+
- lib/mlx_lm/models/ernie4_5.rb
|
|
144
|
+
- lib/mlx_lm/models/ernie4_5_moe.rb
|
|
145
|
+
- lib/mlx_lm/models/exaone.rb
|
|
146
|
+
- lib/mlx_lm/models/exaone4.rb
|
|
147
|
+
- lib/mlx_lm/models/exaone_moe.rb
|
|
148
|
+
- lib/mlx_lm/models/falcon_h1.rb
|
|
149
|
+
- lib/mlx_lm/models/gated_delta.rb
|
|
150
|
+
- lib/mlx_lm/models/gemma.rb
|
|
151
|
+
- lib/mlx_lm/models/gemma2.rb
|
|
152
|
+
- lib/mlx_lm/models/gemma3.rb
|
|
153
|
+
- lib/mlx_lm/models/gemma3_text.rb
|
|
154
|
+
- lib/mlx_lm/models/gemma3n.rb
|
|
155
|
+
- lib/mlx_lm/models/glm.rb
|
|
156
|
+
- lib/mlx_lm/models/glm4.rb
|
|
157
|
+
- lib/mlx_lm/models/glm4_moe.rb
|
|
158
|
+
- lib/mlx_lm/models/glm4_moe_lite.rb
|
|
159
|
+
- lib/mlx_lm/models/glm_moe_dsa.rb
|
|
160
|
+
- lib/mlx_lm/models/gpt2.rb
|
|
161
|
+
- lib/mlx_lm/models/gpt_bigcode.rb
|
|
162
|
+
- lib/mlx_lm/models/gpt_neox.rb
|
|
163
|
+
- lib/mlx_lm/models/gpt_oss.rb
|
|
164
|
+
- lib/mlx_lm/models/granite.rb
|
|
165
|
+
- lib/mlx_lm/models/granitemoe.rb
|
|
166
|
+
- lib/mlx_lm/models/granitemoehybrid.rb
|
|
167
|
+
- lib/mlx_lm/models/helium.rb
|
|
168
|
+
- lib/mlx_lm/models/hunyuan.rb
|
|
169
|
+
- lib/mlx_lm/models/hunyuan_v1_dense.rb
|
|
170
|
+
- lib/mlx_lm/models/internlm2.rb
|
|
171
|
+
- lib/mlx_lm/models/internlm3.rb
|
|
172
|
+
- lib/mlx_lm/models/iquestloopcoder.rb
|
|
173
|
+
- lib/mlx_lm/models/jamba.rb
|
|
174
|
+
- lib/mlx_lm/models/kimi_k25.rb
|
|
175
|
+
- lib/mlx_lm/models/kimi_linear.rb
|
|
176
|
+
- lib/mlx_lm/models/kimi_vl.rb
|
|
177
|
+
- lib/mlx_lm/models/klear.rb
|
|
178
|
+
- lib/mlx_lm/models/lfm2.rb
|
|
179
|
+
- lib/mlx_lm/models/lfm2_moe.rb
|
|
180
|
+
- lib/mlx_lm/models/lfm2_vl.rb
|
|
181
|
+
- lib/mlx_lm/models/lille_130m.rb
|
|
182
|
+
- lib/mlx_lm/models/llama.rb
|
|
183
|
+
- lib/mlx_lm/models/llama4.rb
|
|
184
|
+
- lib/mlx_lm/models/llama4_text.rb
|
|
185
|
+
- lib/mlx_lm/models/longcat_flash.rb
|
|
186
|
+
- lib/mlx_lm/models/longcat_flash_ngram.rb
|
|
187
|
+
- lib/mlx_lm/models/mamba.rb
|
|
188
|
+
- lib/mlx_lm/models/mamba2.rb
|
|
189
|
+
- lib/mlx_lm/models/mimo.rb
|
|
190
|
+
- lib/mlx_lm/models/mimo_v2_flash.rb
|
|
191
|
+
- lib/mlx_lm/models/minicpm.rb
|
|
192
|
+
- lib/mlx_lm/models/minicpm3.rb
|
|
193
|
+
- lib/mlx_lm/models/minimax.rb
|
|
194
|
+
- lib/mlx_lm/models/ministral3.rb
|
|
195
|
+
- lib/mlx_lm/models/mistral3.rb
|
|
196
|
+
- lib/mlx_lm/models/mixtral.rb
|
|
197
|
+
- lib/mlx_lm/models/mla.rb
|
|
198
|
+
- lib/mlx_lm/models/nanochat.rb
|
|
199
|
+
- lib/mlx_lm/models/nemotron.rb
|
|
200
|
+
- lib/mlx_lm/models/nemotron_h.rb
|
|
201
|
+
- lib/mlx_lm/models/nemotron_nas.rb
|
|
202
|
+
- lib/mlx_lm/models/olmo.rb
|
|
203
|
+
- lib/mlx_lm/models/olmo2.rb
|
|
204
|
+
- lib/mlx_lm/models/olmo3.rb
|
|
205
|
+
- lib/mlx_lm/models/olmoe.rb
|
|
206
|
+
- lib/mlx_lm/models/openelm.rb
|
|
207
|
+
- lib/mlx_lm/models/phi.rb
|
|
208
|
+
- lib/mlx_lm/models/phi3.rb
|
|
209
|
+
- lib/mlx_lm/models/phi3small.rb
|
|
210
|
+
- lib/mlx_lm/models/phimoe.rb
|
|
211
|
+
- lib/mlx_lm/models/phixtral.rb
|
|
212
|
+
- lib/mlx_lm/models/pipeline.rb
|
|
213
|
+
- lib/mlx_lm/models/pixtral.rb
|
|
214
|
+
- lib/mlx_lm/models/plamo.rb
|
|
215
|
+
- lib/mlx_lm/models/plamo2.rb
|
|
216
|
+
- lib/mlx_lm/models/qwen.rb
|
|
217
|
+
- lib/mlx_lm/models/qwen2.rb
|
|
218
|
+
- lib/mlx_lm/models/qwen2_moe.rb
|
|
219
|
+
- lib/mlx_lm/models/qwen2_vl.rb
|
|
220
|
+
- lib/mlx_lm/models/qwen3.rb
|
|
221
|
+
- lib/mlx_lm/models/qwen3_5.rb
|
|
222
|
+
- lib/mlx_lm/models/qwen3_5_moe.rb
|
|
223
|
+
- lib/mlx_lm/models/qwen3_moe.rb
|
|
224
|
+
- lib/mlx_lm/models/qwen3_next.rb
|
|
225
|
+
- lib/mlx_lm/models/qwen3_vl.rb
|
|
226
|
+
- lib/mlx_lm/models/qwen3_vl_moe.rb
|
|
227
|
+
- lib/mlx_lm/models/recurrent_gemma.rb
|
|
228
|
+
- lib/mlx_lm/models/rope_utils.rb
|
|
229
|
+
- lib/mlx_lm/models/rwkv7.rb
|
|
230
|
+
- lib/mlx_lm/models/seed_oss.rb
|
|
231
|
+
- lib/mlx_lm/models/smollm3.rb
|
|
232
|
+
- lib/mlx_lm/models/solar_open.rb
|
|
233
|
+
- lib/mlx_lm/models/ssm.rb
|
|
234
|
+
- lib/mlx_lm/models/stablelm.rb
|
|
235
|
+
- lib/mlx_lm/models/starcoder2.rb
|
|
236
|
+
- lib/mlx_lm/models/step3p5.rb
|
|
237
|
+
- lib/mlx_lm/models/switch_layers.rb
|
|
238
|
+
- lib/mlx_lm/models/telechat3.rb
|
|
239
|
+
- lib/mlx_lm/models/youtu_llm.rb
|
|
240
|
+
- lib/mlx_lm/perplexity.rb
|
|
241
|
+
- lib/mlx_lm/quantize.rb
|
|
242
|
+
- lib/mlx_lm/sample_utils.rb
|
|
243
|
+
- lib/mlx_lm/server.rb
|
|
244
|
+
- lib/mlx_lm/tokenizer_utils.rb
|
|
245
|
+
- lib/mlx_lm/tuner/lora.rb
|
|
246
|
+
- lib/mlx_lm/version.rb
|
|
247
|
+
- lib/mlx_lm/weight_utils.rb
|
|
248
|
+
homepage: https://github.com/skryl/mlx-ruby-lm
|
|
249
|
+
licenses:
|
|
250
|
+
- MIT
|
|
251
|
+
metadata:
|
|
252
|
+
homepage_uri: https://github.com/skryl/mlx-ruby-lm
|
|
253
|
+
source_code_uri: https://github.com/skryl/mlx-ruby-lm
|
|
254
|
+
changelog_uri: https://github.com/skryl/mlx-ruby-lm/blob/main/CHANGELOG.md
|
|
255
|
+
rdoc_options: []
|
|
256
|
+
require_paths:
|
|
257
|
+
- lib
|
|
258
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
259
|
+
requirements:
|
|
260
|
+
- - ">="
|
|
261
|
+
- !ruby/object:Gem::Version
|
|
262
|
+
version: '3.4'
|
|
263
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
264
|
+
requirements:
|
|
265
|
+
- - ">="
|
|
266
|
+
- !ruby/object:Gem::Version
|
|
267
|
+
version: '0'
|
|
268
|
+
requirements: []
|
|
269
|
+
rubygems_version: 4.0.3
|
|
270
|
+
specification_version: 4
|
|
271
|
+
summary: LLM inference and fine-tuning on MLX for Ruby
|
|
272
|
+
test_files: []
|