lalamo 0.5.15__py3-none-any.whl → 0.5.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lalamo/__init__.py CHANGED
@@ -15,7 +15,7 @@ from lalamo.speculator import (
15
15
  SpeculatorTrainingEvent,
16
16
  )
17
17
 
18
- __version__ = "0.5.15"
18
+ __version__ = "0.5.16"
19
19
 
20
20
  __all__ = [
21
21
  "AssistantMessage",
@@ -13,6 +13,7 @@ from lalamo.modules import (
13
13
  LlamaRoPEConfig,
14
14
  MLXQuantizedLinearConfig,
15
15
  MLXQuantizedTiedEmbeddingConfig,
16
+ MLXQuantizedUntiedEmbeddingConfig,
16
17
  NormalizationConfig,
17
18
  SiLU,
18
19
  TiedEmbeddingConfig,
@@ -89,27 +90,37 @@ class HFLlamaConfig(HuggingFaceLMConfig):
89
90
  ) -> DecoderConfig:
90
91
  quantization = self.quantization or self.quantization_config
91
92
  if isinstance(quantization, MLXQuantizationConfig):
92
- assert self.tie_word_embeddings, "only tied embeddings are supported"
93
- embedding_config = MLXQuantizedTiedEmbeddingConfig(
94
- input_scale=None,
95
- logit_soft_cap=None,
96
- group_size=quantization.group_size,
97
- embedding_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
98
- activation_quantization_mode=None,
99
- activation_precision=activation_precision,
100
- )
101
- elif self.tie_word_embeddings:
102
- embedding_config = TiedEmbeddingConfig(
103
- input_scale=None,
104
- logit_soft_cap=None,
105
- precision=activation_precision,
106
- )
107
- else:
108
- embedding_config = UntiedEmbeddingConfig(
109
- input_scale=None,
110
- logit_soft_cap=None,
111
- precision=activation_precision,
112
- )
93
+ if self.tie_word_embeddings:
94
+ embedding_config = MLXQuantizedTiedEmbeddingConfig(
95
+ input_scale=None,
96
+ logit_soft_cap=None,
97
+ group_size=quantization.group_size,
98
+ embedding_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
99
+ activation_quantization_mode=None,
100
+ activation_precision=activation_precision,
101
+ )
102
+ else:
103
+ embedding_config = MLXQuantizedUntiedEmbeddingConfig(
104
+ input_scale=None,
105
+ logit_soft_cap=None,
106
+ group_size=quantization.group_size,
107
+ embedding_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
108
+ activation_quantization_mode=None,
109
+ activation_precision=activation_precision,
110
+ )
111
+ else: # noqa: PLR5501
112
+ if self.tie_word_embeddings:
113
+ embedding_config = TiedEmbeddingConfig(
114
+ input_scale=None,
115
+ logit_soft_cap=None,
116
+ precision=activation_precision,
117
+ )
118
+ else:
119
+ embedding_config = UntiedEmbeddingConfig(
120
+ input_scale=None,
121
+ logit_soft_cap=None,
122
+ precision=activation_precision,
123
+ )
113
124
  if self.rope_scaling is None:
114
125
  rope_config = UnscaledRoPEConfig(
115
126
  precision=activation_precision,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lalamo
3
- Version: 0.5.15
3
+ Version: 0.5.16
4
4
  Summary: JAX library for optimization and export of models for use with the UZU inference engine.
5
5
  Requires-Python: <4,>=3.12
6
6
  Description-Content-Type: text/markdown
@@ -1,4 +1,4 @@
1
- lalamo/__init__.py,sha256=zoyKblopG_NpALNJ1tfamO79HeDREGBFQxBXvOpn8Ag,815
1
+ lalamo/__init__.py,sha256=FjfGsBVSl14mNsDoFJEwXMRUq1-Kg_lessRzlJNG3KM,815
2
2
  lalamo/common.py,sha256=5NUFD26yQgOnEEk3LaQnce8n-VwJxILkEpFesHZhtQU,3820
3
3
  lalamo/main.py,sha256=GgUT7lT48-XQuAEH7qzsDKG8Lx9iBf-sYBIRhZL9q7E,23978
4
4
  lalamo/message_processor.py,sha256=bSUAQg7CemLTnBV4LtPxJBicAalruDCA-JXjkTYPZ8U,5797
@@ -23,7 +23,7 @@ lalamo/model_import/decoder_configs/huggingface/gemma2.py,sha256=g8LH_GlSNyL04WW
23
23
  lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=UXiEyNqlD0Czc5Gj3n4hNqNDp9Ml5YzH1XZ6BXj0mgU,10223
24
24
  lalamo/model_import/decoder_configs/huggingface/gpt_oss.py,sha256=MBCoPbuWyzbJiBRtHOtpaPHJjQ1UVCAYcVrfIejTnlQ,7446
25
25
  lalamo/model_import/decoder_configs/huggingface/lfm2.py,sha256=vrBMxtiKEg0eHNDL_bWM9odlrsab7jlMXEY8vjEB7-c,7595
26
- lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=jrbTjRBfT_LP5lSSk8ZpYIaCEJdqimbC2o4WgrulrHo,7985
26
+ lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=pGuBQTY6qpx6CriWwdsLpuTSRS7ECoTP1kt5pSKRlNQ,8549
27
27
  lalamo/model_import/decoder_configs/huggingface/llamba.py,sha256=ANB-vQK8U-zVFubZSTDXXt2S70T5SVOGzf7eOVvPzIQ,5773
28
28
  lalamo/model_import/decoder_configs/huggingface/mistral.py,sha256=MDGC0ivzJuUpOC11n8vFdcVzqccUyaRw_hkL74mVlAg,4599
29
29
  lalamo/model_import/decoder_configs/huggingface/modern_bert.py,sha256=A8nNIMhPVumvPWIFR3RexRc6XkFyUd_3mmNpmvyPEGE,8816
@@ -85,9 +85,9 @@ lalamo/speculator/estimator.py,sha256=4D8dPZCWsrpORb7y8pQ6VsiIg1Cblvvxe6gXCoYtcD
85
85
  lalamo/speculator/inference.py,sha256=5GntUgj0HQLeLn3HIHnVX8EEO0EBzmKeP5-_U7kdFAM,3670
86
86
  lalamo/speculator/ngram.py,sha256=95mdfAWhx4d5XOnOwhyhElnvcy6nlUjYhcbJzqDs414,5875
87
87
  lalamo/speculator/utils.py,sha256=0wZoMMIzzk0Q-3zq5H5f-JBplePNHxywndkrNtOJOyo,1697
88
- lalamo-0.5.15.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
89
- lalamo-0.5.15.dist-info/METADATA,sha256=VeVb82AvCzH828Xm0TSQ8xJnDhhd_PzWjdQtja1-YMs,3147
90
- lalamo-0.5.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
- lalamo-0.5.15.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
92
- lalamo-0.5.15.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
93
- lalamo-0.5.15.dist-info/RECORD,,
88
+ lalamo-0.5.16.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
89
+ lalamo-0.5.16.dist-info/METADATA,sha256=dcs0vT9RULTxt4cxJJmfjP-4UJi7ZkrifXAaSMAgKeU,3147
90
+ lalamo-0.5.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
+ lalamo-0.5.16.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
92
+ lalamo-0.5.16.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
93
+ lalamo-0.5.16.dist-info/RECORD,,