lalamo 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
lalamo/__init__.py CHANGED
@@ -15,7 +15,7 @@ from lalamo.speculator import (
     SpeculatorTrainingEvent,
 )
 
-__version__ = "0.5.12"
+__version__ = "0.5.14"
 
 __all__ = [
     "AssistantMessage",

lalamo/model_import/decoder_configs/huggingface/gemma3.py CHANGED
@@ -5,7 +5,13 @@ from typing import Literal
 import jax.numpy as jnp
 from jaxtyping import DTypeLike
 
-from lalamo.modules import DecoderConfig, TiedEmbeddingConfig, TransformerConfig
+from lalamo.modules import (
+    DecoderConfig,
+    MLXQuantizedLinearConfig,
+    MLXQuantizedTiedEmbeddingConfig,
+    TiedEmbeddingConfig,
+    TransformerConfig,
+)
 from lalamo.modules.activations import GELU
 from lalamo.modules.linear import FullPrecisionLinearConfig
 from lalamo.modules.mlp import DenseMLPConfig
@@ -13,8 +19,9 @@ from lalamo.modules.normalization import NormalizationConfig, UpcastMode
 from lalamo.modules.rope import LinearScalingRoPEConfig, UnscaledRoPEConfig, YARNRoPEConfig
 from lalamo.modules.token_mixers.attention import AttentionConfig
 from lalamo.modules.transformer_layer import TransformerLayerConfig
+from lalamo.quantization import QuantizationMode
 
-from .common import HuggingFaceLMConfig
+from .common import HuggingFaceLMConfig, MLXQuantizationConfig, QuantizationConfigType
 
 __all__ = ["HFGemma3Config", "HFGemma3TextConfig"]
 
@@ -61,6 +68,9 @@ class HFGemma3TextConfigRaw:
     final_logit_softcapping: float | None = None
     vocab_size: int = 262208
 
+    quantization: QuantizationConfigType = None
+    quantization_config: QuantizationConfigType = None
+
     @property
     def sliding_window_sizes(self) -> list[int | None]:
         result = []
@@ -77,14 +87,28 @@ class HFGemma3TextConfigRaw:
         activation_precision: DTypeLike,
         accumulation_precision: DTypeLike,
         metadata_dict: Mapping[str, str],  # noqa: ARG002
+        fallback_quantization: QuantizationConfigType | None = None,
     ) -> DecoderConfig:
+        quantization = self.quantization or self.quantization_config or fallback_quantization
         input_scale = _round_to_bfloat16(self.hidden_size**0.5)
         attention_scale = self.query_pre_attn_scalar**-0.5
-        embedding_config = TiedEmbeddingConfig(
-            input_scale=input_scale,
-            logit_soft_cap=self.final_logit_softcapping,
-            precision=activation_precision,
-        )
+        if quantization is None:
+            embedding_config = TiedEmbeddingConfig(
+                input_scale=input_scale,
+                logit_soft_cap=self.final_logit_softcapping,
+                precision=activation_precision,
+            )
+        elif isinstance(quantization, MLXQuantizationConfig):
+            embedding_config = MLXQuantizedTiedEmbeddingConfig(
+                input_scale=input_scale,
+                logit_soft_cap=self.final_logit_softcapping,
+                group_size=quantization.group_size,
+                embedding_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
+                activation_quantization_mode=None,
+                activation_precision=activation_precision,
+            )
+        else:
+            raise RuntimeError(f"Unsupported quantization format: {type(quantization)}")
         rms_norm_config = NormalizationConfig(
             scale_precision=activation_precision,
             accumulation_precision=accumulation_precision,
@@ -127,7 +151,17 @@ class HFGemma3TextConfigRaw:
             max_sequence_length=context_length or self.max_position_embeddings,
         )
 
-        linear_config = FullPrecisionLinearConfig(precision=activation_precision)
+        if quantization is None:
+            linear_config = FullPrecisionLinearConfig(precision=activation_precision)
+        elif isinstance(quantization, MLXQuantizationConfig):
+            linear_config = MLXQuantizedLinearConfig(
+                group_size=quantization.group_size,
+                weight_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
+                activation_quantization_mode=None,
+                activation_precision=activation_precision,
+            )
+        else:
+            raise RuntimeError(f"Unsupported quantization format: {type(quantization)}")
 mlp_config = DenseMLPConfig(
             linear_config=linear_config,
             activation=GELU(),
@@ -214,6 +248,9 @@ class HFGemma3Config(HuggingFaceLMConfig):
     transformers_version: str
     vision_config: HFGemma3VisionConfig
 
+    quantization: QuantizationConfigType = None
+    quantization_config: QuantizationConfigType = None
+
     def to_decoder_config(
         self,
         context_length: int | None,
@@ -221,9 +258,11 @@ class HFGemma3Config(HuggingFaceLMConfig):
         accumulation_precision: DTypeLike,
         metadata_dict: Mapping[str, str],
     ) -> DecoderConfig:
+        quantization = self.quantization or self.quantization_config
         return self.text_config.to_decoder_config(
             context_length=context_length,
             activation_precision=activation_precision,
             accumulation_precision=accumulation_precision,
             metadata_dict=metadata_dict,
+            fallback_quantization=quantization,
         )
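
Note: the new `fallback_quantization` plumbing resolves quantization settings in a fixed order: the text config's own `quantization` field wins, then `quantization_config`, then the fallback forwarded by the outer multimodal `HFGemma3Config`. A minimal sketch of that precedence rule, using a stand-in `MLXQuantizationConfig` (the real class lives in `decoder_configs/huggingface/common.py`):

```python
from dataclasses import dataclass


@dataclass
class MLXQuantizationConfig:
    """Stand-in for the real config in decoder_configs/huggingface/common.py."""

    group_size: int
    bits: int


def resolve_quantization(quantization, quantization_config, fallback=None):
    # Mirrors `self.quantization or self.quantization_config or fallback_quantization`:
    # the first non-None source wins; full precision only if all three are None.
    return quantization or quantization_config or fallback


outer = MLXQuantizationConfig(group_size=64, bits=4)
assert resolve_quantization(None, None, fallback=outer) is outer  # fallback wins last
assert resolve_quantization(None, outer, fallback=None) is outer  # inner field wins first
assert resolve_quantization(None, None) is None                   # full precision
```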

lalamo/model_import/decoder_configs/huggingface/llama.py CHANGED
@@ -11,6 +11,8 @@ from lalamo.modules import (
     FullPrecisionLinearConfig,
     GroupQuantizedLinearConfig,
     LlamaRoPEConfig,
+    MLXQuantizedLinearConfig,
+    MLXQuantizedTiedEmbeddingConfig,
     NormalizationConfig,
     SiLU,
     TiedEmbeddingConfig,
@@ -23,7 +25,7 @@ from lalamo.modules import (
 )
 from lalamo.quantization import QuantizationMode
 
-from .common import AWQQuantizationConfig, GPTQQuantizationConfig, HuggingFaceLMConfig
+from .common import HuggingFaceLMConfig, MLXQuantizationConfig, QuantizationConfigType
 
 __all__ = ["HFLlamaConfig"]
 
@@ -75,7 +77,8 @@ class HFLlamaConfig(HuggingFaceLMConfig):
     vocab_size: int
     head_dim: int | None = None
 
-    quantization_config: AWQQuantizationConfig | GPTQQuantizationConfig | None = None
+    quantization: QuantizationConfigType = None
+    quantization_config: QuantizationConfigType = None
 
     def to_decoder_config(
         self,
@@ -84,7 +87,18 @@ class HFLlamaConfig(HuggingFaceLMConfig):
         accumulation_precision: DTypeLike,
         metadata_dict: Mapping[str, str],  # noqa: ARG002
     ) -> DecoderConfig:
-        if self.tie_word_embeddings:
+        quantization = self.quantization or self.quantization_config
+        if isinstance(quantization, MLXQuantizationConfig):
+            assert self.tie_word_embeddings, "only tied embeddings are supported"
+            embedding_config = MLXQuantizedTiedEmbeddingConfig(
+                input_scale=None,
+                logit_soft_cap=None,
+                group_size=quantization.group_size,
+                embedding_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
+                activation_quantization_mode=None,
+                activation_precision=activation_precision,
+            )
+        elif self.tie_word_embeddings:
             embedding_config = TiedEmbeddingConfig(
                 input_scale=None,
                 logit_soft_cap=None,
@@ -133,14 +147,21 @@ class HFLlamaConfig(HuggingFaceLMConfig):
             upcast_mode=UpcastMode.ONLY_NORMALIZATION,
             subtract_mean=False,
         )
-        if self.quantization_config is None:
+        if quantization is None:
             linear_config = FullPrecisionLinearConfig(
                 precision=activation_precision,
             )
+        elif isinstance(quantization, MLXQuantizationConfig):
+            linear_config = MLXQuantizedLinearConfig(
+                group_size=quantization.group_size,
+                weight_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
+                activation_quantization_mode=None,
+                activation_precision=activation_precision,
+            )
         else:
             linear_config = GroupQuantizedLinearConfig(
-                group_size=self.quantization_config.group_size,
-                weight_quantization_mode=QuantizationMode.from_num_bits(self.quantization_config.bits),
+                group_size=quantization.group_size,
+                weight_quantization_mode=QuantizationMode.from_num_bits(quantization.bits),
                 activation_quantization_mode=None,
                 activation_precision=activation_precision,
             )
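
Note: `HFLlamaConfig` now dispatches over three cases instead of two: no quantization (full precision), an MLX-style config (new), and everything else (the existing AWQ/GPTQ path). A sketch of just the branch structure, with hypothetical stand-in classes; only the dispatch shape mirrors the diff:

```python
from dataclasses import dataclass


@dataclass
class MLXQuantizationConfig:  # hypothetical stand-in
    group_size: int
    bits: int


@dataclass
class GPTQQuantizationConfig:  # hypothetical stand-in for the AWQ/GPTQ shapes
    group_size: int
    bits: int


def pick_linear_config(quantization) -> str:
    """Mirrors the three-way dispatch in HFLlamaConfig.to_decoder_config."""
    if quantization is None:
        return "FullPrecisionLinearConfig"   # unquantized checkpoint
    if isinstance(quantization, MLXQuantizationConfig):
        return "MLXQuantizedLinearConfig"    # new MLX path
    return "GroupQuantizedLinearConfig"      # existing AWQ/GPTQ path


assert pick_linear_config(None) == "FullPrecisionLinearConfig"
assert pick_linear_config(MLXQuantizationConfig(64, 4)) == "MLXQuantizedLinearConfig"
assert pick_linear_config(GPTQQuantizationConfig(128, 4)) == "GroupQuantizedLinearConfig"
```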

lalamo/model_import/model_specs/gemma.py CHANGED
@@ -3,8 +3,9 @@ from lalamo.model_import.decoder_configs import (
     HFGemma3Config,
     HFGemma3TextConfig,
 )
+from lalamo.quantization import QuantizationMode
 
-from .common import ModelSpec, WeightsType
+from .common import ConfigMap, FileSpec, ModelSpec, WeightsType
 
 __all__ = ["GEMMA_MODELS"]
 
@@ -31,6 +32,28 @@ GEMMA3 = [
         config_type=HFGemma3TextConfig,
         weights_type=WeightsType.SAFETENSORS,
     ),
+    ModelSpec(
+        vendor="Google",
+        family="Gemma-3",
+        name="Gemma-3-1B-Instruct-4bit",
+        size="1B",
+        quantization=QuantizationMode.UINT4,
+        repo="mlx-community/gemma-3-1b-it-4bit",
+        config_type=HFGemma3TextConfig,
+        weights_type=WeightsType.SAFETENSORS,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "google/gemma-3-1b-it")),
+    ),
+    ModelSpec(
+        vendor="Google",
+        family="Gemma-3",
+        name="Gemma-3-1B-Instruct-8bit",
+        size="1B",
+        quantization=QuantizationMode.UINT8,
+        repo="mlx-community/gemma-3-1b-it-8bit",
+        config_type=HFGemma3TextConfig,
+        weights_type=WeightsType.SAFETENSORS,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "google/gemma-3-1b-it")),
+    ),
     ModelSpec(
         vendor="Google",
         family="Gemma-3",
@@ -41,6 +64,28 @@ GEMMA3 = [
         config_type=HFGemma3Config,
         weights_type=WeightsType.SAFETENSORS,
     ),
+    ModelSpec(
+        vendor="Google",
+        family="Gemma-3",
+        name="Gemma-3-4B-Instruct-4bit",
+        size="4B",
+        quantization=QuantizationMode.UINT4,
+        repo="mlx-community/gemma-3-4b-it-4bit",
+        config_type=HFGemma3Config,
+        weights_type=WeightsType.SAFETENSORS,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "google/gemma-3-4b-it")),
+    ),
+    ModelSpec(
+        vendor="Google",
+        family="Gemma-3",
+        name="Gemma-3-4B-Instruct-8bit",
+        size="4B",
+        quantization=QuantizationMode.UINT8,
+        repo="mlx-community/gemma-3-4b-it-8bit",
+        config_type=HFGemma3Config,
+        weights_type=WeightsType.SAFETENSORS,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "google/gemma-3-4b-it")),
+    ),
     ModelSpec(
         vendor="Google",
         family="Gemma-3",
@@ -51,6 +96,28 @@ GEMMA3 = [
         config_type=HFGemma3Config,
         weights_type=WeightsType.SAFETENSORS,
     ),
+    ModelSpec(
+        vendor="Google",
+        family="Gemma-3",
+        name="Gemma-3-27B-Instruct-4bit",
+        size="27B",
+        quantization=QuantizationMode.UINT4,
+        repo="mlx-community/gemma-3-27b-it-4bit",
+        config_type=HFGemma3Config,
+        weights_type=WeightsType.SAFETENSORS,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "google/gemma-3-27b-it")),
+    ),
+    ModelSpec(
+        vendor="Google",
+        family="Gemma-3",
+        name="Gemma-3-27B-Instruct-8bit",
+        size="27B",
+        quantization=QuantizationMode.UINT8,
+        repo="mlx-community/gemma-3-27b-it-8bit",
+        config_type=HFGemma3Config,
+        weights_type=WeightsType.SAFETENSORS,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "google/gemma-3-27b-it")),
+    ),
 ]
 
 
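
Note: every MLX spec pairs the mlx-community weights repo with a `ConfigMap` whose `FileSpec` points `generation_config.json` back at the original Google repo, presumably because the MLX conversions do not ship that file. A rough sketch of what such a redirect amounts to, assuming `FileSpec` is roughly (filename, override repo); the real classes live in `model_specs/common.py`:

```python
from dataclasses import dataclass

from huggingface_hub import hf_hub_download


@dataclass(frozen=True)
class FileSpec:
    """Stand-in: a file name plus an optional repo to fetch it from instead."""

    filename: str
    repo: str | None = None


def fetch_config(spec: FileSpec, default_repo: str) -> str:
    # Use the override repo when the spec names one, else the weights repo.
    return hf_hub_download(repo_id=spec.repo or default_repo, filename=spec.filename)


# Weights come from mlx-community, but generation_config.json is pulled
# from the original google/gemma-3-1b-it repo:
spec = FileSpec("generation_config.json", "google/gemma-3-1b-it")
# path = fetch_config(spec, default_repo="mlx-community/gemma-3-1b-it-4bit")
```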

lalamo/model_import/model_specs/llama.py CHANGED
@@ -1,6 +1,7 @@
 from lalamo.model_import.decoder_configs import HFLlamaConfig
+from lalamo.quantization import QuantizationMode
 
-from .common import ModelSpec
+from .common import ConfigMap, FileSpec, ModelSpec
 
 __all__ = ["LLAMA_MODELS"]
 
@@ -15,6 +16,17 @@ LLAMA31 = [
         config_type=HFLlamaConfig,
         use_cases=tuple(),
     ),
+    ModelSpec(
+        vendor="Meta",
+        family="Llama-3.1",
+        name="Llama-3.1-8B-Instruct-4bit",
+        size="8B",
+        quantization=QuantizationMode.UINT4,
+        repo="mlx-community/Llama-3.1-8B-Instruct-4bit",
+        config_type=HFLlamaConfig,
+        use_cases=tuple(),
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "meta-llama/Llama-3.1-8B-Instruct")),
+    ),
 ]
 
 
@@ -29,6 +41,28 @@ LLAMA32 = [
         config_type=HFLlamaConfig,
         use_cases=tuple(),
     ),
+    ModelSpec(
+        vendor="Meta",
+        family="Llama-3.2",
+        name="Llama-3.2-1B-Instruct-4bit",
+        size="1B",
+        quantization=QuantizationMode.UINT4,
+        repo="mlx-community/Llama-3.2-1B-Instruct-4bit",
+        config_type=HFLlamaConfig,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "meta-llama/Llama-3.2-1B-Instruct")),
+        use_cases=tuple(),
+    ),
+    ModelSpec(
+        vendor="Meta",
+        family="Llama-3.2",
+        name="Llama-3.2-1B-Instruct-8bit",
+        size="1B",
+        quantization=QuantizationMode.UINT8,
+        repo="mlx-community/Llama-3.2-1B-Instruct-8bit",
+        config_type=HFLlamaConfig,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "meta-llama/Llama-3.2-1B-Instruct")),
+        use_cases=tuple(),
+    ),
     ModelSpec(
         vendor="Meta",
         family="Llama-3.2",
@@ -39,6 +73,28 @@ LLAMA32 = [
         config_type=HFLlamaConfig,
         use_cases=tuple(),
     ),
+    ModelSpec(
+        vendor="Meta",
+        family="Llama-3.2",
+        name="Llama-3.2-3B-Instruct-4bit",
+        size="3B",
+        quantization=QuantizationMode.UINT4,
+        repo="mlx-community/Llama-3.2-3B-Instruct-4bit",
+        config_type=HFLlamaConfig,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "meta-llama/Llama-3.2-3B-Instruct")),
+        use_cases=tuple(),
+    ),
+    ModelSpec(
+        vendor="Meta",
+        family="Llama-3.2",
+        name="Llama-3.2-3B-Instruct-8bit",
+        size="3B",
+        quantization=QuantizationMode.UINT8,
+        repo="mlx-community/Llama-3.2-3B-Instruct-8bit",
+        config_type=HFLlamaConfig,
+        configs=ConfigMap(generation_config=FileSpec("generation_config.json", "meta-llama/Llama-3.2-3B-Instruct")),
+        use_cases=tuple(),
+    ),
 ]
 
 LLAMA_MODELS = LLAMA31 + LLAMA32
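
Note: the specs tag each MLX repo with an explicit `QuantizationMode` (`UINT4`/`UINT8`), while the decoder configs derive the same mode from the checkpoint's `bits` field via `QuantizationMode.from_num_bits`. A sketch under the assumption that this is a plain bits-to-member lookup (stand-in enum; the real one is `lalamo.quantization.QuantizationMode`):

```python
from enum import Enum


class QuantizationMode(Enum):
    """Stand-in for lalamo.quantization.QuantizationMode."""

    UINT4 = 4
    UINT8 = 8

    @classmethod
    def from_num_bits(cls, bits: int) -> "QuantizationMode":
        # Assumed behavior: select the member by bit width, reject anything else.
        try:
            return cls(bits)
        except ValueError:
            raise ValueError(f"Unsupported bit width: {bits}") from None


# An mlx-community config carrying {"bits": 4} lands on the same mode that the
# *-4bit ModelSpec entries above declare explicitly.
assert QuantizationMode.from_num_bits(4) is QuantizationMode.UINT4
assert QuantizationMode.from_num_bits(8) is QuantizationMode.UINT8
```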

lalamo-0.5.12.dist-info/METADATA → lalamo-0.5.14.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lalamo
-Version: 0.5.12
+Version: 0.5.14
 Summary: JAX library for optimization and export of models for use with the UZU inference engine.
 Requires-Python: <4,>=3.12
 Description-Content-Type: text/markdown
@@ -10,7 +10,7 @@ Requires-Dist: click>=8.1.8
 Requires-Dist: einops>=0.8.0
 Requires-Dist: equinox>=0.11.11
 Requires-Dist: huggingface-hub[hf-transfer]>=0.27.1
-Requires-Dist: jax>=0.7.2
+Requires-Dist: jax>=0.8.1
 Requires-Dist: jaxtyping>=0.2.36
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: ml-dtypes>=0.5.1
@@ -23,11 +23,11 @@ Requires-Dist: safetensors>=0.6.2
 Requires-Dist: polars>=1.33.1
 Requires-Dist: xxhash>=3.5.0
 Provides-Extra: cpu
-Requires-Dist: jax[cpu]>=0.7.2; extra == "cpu"
+Requires-Dist: jax[cpu]>=0.8.1; extra == "cpu"
 Provides-Extra: cuda
-Requires-Dist: jax[cuda]>=0.7.2; extra == "cuda"
+Requires-Dist: jax[cuda]>=0.8.1; extra == "cuda"
 Provides-Extra: tpu
-Requires-Dist: jax[tpu]>=0.7.2; extra == "tpu"
+Requires-Dist: jax[tpu]>=0.8.1; extra == "tpu"
 Dynamic: license-file
 
 <p align="center">

lalamo-0.5.12.dist-info/RECORD → lalamo-0.5.14.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-lalamo/__init__.py,sha256=Q9W7Wur0iL4StzN99Y70CLGeiCHriTxjg50OLzXIDLw,815
+lalamo/__init__.py,sha256=W4DF7RbQmPfUVbw1GM5kDfJzUeKaqK7po5iod8VLThQ,815
 lalamo/common.py,sha256=5NUFD26yQgOnEEk3LaQnce8n-VwJxILkEpFesHZhtQU,3820
 lalamo/main.py,sha256=GgUT7lT48-XQuAEH7qzsDKG8Lx9iBf-sYBIRhZL9q7E,23978
 lalamo/message_processor.py,sha256=bSUAQg7CemLTnBV4LtPxJBicAalruDCA-JXjkTYPZ8U,5797
@@ -20,10 +20,10 @@ lalamo/model_import/decoder_configs/executorch.py,sha256=fTEG_j-7d8riR3Fu_H5tHDj
 lalamo/model_import/decoder_configs/huggingface/__init__.py,sha256=AboZJgZxOuIigPShskj-FqBkBqwlJZoKHP0RDqx-MyY,696
 lalamo/model_import/decoder_configs/huggingface/common.py,sha256=YYIDEQy8x7lqL2qtxUHrNqfjZEiizBZ_26sTqOzjRtQ,3792
 lalamo/model_import/decoder_configs/huggingface/gemma2.py,sha256=g8LH_GlSNyL04WWi596zI0rWsD3ahnfNjDk-9zZNcDE,4759
-lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=EYkcsRUKVQqPFyL8dZuocDVAUaM8d9dr_aMh5-jTvTM,8376
+lalamo/model_import/decoder_configs/huggingface/gemma3.py,sha256=UXiEyNqlD0Czc5Gj3n4hNqNDp9Ml5YzH1XZ6BXj0mgU,10223
 lalamo/model_import/decoder_configs/huggingface/gpt_oss.py,sha256=MBCoPbuWyzbJiBRtHOtpaPHJjQ1UVCAYcVrfIejTnlQ,7446
 lalamo/model_import/decoder_configs/huggingface/lfm2.py,sha256=vrBMxtiKEg0eHNDL_bWM9odlrsab7jlMXEY8vjEB7-c,7595
-lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=UPeQiz2Dix8YaZYRxn9z44OZJ6c4xBQmcUZcM0Ymvh4,6934
+lalamo/model_import/decoder_configs/huggingface/llama.py,sha256=jrbTjRBfT_LP5lSSk8ZpYIaCEJdqimbC2o4WgrulrHo,7985
 lalamo/model_import/decoder_configs/huggingface/llamba.py,sha256=ANB-vQK8U-zVFubZSTDXXt2S70T5SVOGzf7eOVvPzIQ,5773
 lalamo/model_import/decoder_configs/huggingface/mistral.py,sha256=MDGC0ivzJuUpOC11n8vFdcVzqccUyaRw_hkL74mVlAg,4599
 lalamo/model_import/decoder_configs/huggingface/modern_bert.py,sha256=A8nNIMhPVumvPWIFR3RexRc6XkFyUd_3mmNpmvyPEGE,8816
@@ -38,11 +38,11 @@ lalamo/model_import/model_specs/__init__.py,sha256=JISqwJkloQkGD2jvi1MakNEWapIwl
 lalamo/model_import/model_specs/common.py,sha256=RLySCIkmGiA1IVZgLeemssMBMo4hMYMpmBjV0cRwBb4,6586
 lalamo/model_import/model_specs/deepseek.py,sha256=Umef93_ZBuq93yYsejIRNwj3udoln1gHfrv3SK5jyMo,417
 lalamo/model_import/model_specs/essential_ai.py,sha256=xbHcwRpAWhR9gOgypVzcgunFspoUEk3iNsw-46CVR4o,390
-lalamo/model_import/model_specs/gemma.py,sha256=irWgylL-pc7y3Gn5DK3fjKoCT9kJWH3B7mTa-1Gmxqc,1306
+lalamo/model_import/model_specs/gemma.py,sha256=dwKwOHU1sBJNLFAwtEyydsRUF9QENN3SHtjbfqtOSic,3876
 lalamo/model_import/model_specs/gpt_oss.py,sha256=PLo0QGrXKdX61ReTRdyOaP_EH3Dmj5lp3fpJjZRwRVA,542
 lalamo/model_import/model_specs/huggingface.py,sha256=TEkU8y95_hmUWyF-Q5hn0dE2SvXbApghAsQwhWRu4D0,431
 lalamo/model_import/model_specs/lfm2.py,sha256=uzuFbcj4Wj2OqL7XJE8Q431VYZelS_HkfPFpl7rJuJY,1038
-lalamo/model_import/model_specs/llama.py,sha256=Ml-xvRGlXBT9NJhmEpwgNo6C84oBSMYgA1_PrCYGcAw,990
+lalamo/model_import/model_specs/llama.py,sha256=TxhKbIBFmGV2NopOg_k3ltsKlJccbxKyu-GQ7hYWCyw,3140
 lalamo/model_import/model_specs/llamba.py,sha256=Ic3sWTv34FLJ4fG6OR_Mc5goGJQR6fa5b2WbVXbn9FA,1471
 lalamo/model_import/model_specs/mirai.py,sha256=eifYVV5-fABiLH6rr82_DiVFtDyqpW0vbvXCYsQQzto,617
 lalamo/model_import/model_specs/mistral.py,sha256=HAojorjOqsJn2DoMBzYRw8A70qCslhFEsE9AF5xumlg,1278
@@ -85,9 +85,9 @@ lalamo/speculator/estimator.py,sha256=4D8dPZCWsrpORb7y8pQ6VsiIg1Cblvvxe6gXCoYtcD
 lalamo/speculator/inference.py,sha256=5GntUgj0HQLeLn3HIHnVX8EEO0EBzmKeP5-_U7kdFAM,3670
 lalamo/speculator/ngram.py,sha256=95mdfAWhx4d5XOnOwhyhElnvcy6nlUjYhcbJzqDs414,5875
 lalamo/speculator/utils.py,sha256=0wZoMMIzzk0Q-3zq5H5f-JBplePNHxywndkrNtOJOyo,1697
-lalamo-0.5.12.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
-lalamo-0.5.12.dist-info/METADATA,sha256=dRj887UMuZ5JBHgriCILQxCKklC-ZNyAcsOLAwbbyrU,3147
-lalamo-0.5.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-lalamo-0.5.12.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
-lalamo-0.5.12.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
-lalamo-0.5.12.dist-info/RECORD,,
+lalamo-0.5.14.dist-info/licenses/LICENSE,sha256=diHRfjSEJHD1nnEeMIfMRCjR3UERf8bT3eseD6b1ayA,1072
+lalamo-0.5.14.dist-info/METADATA,sha256=-FRZfOuOcMb_6CuqbsXtECUtU4K7nh074cu3oiogueg,3147
+lalamo-0.5.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+lalamo-0.5.14.dist-info/entry_points.txt,sha256=qli7qTfnBk5WP10rOGXXEckHMtt-atJMDWd8jN89Uks,43
+lalamo-0.5.14.dist-info/top_level.txt,sha256=VHvWL5JN5XRG36NsN_MieJ7EwRihEOrEjyDaTdFJ-aI,7
+lalamo-0.5.14.dist-info/RECORD,,