lalamo 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {lalamo-0.2.2 → lalamo-0.2.4}/PKG-INFO +1 -1
  2. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/__init__.py +1 -1
  3. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/__init__.py +3 -2
  4. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/executorch.py +1 -1
  5. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/loaders/__init__.py +2 -2
  6. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/common.py +10 -4
  7. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/llama.py +29 -30
  8. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo.egg-info/PKG-INFO +1 -1
  9. {lalamo-0.2.2 → lalamo-0.2.4}/LICENSE +0 -0
  10. {lalamo-0.2.2 → lalamo-0.2.4}/README.md +0 -0
  11. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/common.py +0 -0
  12. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/language_model.py +0 -0
  13. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/main.py +0 -0
  14. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/__init__.py +0 -0
  15. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/common.py +0 -0
  16. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/common.py +0 -0
  17. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/__init__.py +0 -0
  18. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/common.py +0 -0
  19. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/gemma2.py +0 -0
  20. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/gemma3.py +0 -0
  21. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/llama.py +0 -0
  22. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/mistral.py +0 -0
  23. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/qwen2.py +0 -0
  24. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/huggingface/qwen3.py +0 -0
  25. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/loaders/common.py +0 -0
  26. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/loaders/executorch.py +0 -0
  27. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/loaders/huggingface.py +0 -0
  28. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/__init__.py +0 -0
  29. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/deepseek.py +0 -0
  30. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/gemma.py +0 -0
  31. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/huggingface.py +0 -0
  32. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/mistral.py +0 -0
  33. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/pleias.py +0 -0
  34. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/polaris.py +0 -0
  35. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/qwen.py +0 -0
  36. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/reka.py +0 -0
  37. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/__init__.py +0 -0
  38. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/activations.py +0 -0
  39. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/attention.py +0 -0
  40. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/common.py +0 -0
  41. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/decoder.py +0 -0
  42. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/decoder_layer.py +0 -0
  43. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/embedding.py +0 -0
  44. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/kv_cache.py +0 -0
  45. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/linear.py +0 -0
  46. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/mlp.py +0 -0
  47. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/normalization.py +0 -0
  48. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/rope.py +0 -0
  49. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/modules/utils.py +0 -0
  50. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/quantization.py +0 -0
  51. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo/utils.py +0 -0
  52. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo.egg-info/SOURCES.txt +0 -0
  53. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo.egg-info/dependency_links.txt +0 -0
  54. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo.egg-info/entry_points.txt +0 -0
  55. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo.egg-info/requires.txt +0 -0
  56. {lalamo-0.2.2 → lalamo-0.2.4}/lalamo.egg-info/top_level.txt +0 -0
  57. {lalamo-0.2.2 → lalamo-0.2.4}/pyproject.toml +0 -0
  58. {lalamo-0.2.2 → lalamo-0.2.4}/setup.cfg +0 -0
  59. {lalamo-0.2.2 → lalamo-0.2.4}/tests/test_generation.py +0 -0
  60. {lalamo-0.2.2 → lalamo-0.2.4}/tests/test_huggingface_models.py +0 -0
{lalamo-0.2.2 → lalamo-0.2.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lalamo
-Version: 0.2.2
+Version: 0.2.4
 Summary: JAX library for optimization and export of models for use with the UZU inference engine.
 Requires-Python: <4,>=3.12
 Description-Content-Type: text/markdown
{lalamo-0.2.2 → lalamo-0.2.4}/lalamo/__init__.py

@@ -1,7 +1,7 @@
 from lalamo.model_import import REPO_TO_MODEL, ModelSpec, import_model
 from lalamo.modules import Decoder
 
-__version__ = "0.2.2"
+__version__ = "0.2.4"
 
 __all__ = [
     "REPO_TO_MODEL",
{lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/__init__.py

@@ -1,5 +1,6 @@
 from .common import ForeignConfig
-from .executorch import ETLlamaConfig
+
+# from .executorch import ETLlamaConfig
 from .huggingface import (
     HFGemma2Config,
     HFGemma3Config,
@@ -11,7 +12,7 @@ from .huggingface import (
 )
 
 __all__ = [
-    "ETLlamaConfig",
+    # "ETLlamaConfig",
     "ForeignConfig",
     "HFGemma2Config",
     "HFGemma3Config",
{lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/configs/executorch.py

@@ -3,7 +3,7 @@ from dataclasses import dataclass
 import jax.numpy as jnp
 from jaxtyping import Array, DTypeLike
 
-from lalamo.model_import.loaders import load_executorch
+from lalamo.model_import.loaders.executorch import load_executorch
 from lalamo.modules import (
     Activation,
     AttentionConfig,
{lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/loaders/__init__.py

@@ -1,7 +1,7 @@
-from .executorch import load_executorch
+# from .executorch import load_executorch
 from .huggingface import load_huggingface
 
 __all__ = [
-    "load_executorch",
+    # "load_executorch",
     "load_huggingface",
 ]
{lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/common.py

@@ -3,7 +3,6 @@ from enum import Enum
 from pathlib import Path
 
 import jax.numpy as jnp
-import torch
 from jaxtyping import Array, DTypeLike
 from safetensors.flax import load_file as load_safetensors
 
@@ -17,9 +16,9 @@ __all__ = [
     "ModelSpec",
     "TokenizerFileSpec",
     "UseCase",
-    "huggingface_weight_files",
     "awq_model_spec",
     "build_quantized_models",
+    "huggingface_weight_files",
 ]
 
 
@@ -36,6 +35,9 @@ class WeightsType(Enum):
     def load(self, filename: Path | str, float_dtype: DTypeLike) -> dict[str, jnp.ndarray]:
         if self == WeightsType.SAFETENSORS:
             return {k: cast_if_float(v, float_dtype) for k, v in load_safetensors(filename).items()}
+
+        import torch
+
         torch_weights = torch.load(filename, map_location="cpu", weights_only=True)
         return {k: cast_if_float(torch_to_jax(v), float_dtype) for k, v in torch_weights.items()}
 
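Note on the hunk above: moving `import torch` from module level into `WeightsType.load` defers the dependency until a Torch checkpoint is actually loaded, so the safetensors path works in environments without torch installed. A minimal standalone sketch of the same deferred-import pattern (illustrative names, not lalamo's actual module):

    from enum import Enum
    from pathlib import Path


    class WeightsType(Enum):
        SAFETENSORS = "safetensors"
        TORCH = "torch"

        def load(self, filename: Path | str) -> dict:
            if self == WeightsType.SAFETENSORS:
                # safetensors path: no torch required.
                from safetensors.numpy import load_file

                return dict(load_file(filename))
            # Deferred import: torch is only resolved here, when a .pth
            # checkpoint is actually being loaded.
            import torch

            return torch.load(filename, map_location="cpu", weights_only=True)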
@@ -72,11 +74,15 @@ def huggingface_weight_files(num_shards: int) -> tuple[str, ...]:
     return tuple(f"model-{i:05d}-of-{num_shards:05d}.safetensors" for i in range(1, num_shards + 1))
 
 
-def awq_model_spec(model_spec: ModelSpec, repo: str, quantization: QuantizationMode = QuantizationMode.UINT4) -> ModelSpec:
+def awq_model_spec(
+    model_spec: ModelSpec,
+    repo: str,
+    quantization: QuantizationMode = QuantizationMode.UINT4,
+) -> ModelSpec:
     return ModelSpec(
         vendor=model_spec.vendor,
         family=model_spec.family,
-        name="{}-AWQ".format(model_spec.name),
+        name=f"{model_spec.name}-AWQ",
         size=model_spec.size,
         quantization=quantization,
         repo=repo,
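For context, `awq_model_spec` derives a quantized variant from an existing spec: same vendor, family, and size, a name with an `-AWQ` suffix, and the given repo and quantization mode. A hedged usage sketch (`base_spec` and the repo id below are hypothetical):

    # Assuming `base_spec` is an existing ModelSpec for a float model.
    awq_spec = awq_model_spec(
        base_spec,
        repo="example-org/SomeModel-AWQ",  # hypothetical AWQ repo id
    )
    assert awq_spec.name == f"{base_spec.name}-AWQ"
    assert awq_spec.quantization == QuantizationMode.UINT4  # default mode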
{lalamo-0.2.2 → lalamo-0.2.4}/lalamo/model_import/model_specs/llama.py

@@ -1,7 +1,6 @@
 from dataclasses import replace
 
-from lalamo.model_import.configs import ETLlamaConfig, HFLlamaConfig
-from lalamo.quantization import QuantizationMode
+from lalamo.model_import.configs import HFLlamaConfig
 
 from .common import (
     HUGGINFACE_GENERATION_CONFIG_FILE,
@@ -54,20 +53,20 @@ LLAMA32 = [
         tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
         use_cases=tuple(),
     ),
-    ModelSpec(
-        vendor="Meta",
-        family="Llama-3.2",
-        name="Llama-3.2-1B-Instruct-QLoRA",
-        size="1B",
-        quantization=QuantizationMode.UINT4,
-        repo="meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8",
-        config_type=ETLlamaConfig,
-        config_file_name="params.json",
-        weights_file_names=("consolidated.00.pth",),
-        weights_type=WeightsType.TORCH,
-        tokenizer_files=_tokenizer_files_from_another_repo("meta-llama/Llama-3.2-1B-Instruct"),
-        use_cases=tuple(),
-    ),
+    # ModelSpec(
+    #     vendor="Meta",
+    #     family="Llama-3.2",
+    #     name="Llama-3.2-1B-Instruct-QLoRA",
+    #     size="1B",
+    #     quantization=QuantizationMode.UINT4,
+    #     repo="meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8",
+    #     config_type=ETLlamaConfig,
+    #     config_file_name="params.json",
+    #     weights_file_names=("consolidated.00.pth",),
+    #     weights_type=WeightsType.TORCH,
+    #     tokenizer_files=_tokenizer_files_from_another_repo("meta-llama/Llama-3.2-1B-Instruct"),
+    #     use_cases=tuple(),
+    # ),
     ModelSpec(
         vendor="Meta",
         family="Llama-3.2",
@@ -82,20 +81,20 @@ LLAMA32 = [
         tokenizer_files=(*HUGGINGFACE_TOKENIZER_FILES, HUGGINFACE_GENERATION_CONFIG_FILE),
         use_cases=tuple(),
     ),
-    ModelSpec(
-        vendor="Meta",
-        family="Llama-3.2",
-        name="Llama-3.2-3B-Instruct-QLoRA",
-        size="3B",
-        quantization=QuantizationMode.UINT4,
-        repo="meta-llama/Llama-3.2-3B-Instruct-QLORA_INT4_EO8",
-        config_type=ETLlamaConfig,
-        config_file_name="params.json",
-        weights_file_names=("consolidated.00.pth",),
-        tokenizer_files=_tokenizer_files_from_another_repo("meta-llama/Llama-3.2-3B-Instruct"),
-        weights_type=WeightsType.TORCH,
-        use_cases=tuple(),
-    ),
+    # ModelSpec(
+    #     vendor="Meta",
+    #     family="Llama-3.2",
+    #     name="Llama-3.2-3B-Instruct-QLoRA",
+    #     size="3B",
+    #     quantization=QuantizationMode.UINT4,
+    #     repo="meta-llama/Llama-3.2-3B-Instruct-QLORA_INT4_EO8",
+    #     config_type=ETLlamaConfig,
+    #     config_file_name="params.json",
+    #     weights_file_names=("consolidated.00.pth",),
+    #     tokenizer_files=_tokenizer_files_from_another_repo("meta-llama/Llama-3.2-3B-Instruct"),
+    #     weights_type=WeightsType.TORCH,
+    #     use_cases=tuple(),
+    # ),
 ]
 
 LLAMA_MODELS = LLAMA31 + LLAMA32
{lalamo-0.2.2 → lalamo-0.2.4}/lalamo.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lalamo
-Version: 0.2.2
+Version: 0.2.4
 Summary: JAX library for optimization and export of models for use with the UZU inference engine.
 Requires-Python: <4,>=3.12
 Description-Content-Type: text/markdown