lalamo 0.5.16__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- lalamo/__init__.py +26 -2
- lalamo/commands.py +429 -0
- lalamo/common.py +14 -1
- lalamo/main.py +375 -229
- lalamo/message_processor.py +4 -1
- lalamo/model_import/common.py +8 -17
- lalamo/model_import/decoder_configs/huggingface/lfm2.py +14 -4
- lalamo/model_import/decoder_configs/huggingface/llamba.py +2 -2
- lalamo/model_import/decoder_configs/huggingface/modern_bert.py +2 -2
- lalamo/model_import/huggingface_generation_config.py +21 -3
- lalamo/model_import/loaders/executorch.py +2 -2
- lalamo/model_import/loaders/huggingface.py +3 -3
- lalamo/model_import/model_specs/common.py +8 -4
- lalamo/model_import/model_specs/lfm2.py +41 -9
- lalamo/models/common.py +3 -3
- lalamo/models/language_model.py +7 -6
- lalamo/modules/activations.py +1 -1
- lalamo/modules/classifier.py +11 -24
- lalamo/modules/common.py +4 -1
- lalamo/modules/decoder.py +5 -11
- lalamo/modules/embedding.py +25 -62
- lalamo/modules/linear.py +19 -33
- lalamo/modules/mlp.py +9 -19
- lalamo/modules/mlx_interop.py +1 -1
- lalamo/modules/rope.py +1 -1
- lalamo/modules/token_mixers/__init__.py +1 -1
- lalamo/modules/token_mixers/attention.py +9 -27
- lalamo/modules/token_mixers/mamba.py +9 -24
- lalamo/modules/token_mixers/short_conv.py +5 -12
- lalamo/modules/transformer.py +10 -20
- lalamo/modules/transformer_layer.py +8 -20
- lalamo/registry_abc.py +4 -4
- lalamo/safetensors.py +97 -0
- lalamo/sampling.py +14 -0
- lalamo/speculator/estimator.py +11 -4
- lalamo/speculator/ngram.py +1 -1
- lalamo/utils.py +0 -13
- {lalamo-0.5.16.dist-info → lalamo-0.6.0.dist-info}/METADATA +1 -2
- {lalamo-0.5.16.dist-info → lalamo-0.6.0.dist-info}/RECORD +43 -41
- {lalamo-0.5.16.dist-info → lalamo-0.6.0.dist-info}/WHEEL +0 -0
- {lalamo-0.5.16.dist-info → lalamo-0.6.0.dist-info}/entry_points.txt +0 -0
- {lalamo-0.5.16.dist-info → lalamo-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {lalamo-0.5.16.dist-info → lalamo-0.6.0.dist-info}/top_level.txt +0 -0
lalamo/main.py
CHANGED
@@ -1,20 +1,19 @@
-import json
 import random
 import re
 import shutil
 import sys
-from
-from
+from contextlib import ExitStack
+from dataclasses import dataclass, field
+from functools import partial
+from itertools import islice
 from pathlib import Path
 from typing import Annotated

-import jax
 import jax.profiler
 import thefuzz.process
 from click import Context as ClickContext
 from click import Parameter as ClickParameter
 from click import ParamType
-from jaxtyping import DTypeLike
 from rich import box
 from rich.console import Console
 from rich.live import Live
@@ -23,48 +22,42 @@ from rich.progress import (
     MofNCompleteColumn,
     Progress,
     SpinnerColumn,
+    TaskID,
     TextColumn,
     TimeElapsedColumn,
     TimeRemainingColumn,
 )
+from rich.prompt import Confirm
 from rich.table import Table
-from safetensors.flax import save_file
 from typer import Argument, Context, Exit, Option, Typer

-from lalamo.
-
+from lalamo.commands import (
+    CollectTracesCallbacks,
+    ConversionCallbacks,
+    EstimateBatchsizeCallbacks,
+    Precision,
+    TraceCallbacks,
+    TrainCallbacks,
+)
+from lalamo.commands import collect_traces as _collect_traces
+from lalamo.commands import convert as _convert
+from lalamo.commands import estimate_batchsize as _estimate_batchsize
+from lalamo.commands import trace as _trace
+from lalamo.commands import train as _train
 from lalamo.data.lalamo_completions import LalamoCompletion
 from lalamo.message_processor import UserMessage
-from lalamo.model_import import REPO_TO_MODEL,
-from lalamo.model_import.common import
-    DownloadingFileEvent,
-    FinishedDownloadingFileEvent,
-    FinishedInitializingModelEvent,
-    InitializingModelEvent,
-    StatusEvent,
-)
+from lalamo.model_import import REPO_TO_MODEL, ModelSpec
+from lalamo.model_import.common import FileSpec
 from lalamo.models import ClassifierModelConfig, LanguageModelConfig
-from lalamo.
-from lalamo.speculator.estimator import EstimateBatchsizeFromMemoryEvent, estimate_batchsize_from_memory
-from lalamo.speculator.inference import CollectTracesEvent, inference_collect_traces
+from lalamo.speculator.estimator import get_default_device_memory
 from lalamo.speculator.ngram import NGramSpeculator
-from lalamo.speculator.utils import
-    SpeculatorTrainingEvent,
-    test_speculator,
-    train_speculator,
-)
+from lalamo.speculator.utils import test_speculator

 SCRIPT_NAME = Path(sys.argv[0]).name

 DEFAULT_OUTPUT_DIR = Path("models")


-class Precision(Enum):
-    FLOAT32 = "float32"
-    FLOAT16 = "float16"
-    BFLOAT16 = "bfloat16"
-
-
 console = Console()
 err_console = Console(stderr=True)
 app = Typer(
@@ -92,7 +85,7 @@ class ModelParser(ParamType):
                 f"\n\nUse the `{SCRIPT_NAME} list-models` command to see the list of currently supported models.",
             )
             error_message = "".join(error_message_parts)
-            self.fail(error_message, param, ctx)
+            return self.fail(error_message, param, ctx)
         return result


@@ -120,10 +113,18 @@ def chat(
             metavar="MODEL_PATH",
         ),
     ],
+    message: Annotated[
+        str | None,
+        Option(
+            help="Message for non-interactive mode",
+            show_default="None, run interactively",
+        ),
+    ] = None,
 ) -> None:
     with Progress(
         SpinnerColumn(),
         TextColumn("[progress.description]{task.description}"),
+        console=err_console,
         transient=True,
     ) as progress:
         loading_task = progress.add_task("🚀 [cyan]Loading model...[/cyan]")
@@ -132,21 +133,28 @@ def chat(
         warmup_task = progress.add_task("🔥 Warming up compilation cache...")
         list(model.stream_reply_text([UserMessage("")], max_output_length=1))
         progress.remove_task(warmup_task)
-    console.print(f"🤖 Chatting with [blue]{model_path}[/blue]:")
-    messages = []
-    while True:
-        user_text = console.input("[cyan]user> [/cyan]")
-        user_message = UserMessage(user_text)
-        messages.append(user_message)

-
-
-
+    if message is None:
+        console.print(f"🤖 Chatting with [blue]{model_path}[/blue]:")
+
+        messages = []
+        while True:
+            user_text = console.input("[cyan]user> [/cyan]")
+            user_message = UserMessage(user_text)
+            messages.append(user_message)
+
+            console.print("[red]assistant> [/red]", end="")
+            model_response_tokens = []
+            for token in model.stream_reply_text(messages):
+                console.print(token, end="")
+                model_response_tokens.append(token)
+            console.print()
+            model_response_text = "".join(model_response_tokens)
+            messages.append(model.message_processor.parse_response(model_response_text))
+    else:
+        for token in model.stream_reply_text([UserMessage(message)]):
             console.print(token, end="")
-            model_response_tokens.append(token)
         console.print()
-        model_response_text = "".join(model_response_tokens)
-        messages.append(model.message_processor.parse_response(model_response_text))


 @app.command(help="Classify given message with a Classifier type of model.")
@@ -182,6 +190,79 @@ def classify(
     console.print()


+@dataclass
+class CliConversionCallbacks(ConversionCallbacks):
+    overwrite: bool = False
+
+    stack: ExitStack = field(default_factory=ExitStack)
+    progress: Progress | None = None
+    downloading_tasks: dict[FileSpec, TaskID] = field(default_factory=dict)
+    initializing_task: TaskID | None = None
+    saving_task: TaskID | None = None
+
+    def started(self) -> None:
+        conversion_strs = [
+            f"🚀 Converting [cyan]{self.model_spec.name}[/cyan] by [cyan]{self.model_spec.vendor}[/cyan]",
+        ]
+        if self.precision is not None:
+            conversion_strs.append(
+                f" and converting floating-point weights into [cyan]{self.precision.name.lower()}[/cyan] precision",
+            )
+        conversion_strs.append(".")
+        console.print("".join(conversion_strs))
+
+        self.progress = self.stack.enter_context(
+            Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                transient=True,
+            ),
+        )
+
+    def output_dir_exists(self) -> None:
+        if not self.overwrite and not Confirm().ask(
+            rf"⚠️ Output directory [cyan]{self.output_dir}[/cyan] already exists."
+            r" Do you want to overwrite it?",
+        ):
+            raise Exit
+
+        shutil.rmtree(self.output_dir)
+
+    def downloading(self, file_spec: FileSpec) -> None:
+        assert self.progress is not None
+
+        self.downloading_tasks[file_spec] = self.progress.add_task(f"Retrieving {file_spec.filename}...")
+
+    def finished_downloading(self, file_spec: FileSpec) -> None:
+        assert self.progress is not None
+
+        self.progress.remove_task(self.downloading_tasks[file_spec])
+
+    def initializing_model(self) -> None:
+        assert self.progress is not None
+
+        self.initializing_task = self.progress.add_task("Initializing model...")
+
+    def finished_initializing_model(self) -> None:
+        assert self.progress is not None
+        assert self.initializing_task is not None
+
+        self.progress.remove_task(self.initializing_task)
+
+    def saving_model(self) -> None:
+        assert self.progress is not None
+
+        self.saving_task = self.progress.add_task(f"💾 Saving the model to {self.output_dir}")
+
+    def finished_saving_model(self) -> None:
+        assert self.progress is not None
+        assert self.saving_task is not None
+
+        self.progress.remove_task(self.saving_task)
+        self.stack.close()
+        console.print(f"🧑‍🍳 Model successfully cooked and saved to [cyan]`{self.output_dir}`[/cyan]!")
+
+
 @app.command(help="Convert the model for use with the Uzu inference engine.")
 def convert(
     model_repo: Annotated[
@@ -219,104 +300,130 @@ def convert(
             show_default="Model's native maximum context length.",
         ),
     ] = None,
-    include_traces: Annotated[
-        bool,
-        Option(
-            help="Export activation traces for debugging purposes.",
-        ),
-    ] = False,
     overwrite: Annotated[
         bool,
         Option(
             help="Overwrite existing model files.",
         ),
     ] = False,
-    message_for_trace: Annotated[
-        str | None,
-        Option(
-            help="Text message to use as prompt when recording trace",
-        ),
-    ] = None,
 ) -> None:
-    if precision is not None:
-        precision_dtype = config_converter.structure(precision.value, DTypeLike)  # type: ignore
-    else:
-        precision_dtype = None
-
     if output_dir is None:
         output_dir = DEFAULT_OUTPUT_DIR / model_repo.name

-
-
-
-
-
-
-
+    _convert(
+        model_repo,
+        output_dir,
+        precision,
+        context_length,
+        partial(CliConversionCallbacks, overwrite=overwrite),
+    )

-
-
-
-
-
-
-
-
-
-
-
+
+@dataclass
+class CliTraceCallbacks(TraceCallbacks):
+    overwrite: bool = False
+
+    stack: ExitStack = field(default_factory=ExitStack)
+    progress: Progress | None = None
+    loading_task: TaskID | None = None
+    tracing_task: TaskID | None = None
+    saving_task: TaskID | None = None
+
+    def output_exists(self) -> None:
+        if not self.overwrite and not Confirm().ask(
+            rf"⚠️ Output [cyan]{self.output_path}[/cyan] already exists."
+            r" Do you want to overwrite it?",
+        ):
             raise Exit

-
+        self.output_path.unlink()

-
-
-
-
-
-
-
-
-
-            case DownloadingFileEvent(file_spec):
-                event_to_task[event] = progress.add_task(f"Retrieving {file_spec.filename}...")
-            case FinishedDownloadingFileEvent(file_spec):
-                progress.remove_task(event_to_task[event])
-            case InitializingModelEvent():
-                event_to_task[event] = progress.add_task("Initializing model...")
-            case FinishedInitializingModelEvent():
-                progress.remove_task(event_to_task[event])
-
-        main_task = progress.add_task("👨‍🍳 Cooking...")
-        model, metadata = import_model(
-            model_repo,
-            precision=precision_dtype,
-            context_length=context_length,
-            progress_callback=progress_callback,
+    def started(self) -> None:
+        console.print(f"🔍 Tracing [cyan]{self.model_path}[/cyan]")
+
+        self.progress = self.stack.enter_context(
+            Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                transient=True,
+            ),
         )
-        save_task = progress.add_task(f"💾 Saving the model to {output_dir}")
-        output_dir.mkdir(parents=True, exist_ok=True)

-
-
-
-
-
-
-        progress
+    def loading_model(self) -> None:
+        assert self.progress is not None
+
+        self.loading_task = self.progress.add_task("🧠 Loading model...")
+
+    def finished_loading_model(self) -> None:
+        assert self.progress is not None
+        assert self.loading_task is not None

-
-        weights = flatten_parameters(model.export_weights())
-        del model
+        self.progress.remove_task(self.loading_task)

-
+    def tracing_model(self) -> None:
+        assert self.progress is not None

-
-        with open(output_dir / "config.json", "w") as file:
-            json.dump(config_json, file, indent=4)
-        progress.remove_task(save_task)
+        self.tracing_task = self.progress.add_task("🔍 Recording trace...")

-
+    def finished_tracing_model(self) -> None:
+        assert self.progress is not None
+        assert self.tracing_task is not None
+
+        self.progress.remove_task(self.tracing_task)
+
+    def saving_trace(self) -> None:
+        assert self.progress is not None
+
+        self.saving_task = self.progress.add_task(f"💾 Saving trace to {self.output_path}")
+
+    def finished_saving_trace(self) -> None:
+        assert self.progress is not None
+        assert self.saving_task is not None
+
+        self.progress.remove_task(self.saving_task)
+        self.stack.close()
+        console.print(f"💾 Trace saved to [cyan]{self.output_path}[/cyan]")
+
+@app.command(help="Trace a model.")
+def trace(
+    model_path: Annotated[
+        Path,
+        Argument(
+            help="Path to the model directory.",
+            metavar="MODEL_PATH",
+        ),
+    ],
+    output_path: Annotated[
+        Path | None,
+        Option(
+            help="Path to save the trace to.",
+            show_default="${MODEL_PATH}/traces.safetensors",
+        ),
+    ] = None,
+    overwrite: Annotated[
+        bool,
+        Option(
+            help="Overwrite existing trace file.",
+        ),
+    ] = False,
+    message: Annotated[
+        str | None,
+        Option(
+            help="Text message to use as prompt when recording trace",
+        ),
+    ] = None,
+) -> None:
+    if output_path is None:
+        output_path = model_path / "traces.safetensors"
+
+    messages = None if message is None else [UserMessage(content=message)]
+
+    _trace(
+        model_path,
+        output_path,
+        messages,
+        partial(CliTraceCallbacks, overwrite=overwrite),
+    )


 def _model_size_string_to_int(
@@ -385,6 +492,41 @@ speculator_app = Typer()
 app.add_typer(speculator_app, name="speculator", help="Train a speculator for a model.")


+@dataclass
+class CliEstimateBatchsizeCallbacks(EstimateBatchsizeCallbacks):
+    stack: ExitStack = field(default_factory=ExitStack)
+    loading_task: TaskID | None = None
+    estimating_task: TaskID | None = None
+
+    def loading_model(self) -> None:
+        self.progress = self.stack.enter_context(
+            Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                transient=True,
+            ),
+        )
+        self.loading_task = self.progress.add_task("[cyan]Loading model...[/cyan]")
+
+    def finished_loading_model(self) -> None:
+        assert self.loading_task is not None
+        self.progress.remove_task(self.loading_task)
+
+    def estimating_batchsize(self, lo: int, hi: int | None) -> None:
+        hi_str = str(hi) if hi is not None else "?"
+        description = f"[cyan]Estimating batch size... ({lo}..{hi_str})[/cyan]"
+        if self.estimating_task is None:
+            self.estimating_task = self.progress.add_task(description)
+        else:
+            self.progress.update(self.estimating_task, description=description)
+
+    def finished_estimating_batchsize(self, batchsize: int) -> None:
+        if self.estimating_task is not None:
+            self.progress.remove_task(self.estimating_task)
+        self.stack.close()
+        console.print(f"Found maximum batch size: [cyan]{batchsize}[/cyan]")
+
+
 @speculator_app.command(help="Estimate maximum batch size at which a model can be run.")
 def estimate_batchsize(
     model_path: Annotated[
@@ -416,44 +558,64 @@ def estimate_batchsize(
 ) -> None:
     if vram_gb is not None:
         mem = vram_gb * 1024 * 1024 * 1024
-
-
-
-        err_console.print("Cannot get the default device's memory stats, use --vram-gb")
-        raise Exit(1)
-    if "bytes_limit" not in memory_stats:
-        err_console.print("Cannot get the default device's bytes limit, use --vram-gb")
-        raise Exit(1)
-    mem = memory_stats["bytes_limit"]
+    elif (mem := get_default_device_memory()) is None:
+        err_console.print("Cannot get the default device's memory stats, use --vram-gb")
+        raise Exit(1)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    callbacks_type = CliEstimateBatchsizeCallbacks
+
+    _estimate_batchsize(model_path, mem, max_input_length, max_output_length, num_logits_per_token, callbacks_type)
+
+
+@dataclass
+class CliCollectTracesCallbacks(CollectTracesCallbacks):
+    stack: ExitStack = field(default_factory=ExitStack)
+    live: Live | None = None
+    loading_task: TaskID | None = None
+    inference_task: TaskID | None = None
+
+    def loading_model(self) -> None:
+        self.live = self.stack.enter_context(Live(refresh_per_second=10))
+        self.progress = Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            transient=True,
+        )
+        self.live.update(self.progress, refresh=True)
+        self.loading_task = self.progress.add_task("🧠 [cyan]Loading model...[/cyan]")
+
+    def finished_loading_model(self) -> None:
+        assert self.loading_task is not None
+        self.progress.remove_task(self.loading_task)
+
+    def loading_dataset(self) -> None:
+        self.loading_task = self.progress.add_task("🗂️ [cyan]Loading dataset...[/cyan]")
+
+    def finished_loading_dataset(self) -> None:
+        assert self.loading_task is not None
+        assert self.live is not None
+        self.progress.remove_task(self.loading_task)
+        self.progress = Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            MofNCompleteColumn(),
+            TimeElapsedColumn(),
+            TimeRemainingColumn(),
         )
-
+        self.live.update(self.progress, refresh=True)
+        self.inference_task = self.progress.add_task(
+            "🔮 [cyan]Running inference...[/cyan]",
+            total=self.num_tokens_to_generate,
+        )

-
+    def inference_progress(self, tokens_generated: int) -> None:
+        assert self.inference_task is not None
+        self.progress.update(self.inference_task, completed=tokens_generated)

-
+    def finished_inference(self) -> None:
+        assert self.inference_task is not None
+        self.progress.update(self.inference_task, description="✅ Completed")
+        self.stack.close()


 @speculator_app.command(help="Run model inference and collect traces for speculator training")
@@ -503,55 +665,17 @@ def collect_traces(
         ),
     ] = None,
 ) -> None:
-
-
-
-
-
-
-
-
-
-
-
-
-        loading_dataset_task = progress.add_task("🗂️ [cyan]Loading dataset...[/cyan]")
-        dataset = iter(import_hf_parquet(dataset_path))
-        dataset = chain([next(dataset)], dataset)  # iterator is lazy, force it to actually open the file
-        progress.remove_task(loading_dataset_task)
-
-    with Progress(
-        SpinnerColumn(),
-        TextColumn("[progress.description]{task.description}"),
-        MofNCompleteColumn(),
-        TimeElapsedColumn(),
-        TimeRemainingColumn(),
-        disable=True,
-    ) as progress:
-        live.update(progress, refresh=True)
-        inference_task = progress.add_task("🔮 [cyan]Running inference...[/cyan]", total=num_tokens_to_generate)
-
-        def progress_callback(event: CollectTracesEvent) -> None:
-            progress.update(inference_task, completed=event.tokens_generated)
-
-        traces = inference_collect_traces(
-            model,
-            dataset,
-            num_logits_per_token,
-            batch_size,
-            max_input_length,
-            max_output_length,
-            num_tokens_to_generate,
-            progress_callback,
-        )
-
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(output_path, "wb+") as output_fd:
-            for trace in traces:
-                blob = trace.serialize()
-                output_fd.write(blob)
-
-        progress.update(inference_task, description="✅ Completed")
+    _collect_traces(
+        model_path,
+        dataset_path,
+        output_path,
+        num_logits_per_token,
+        max_input_length,
+        max_output_length,
+        batch_size,
+        num_tokens_to_generate,
+        CliCollectTracesCallbacks,
+    )


 @speculator_app.command(help="View model inference traces")
@@ -597,6 +721,43 @@ def view_traces(
     console.print(table)


+@dataclass
+class CliTrainCallbacks(TrainCallbacks):
+    stack: ExitStack = field(default_factory=ExitStack)
+    training_task: TaskID | None = None
+
+    def started(self) -> None:
+        self.progress = self.stack.enter_context(
+            Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                MofNCompleteColumn(),
+                TimeElapsedColumn(),
+                TimeRemainingColumn(),
+            ),
+        )
+        self.training_task = self.progress.add_task(
+            "🔮 [cyan]Training speculator...[/cyan]",
+            total=self.subsample_size,
+        )
+
+    def training_progress(self, trained_tokens: int) -> None:
+        assert self.training_task is not None
+        self.progress.update(self.training_task, completed=trained_tokens)
+
+    def finished_training(self) -> None:
+        assert self.training_task is not None
+        self.progress.update(self.training_task, description="✅ Completed")
+        self.progress.remove_task(self.training_task)
+        self.stack.close()
+
+    def saving_speculator(self) -> None:
+        pass
+
+    def finished_saving_speculator(self) -> None:
+        console.print(f"💾 Speculator saved to [cyan]{self.output_path}[/cyan]")
+
+
 @speculator_app.command(help="Train a speculator from inference traces")
 def train(
     trace_path: Annotated[
@@ -633,30 +794,15 @@ def train(
         ),
     ] = None,
 ) -> None:
-
-
-
-
-
-
-
-
-
-        TimeElapsedColumn(),
-        TimeRemainingColumn(),
-    ) as progress:
-        inference_task = progress.add_task("🔮 [cyan]Training speculator...[/cyan]", total=subsample_size)
-
-        def progress_callback(event: SpeculatorTrainingEvent) -> None:
-            progress.update(inference_task, completed=event.trained_tokens)
-
-        train_speculator(speculator, traces, subsample_size, progress_callback)
-
-        progress.update(inference_task, description="✅ Completed")
-
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(output_path, "wb+") as fd:
-            fd.write(speculator.serialize())
+    _train(
+        trace_path,
+        output_path,
+        hashtable_size,
+        num_logits_per_token,
+        ngram_size,
+        subsample_size,
+        CliTrainCallbacks,
+    )


 @speculator_app.command(help="Run speculator as an autoregressive llm")
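
The shape of this refactor is visible from the imports alone: in 0.5.x each command body in main.py pattern-matched on progress event objects (DownloadingFileEvent, CollectTracesEvent, SpeculatorTrainingEvent), while in 0.6.0 the command logic lives in the new lalamo/commands.py and main.py only supplies Rich-specific callback classes with one hook per lifecycle stage. Below is a minimal sketch of what driving the relocated convert command programmatically might look like. The hook names, the model_spec/precision/output_dir attributes, and the positional call shape are read off the diff above; HeadlessConversionCallbacks itself and the REPO_TO_MODEL key are illustrative assumptions, not part of the package.

import shutil
from pathlib import Path

from lalamo.commands import ConversionCallbacks, convert
from lalamo.model_import import REPO_TO_MODEL
from lalamo.model_import.common import FileSpec


class HeadlessConversionCallbacks(ConversionCallbacks):
    """Hypothetical plain-stdout stand-in for the Rich-based CliConversionCallbacks."""

    def started(self) -> None:
        print(f"converting {self.model_spec.name} by {self.model_spec.vendor}")

    def output_dir_exists(self) -> None:
        # Non-interactive policy: always overwrite instead of prompting like the CLI does.
        shutil.rmtree(self.output_dir)

    def downloading(self, file_spec: FileSpec) -> None:
        print(f"retrieving {file_spec.filename}")

    def finished_downloading(self, file_spec: FileSpec) -> None:
        print(f"finished {file_spec.filename}")

    def initializing_model(self) -> None:
        print("initializing model")

    def finished_initializing_model(self) -> None:
        pass

    def saving_model(self) -> None:
        print(f"saving model to {self.output_dir}")

    def finished_saving_model(self) -> None:
        print("done")


# Assumed registry key; REPO_TO_MODEL maps repo ids to ModelSpec entries.
model_spec = REPO_TO_MODEL["LiquidAI/LFM2-350M"]
convert(
    model_spec,
    Path("models") / "LFM2-350M",
    None,  # precision: keep the native floating-point precision
    None,  # context_length: keep the model's native maximum
    HeadlessConversionCallbacks,  # callbacks factory, mirroring the CLI wiring above
)

Compared with the 0.5.x event stream, a frontend now swaps in one callbacks class per command instead of pattern-matching on event types, which is what lets the progress-handling code in main.py collapse into the Cli*Callbacks dataclasses shown in the diff.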