PyPI - flashspec - Versions diffs - 0.1.0__py3-none-any.whl - Mend

flashspec 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

flashspec/__init__.py +43 -0
flashspec/bandit/__init__.py +14 -0
flashspec/bandit/base.py +402 -0
flashspec/bandit/oracle.py +181 -0
flashspec/bandit/thompson.py +178 -0
flashspec/bandit/ucb.py +175 -0
flashspec/engine/__init__.py +15 -0
flashspec/engine/drafter.py +247 -0
flashspec/engine/speculative.py +257 -0
flashspec/engine/verifier.py +205 -0
flashspec/export/__init__.py +5 -0
flashspec/export/onnx.py +113 -0
flashspec/kernels/__init__.py +18 -0
flashspec/kernels/_reference.py +196 -0
flashspec/kernels/gather_kernel.py +136 -0
flashspec/kernels/verify_kernel.py +228 -0
flashspec/metrics/__init__.py +11 -0
flashspec/metrics/acceptance.py +175 -0
flashspec/metrics/latency.py +234 -0
flashspec/metrics/throughput.py +249 -0
flashspec/py.typed +0 -0
flashspec/sampling/__init__.py +9 -0
flashspec/sampling/rejection.py +235 -0
flashspec/sampling/typical.py +138 -0
flashspec/utils/__init__.py +20 -0
flashspec/utils/config.py +159 -0
flashspec/utils/device.py +165 -0
flashspec/utils/logging.py +117 -0
flashspec-0.1.0.dist-info/METADATA +331 -0
flashspec-0.1.0.dist-info/RECORD +32 -0
flashspec-0.1.0.dist-info/WHEEL +4 -0
flashspec-0.1.0.dist-info/licenses/LICENSE +117 -0

flashspec/__init__.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""FlashSpec — Adaptive speculative-decoding inference engine.
+Adaptive speculative-decoding inference engine with Triton-optimised
+verification and online bandit draft selection.
+Public API surface (AGENTS.md §13.2 — do not modify without explicit approval):
+    flashspec.SpeculativeEngine
+    flashspec.GenerationResult
+    flashspec.FlashSpecConfig
+    flashspec.BanditConfig
+    flashspec.SamplingConfig
+    flashspec.MetricsConfig
+    flashspec.register          (draft model decorator)
+    flashspec.get_drafter
+    flashspec.list_drafters
+References
+----------
+.. [1] Leviathan et al. (2023), "Fast Inference from Transformers via
+   Speculative Decoding", arXiv:2211.17192.
+.. [2] Myet (2025), "FlashSpec: Adaptive Speculative Decoding with Online
+   Bandit Draft Selection and Triton-Optimised Verification".
+"""
+from flashspec.engine.drafter import get_drafter, list_drafters, register
+from flashspec.engine.speculative import GenerationResult, SpeculativeEngine
+from flashspec.utils.config import BanditConfig, FlashSpecConfig, MetricsConfig, SamplingConfig
+__all__ = [
+    "BanditConfig",
+    "FlashSpecConfig",
+    "GenerationResult",
+    "MetricsConfig",
+    "SamplingConfig",
+    "SpeculativeEngine",
+    "get_drafter",
+    "list_drafters",
+    "register",
+]
+__version__ = "0.1.0"
+__author__ = "Min Htet Myet (Mattral)"

flashspec/bandit/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Online bandit draft selector sub-package."""
+from flashspec.bandit.base import ArmStats, DraftSelector
+from flashspec.bandit.oracle import OracleSelector
+from flashspec.bandit.thompson import ThompsonSelector
+from flashspec.bandit.ucb import UCB1Selector
+__all__ = [
+    "ArmStats",
+    "DraftSelector",
+    "OracleSelector",
+    "ThompsonSelector",
+    "UCB1Selector",
+]

flashspec/bandit/base.py ADDED Viewed

@@ -0,0 +1,402 @@
+"""Abstract base class for online bandit draft selectors.
+All concrete selectors (UCB1, Thompson, Oracle) inherit from ``DraftSelector``
+and must honour its JSON serialisation and thread-safety contracts.
+"""
+from __future__ import annotations
+import json
+import threading
+from abc import ABC, abstractmethod
+from collections import deque
+from dataclasses import dataclass, field
+from typing import Any
+__all__ = ["DraftSelector", "ArmStats"]
+# ── Value object for per-arm statistics ───────────────────────────────────────
+@dataclass(slots=True, frozen=False)
+class ArmStats:
+    """Running acceptance counters for a single bandit arm.
+    Parameters
+    ----------
+    n_pulls : int
+        How many rounds have been recorded for this arm.
+    n_accepted : int
+        Sum of the ``accepted`` values passed to :meth:`record`.
+    window_accepts : deque[int]
+        Most recent per-round ``accepted`` values, oldest first.
+    window_size : int
+        Upper bound on ``len(window_accepts)``.  Values ``<= 0`` switch the
+        rolling window off.
+    """
+    n_pulls: int = 0
+    n_accepted: int = 0
+    window_accepts: deque[int] = field(default_factory=deque)
+    window_size: int = 500
+    def record(self, accepted: int) -> None:
+        """Fold the outcome of one round into the counters.
+        Parameters
+        ----------
+        accepted : int
+            Accepted-token count for the round (typically 0 or 1).
+        Returns
+        -------
+        None
+        Notes
+        -----
+        The global counters are always updated.  The rolling window is only
+        touched when ``window_size > 0``; once it holds more than
+        ``window_size`` entries the oldest one is discarded.
+        Examples
+        --------
+        >>> stats = ArmStats(window_size=100)
+        >>> stats.record(accepted=1)
+        >>> stats.n_pulls
+        1
+        """
+        self.n_pulls += 1
+        self.n_accepted += accepted
+        if not self.window_size > 0:
+            # Windowing disabled: only the global counters are maintained.
+            return
+        history = self.window_accepts
+        history.append(accepted)
+        if len(history) > self.window_size:
+            # Evict the oldest round so at most ``window_size`` remain.
+            history.popleft()
+    @property
+    def mean_accept_rate(self) -> float:
+        """Average acceptance per round, preferring the rolling window.
+        Returns
+        -------
+        float
+            Mean of ``window_accepts`` when windowing is enabled and the
+            window is non-empty; otherwise ``n_accepted / n_pulls``; ``0.0``
+            when nothing has been recorded.
+        Notes
+        -----
+        With ``window_size > 0`` only the retained recent rounds contribute,
+        which lets callers follow an acceptance rate that drifts over time.
+        Examples
+        --------
+        >>> stats = ArmStats(window_size=0)
+        >>> stats.record(1); stats.record(0)
+        >>> stats.mean_accept_rate
+        0.5
+        """
+        windowed = self.window_size > 0
+        if windowed and self.window_accepts:
+            recent = self.window_accepts
+            return sum(recent) / len(recent)
+        # No usable window: fall back to the lifetime average, if any.
+        return self.n_accepted / self.n_pulls if self.n_pulls > 0 else 0.0
+    def to_dict(self) -> dict[str, Any]:
+        """Return the statistics as a JSON-compatible dict.
+        Returns
+        -------
+        dict[str, Any]
+            Mapping with the keys ``n_pulls``, ``n_accepted``,
+            ``window_accepts`` (as a plain list) and ``window_size``.
+        Notes
+        -----
+        The result is the input format expected by :meth:`from_dict`.
+        Examples
+        --------
+        >>> stats = ArmStats(n_pulls=5, n_accepted=3, window_size=10)
+        >>> d = stats.to_dict()
+        >>> d["n_pulls"]
+        5
+        """
+        return dict(
+            n_pulls=self.n_pulls,
+            n_accepted=self.n_accepted,
+            window_accepts=list(self.window_accepts),
+            window_size=self.window_size,
+        )
+    @classmethod
+    def from_dict(cls, d: dict[str, Any]) -> "ArmStats":
+        """Rebuild an instance from the output of :meth:`to_dict`.
+        Parameters
+        ----------
+        d : dict[str, Any]
+            Mapping holding ``n_pulls``, ``n_accepted``, ``window_size`` and
+            ``window_accepts``.
+        Returns
+        -------
+        ArmStats
+            Instance carrying the same counters and window contents.
+        Notes
+        -----
+        The restored deque is created with ``maxlen=window_size`` (or no
+        bound when ``window_size`` is 0), so it caps its own length.
+        Examples
+        --------
+        >>> stats = ArmStats(window_size=50)
+        >>> stats.record(1)
+        >>> restored = ArmStats.from_dict(stats.to_dict())
+        >>> restored.n_pulls == stats.n_pulls
+        True
+        """
+        pulls = d["n_pulls"]
+        accepted_total = d["n_accepted"]
+        size = d["window_size"]
+        # ``size or None`` maps a disabled window (0) to an unbounded deque.
+        history = deque(d["window_accepts"], maxlen=size or None)
+        return cls(
+            n_pulls=pulls,
+            n_accepted=accepted_total,
+            window_accepts=history,
+            window_size=size,
+        )
+# ── Abstract selector ─────────────────────────────────────────────────────────
+class DraftSelector(ABC):
+    """Abstract base class for online bandit draft-model selectors.
+    Subclasses implement :meth:`select`, :meth:`update`, :meth:`_state_dict`
+    and :meth:`_from_state_dict`.  The base class owns a per-instance
+    ``threading.Lock`` (``self._lock``); :meth:`reset` and :meth:`to_json`
+    acquire it, and subclasses are expected to do the same around their own
+    state changes.
+    Parameters
+    ----------
+    n_arms : int
+        Number of draft-model arms.
+    window_size : int
+        Rolling window size for acceptance statistics (0 = disabled).
+    Raises
+    ------
+    ValueError
+        If ``n_arms`` < 1 or ``window_size`` < 0.
+    Notes
+    -----
+    The selector maintains one :class:`ArmStats` object per arm and a round
+    counter ``t`` that starts at 0.
+    Examples
+    --------
+    >>> selector = UCB1Selector(n_arms=3, window_size=200)
+    >>> arm = selector.select()
+    >>> selector.update(arm, accepted=1)
+    """
+    def __init__(self, n_arms: int, window_size: int = 500) -> None:
+        # Validate before allocating any state so a bad call leaves nothing
+        # half-initialised.
+        if n_arms < 1:
+            raise ValueError(f"n_arms must be >= 1; got {n_arms}.")
+        if window_size < 0:
+            raise ValueError(f"window_size must be >= 0; got {window_size}.")
+        self._n_arms = n_arms
+        self._window_size = window_size
+        # One independent statistics object per arm.
+        self._arms: list[ArmStats] = [
+            ArmStats(window_size=window_size) for _ in range(n_arms)
+        ]
+        self._t: int = 0
+        self._lock = threading.Lock()
+    # ── Public interface ───────────────────────────────────────────────────
+    @property
+    def n_arms(self) -> int:
+        """Number of arms.
+        Returns
+        -------
+        int
+            Count of available draft-model arms, as given at construction.
+        Examples
+        --------
+        >>> selector = UCB1Selector(n_arms=3)
+        >>> selector.n_arms
+        3
+        """
+        return self._n_arms
+    @property
+    def t(self) -> int:
+        """Current value of the round counter.
+        Returns
+        -------
+        int
+            The counter ``_t``; 0 after construction and after :meth:`reset`.
+        Notes
+        -----
+        The base class never increments the counter itself; concrete
+        :meth:`update` implementations are expected to do so once per call.
+        Examples
+        --------
+        >>> selector = UCB1Selector(n_arms=2)
+        >>> selector.update(0, accepted=1)
+        >>> selector.t
+        1
+        """
+        return self._t
+    @abstractmethod
+    def select(self) -> int:
+        """Select an arm index to pull.
+        Returns
+        -------
+        int
+            Index in ``[0, n_arms)``.
+        Notes
+        -----
+        Implementations must be thread-safe (acquire ``self._lock`` around
+        any read-modify-write on shared state).
+        Examples
+        --------
+        >>> arm = selector.select()
+        >>> assert 0 <= arm < selector.n_arms
+        """
+    @abstractmethod
+    def update(self, arm: int, accepted: int) -> None:
+        """Record the outcome of pulling an arm.
+        Parameters
+        ----------
+        arm : int
+            Index of the arm that was pulled.
+        accepted : int
+            Number of tokens accepted in this round.
+        Raises
+        ------
+        ValueError
+            If ``arm`` is not in ``[0, n_arms)``.
+        Notes
+        -----
+        Implementations are expected to increment the round counter ``t``
+        and call ``self._arms[arm].record(accepted)`` while holding
+        ``self._lock``.
+        Examples
+        --------
+        >>> selector.update(0, accepted=1)
+        """
+    def reset(self) -> None:
+        """Reset all arm statistics and the round counter to zero.
+        Returns
+        -------
+        None
+        Notes
+        -----
+        Replaces every :class:`ArmStats` with a fresh instance (same
+        ``window_size``) and sets ``t`` to 0, all under ``self._lock``.
+        Intended for cases where accumulated statistics are no longer
+        representative, e.g. after the prompt distribution shifts.
+        Examples
+        --------
+        >>> selector.reset()
+        >>> selector.t
+        0
+        """
+        with self._lock:
+            self._arms = [
+                ArmStats(window_size=self._window_size)
+                for _ in range(self._n_arms)
+            ]
+            self._t = 0
+    def to_json(self) -> str:
+        """Serialise bandit state to a JSON string.
+        Returns
+        -------
+        str
+            Compact JSON encoding (no whitespace after separators) of the
+            dict returned by :meth:`_state_dict`.
+        Notes
+        -----
+        ``self._lock`` is held while :meth:`_state_dict` runs, so
+        implementations of that hook must not try to acquire it again
+        (``threading.Lock`` is not re-entrant).
+        Examples
+        --------
+        >>> state_json = selector.to_json()
+        >>> selector2 = UCB1Selector.from_json(state_json)
+        """
+        with self._lock:
+            return json.dumps(self._state_dict(), separators=(",", ":"))
+    @classmethod
+    def from_json(cls, json_str: str) -> "DraftSelector":
+        """Restore bandit state from a JSON string produced by :meth:`to_json`.
+        Parameters
+        ----------
+        json_str : str
+            JSON string previously produced by :meth:`to_json`.
+        Returns
+        -------
+        DraftSelector
+            Whatever ``cls._from_state_dict`` builds from the decoded state.
+        Raises
+        ------
+        ValueError
+            If ``json_str`` is not valid JSON, does not decode to a JSON
+            object, or is missing a field required by ``_from_state_dict``.
+        Notes
+        -----
+        No dispatch on the ``"type"`` key happens here: the state is handed
+        to the ``_from_state_dict`` of the class this method is called on,
+        so call it on the concrete class that produced the JSON.
+        Examples
+        --------
+        >>> json_str = selector.to_json()
+        >>> restored = UCB1Selector.from_json(json_str)
+        >>> restored.t == selector.t
+        True
+        """
+        try:
+            state = json.loads(json_str)
+        except json.JSONDecodeError as exc:
+            raise ValueError(f"Invalid JSON for bandit state: {exc}") from exc
+        # Valid JSON can still be a list, number, string, ...; only an object
+        # can carry the named fields the subclasses look up.
+        if not isinstance(state, dict):
+            raise ValueError(
+                "Bandit state must be a JSON object; "
+                f"got {type(state).__name__}."
+            )
+        try:
+            return cls._from_state_dict(state)
+        except KeyError as exc:
+            # Honour the documented contract: absent fields are reported as
+            # ValueError, with the original KeyError kept as the cause.
+            raise ValueError(
+                f"Bandit state is missing required field: {exc}"
+            ) from exc
+    # ── Subclass hooks ─────────────────────────────────────────────────────
+    @abstractmethod
+    def _state_dict(self) -> dict[str, Any]:
+        """Return a JSON-serialisable dict of all state.
+        Called by :meth:`to_json` with ``self._lock`` already held.
+        """
+    @classmethod
+    @abstractmethod
+    def _from_state_dict(cls, state: dict[str, Any]) -> "DraftSelector":
+        """Restore an instance from a state dict.
+        ``state`` is the decoded JSON object passed in by :meth:`from_json`.
+        """

flashspec/bandit/oracle.py ADDED Viewed

@@ -0,0 +1,181 @@
+"""Oracle bandit selector — upper-bound baseline for regret experiments.
+The Oracle always picks the arm with the highest *true* acceptance rate,
+which must be supplied externally.  It is used only to compute the regret
+upper bound in experiments; it is never used in production inference.
+"""
+from __future__ import annotations
+from typing import Any
+from flashspec.bandit.base import ArmStats, DraftSelector
+from flashspec.utils.logging import get_logger
+__all__ = ["OracleSelector"]
+logger = get_logger(__name__)
+class OracleSelector(DraftSelector):
+    """Oracle bandit selector that always picks the true best arm.
+    Requires ground-truth acceptance rates to be provided at construction
+    time; they can be replaced later via :meth:`set_true_rates`.  Used only
+    in regret upper-bound experiments — never in production inference.
+    Parameters
+    ----------
+    n_arms : int
+        Number of draft-model arms.
+    true_rates : list[float]
+        Ground-truth acceptance rate for each arm.  Must have length ``n_arms``
+        with values in ``[0, 1]``.
+    window_size : int
+        Rolling window for acceptance statistics.
+    Raises
+    ------
+    ValueError
+        If ``len(true_rates) != n_arms`` or any rate is outside ``[0, 1]``.
+    Notes
+    -----
+    The oracle's cumulative reward serves as the upper bound for regret
+    calculations in ``tests/unit/test_bandit.py``.
+    Examples
+    --------
+    >>> selector = OracleSelector(n_arms=2, true_rates=[0.6, 0.9])
+    >>> selector.select()
+    1
+    """
+    def __init__(
+        self,
+        n_arms: int,
+        true_rates: list[float],
+        window_size: int = 500,
+    ) -> None:
+        # Rates are checked before the base initialiser runs, so a bad
+        # ``true_rates`` is reported ahead of any ``n_arms`` problem.
+        self._validate_true_rates(true_rates, n_arms)
+        super().__init__(n_arms=n_arms, window_size=window_size)
+        # Private copy: later mutation of the caller's list has no effect.
+        self._true_rates: list[float] = list(true_rates)
+    @staticmethod
+    def _validate_true_rates(true_rates: list[float], n_arms: int) -> None:
+        """Raise ``ValueError`` unless there are ``n_arms`` rates in ``[0, 1]``."""
+        if len(true_rates) != n_arms:
+            raise ValueError(
+                f"len(true_rates) must equal n_arms={n_arms}; "
+                f"got {len(true_rates)}."
+            )
+        for i, r in enumerate(true_rates):
+            # Written as a negated chained comparison so NaN is rejected too.
+            if not (0.0 <= r <= 1.0):
+                raise ValueError(f"true_rates[{i}]={r} is outside [0, 1].")
+    def select(self) -> int:
+        """Return the arm index with the highest true acceptance rate.
+        Returns
+        -------
+        int
+            Arm index in ``[0, n_arms)``; on ties the lowest index wins.
+        Notes
+        -----
+        The choice depends only on ``true_rates``, never on recorded
+        outcomes.  ``self._lock`` is held while the rates are read.
+        Examples
+        --------
+        >>> OracleSelector(n_arms=2, true_rates=[0.4, 0.8]).select()
+        1
+        """
+        with self._lock:
+            return int(max(range(self._n_arms), key=lambda k: self._true_rates[k]))
+    def update(self, arm: int, accepted: int) -> None:
+        """Record outcome (used for regret tracking only; does not affect selection).
+        Parameters
+        ----------
+        arm : int
+            Arm index that was pulled.
+        accepted : int
+            Number of accepted tokens.
+        Raises
+        ------
+        ValueError
+            If ``arm`` is not in ``[0, n_arms)``.
+        Notes
+        -----
+        Under ``self._lock`` the outcome is added to the arm's
+        :class:`ArmStats` and the round counter is incremented.  Selection
+        is unaffected; the statistics exist so that cumulative regret can be
+        computed from arm pull counts.
+        Examples
+        --------
+        >>> selector.update(1, accepted=1)
+        """
+        if not (0 <= arm < self._n_arms):
+            raise ValueError(f"arm must be in [0, {self._n_arms}); got {arm}.")
+        with self._lock:
+            self._arms[arm].record(accepted)
+            self._t += 1
+    def set_true_rates(self, true_rates: list[float]) -> None:
+        """Update ground-truth acceptance rates (for non-stationary experiments).
+        Parameters
+        ----------
+        true_rates : list[float]
+            New ground-truth rates.  Must have the same length as ``n_arms``
+            and all values in ``[0, 1]``.
+        Raises
+        ------
+        ValueError
+            If length or values are invalid; the stored rates are left
+            untouched in that case.
+        Notes
+        -----
+        Validation happens before the lock is taken; the replacement itself
+        is done under ``self._lock`` and stores a copy of the given list.
+        Examples
+        --------
+        >>> selector.set_true_rates([0.9, 0.4])  # swap best/worst arm
+        """
+        self._validate_true_rates(true_rates, self._n_arms)
+        with self._lock:
+            self._true_rates = list(true_rates)
+    def _state_dict(self) -> dict[str, Any]:
+        """Return the selector state as a JSON-serialisable dict.
+        The ``"type"`` entry is always ``"oracle"``.
+        """
+        return {
+            "type": "oracle",
+            "n_arms": self._n_arms,
+            "window_size": self._window_size,
+            # Copy so the returned dict never aliases internal state.
+            "true_rates": list(self._true_rates),
+            "t": self._t,
+            "arms": [a.to_dict() for a in self._arms],
+        }
+    @classmethod
+    def _from_state_dict(cls, state: dict[str, Any]) -> "OracleSelector":
+        """Rebuild a selector from a dict produced by :meth:`_state_dict`.
+        Raises
+        ------
+        ValueError
+            If ``state`` is tagged with a ``"type"`` other than ``"oracle"``,
+            if ``true_rates`` fails constructor validation, or if the number
+            of ``arms`` entries differs from ``n_arms``.
+        KeyError
+            If a required field is absent.
+        """
+        # A missing tag is tolerated; a tag from another selector is not.
+        declared = state.get("type", "oracle")
+        if declared != "oracle":
+            raise ValueError(
+                f"state type must be 'oracle'; got {declared!r}."
+            )
+        obj = cls(
+            n_arms=state["n_arms"],
+            true_rates=state["true_rates"],
+            window_size=state["window_size"],
+        )
+        arms = [ArmStats.from_dict(d) for d in state["arms"]]
+        # A mismatched list would only fail later, as IndexError in update().
+        if len(arms) != obj._n_arms:
+            raise ValueError(
+                f"len(arms) must equal n_arms={obj._n_arms}; got {len(arms)}."
+            )
+        obj._t = state["t"]
+        obj._arms = arms
+        return obj