llm-compression 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-compression
-Version: 0.1.0
+Version: 0.1.2
 Summary: Arithmetic compression using Llama LLM models
 License: GPL3
 Author: OdoctorG
__init__.py
@@ -1,3 +1,3 @@
 from .arithmetic_coding import *
 from .llama_model import *
-from .probability_model import *
+from .probability_model import *
arithmetic_coding.py
@@ -1,14 +1,26 @@
 import numpy as np
 import math
 
-from probability_model import ProbabilityModel
+from .probability_model import ProbabilityModel
+
+
+def encode(input_arr: np.ndarray, model: ProbabilityModel) -> list[int]:
+    """
+    Encodes an input array of symbols into a list of bits using arithmetic coding.
+
+    Args:
+        input_arr (np.ndarray): An array of input symbols to encode.
+        model (ProbabilityModel): The probability model used to get the symbol probabilities.
+
+    Returns:
+        list[int]: A list of bits representing the encoded input symbols.
+    """
 
-def encode(input_arr: np.ndarray, model: ProbabilityModel):
     BIT_PRECISION = 64
     MAX = (1 << BIT_PRECISION) - 1
     HALF = 1 << (BIT_PRECISION - 1)
     QUARTER = 1 << (BIT_PRECISION - 2)
-
+
     low = 0
     high = MAX
     output = []
@@ -17,18 +29,18 @@ def encode(input_arr: np.ndarray, model: ProbabilityModel):
 
     for symbol in input_arr:
         symbols, cdfs = model.get_prob(input_arr[:k])
-
-        symbol_idx = np.where(symbols == symbol)[0] #symbols.index(symbol)
+
+        symbol_idx = np.where(symbols == symbol)[0]  # symbols.index(symbol)
 
         # Calculate probability bounds using floating-point CDFs
-        cdf_low = cdfs[symbol_idx-1][0] if symbol_idx > 0 else 0.0
+        cdf_low = cdfs[symbol_idx - 1][0] if symbol_idx > 0 else 0.0
         cdf_high = cdfs[symbol_idx][0]
-
+
         # Convert to integer ranges with careful rounding
         range_size = high - low + 1
         new_low = low + math.floor(cdf_low * range_size)
         new_high = low + math.ceil(cdf_high * range_size) - 1
-
+
         low, high = new_low, new_high
 
         # Interval scaling and bit emission
@@ -64,7 +76,22 @@ def encode(input_arr: np.ndarray, model: ProbabilityModel):
 
     return output
 
-def decode(encoded_bits: np.ndarray, model: ProbabilityModel, num_symbols: int):
+
+def decode(
+    encoded_bits: np.ndarray, model: ProbabilityModel, num_symbols: int
+) -> np.ndarray:
+    """
+    Decodes a list of encoded bits into an array of symbols using arithmetic decoding.
+
+    Args:
+        encoded_bits (np.ndarray): Numpy array of bits representing the encoded input symbols.
+        model (ProbabilityModel): The probability model used to get the symbol probabilities.
+        num_symbols (int): The number of symbols encoded in the input array.
+
+    Returns:
+        np.ndarray: Array of decoded symbols.
+    """
+
     import math
     import bisect
 
@@ -135,16 +162,16 @@ def decode(encoded_bits: np.ndarray, model: ProbabilityModel, num_symbols: int):
 
     return decoded
 
+
 # Testing
 if __name__ == "__main__":
     from llama_model import LlamaModel
     from probability_model import StaticModel
 
-
-    model = StaticModel(3, ['a', 'b', 'c'], [0.4, 0.3, 0.3])
+    model = StaticModel(3, ["a", "b", "c"], [0.4, 0.3, 0.3])
     test_str = "abcabc"
     print(len(test_str), " symbols")
-
+
     test_arr = np.asarray([test_str[i] for i in range(len(test_str))])
     print(test_arr)
     encoded_bin = encode(test_arr, model)
@@ -153,18 +180,20 @@ if __name__ == "__main__":
 
     decoded = decode(encoded_bin, model, len(test_arr))
     print(decoded)
-
+
     print("LLM TEST")
 
-    model = LlamaModel(top_p=0.99, max_context=50)
+    model = LlamaModel(
+        model_path="../Llama-3.2-1B-Instruct-Q4_K_M.gguf", top_p=0.99, max_context=50
+    )
     wiki_str = "Weissman var på 1920-talet en av Finlands mest kända kuplettsångare och var en mycket aktiv skådespelare med både operetter och lustspel på sin repertoar. Hans skådespelarkarriär inleddes omkring 1913 och varade fram till 1930-talet. Under den tiden var han verksam vid flera teatrar och skådespelarensembler. Som kuplettsångare uppträdde han på biografer, kaféer och restauranger runt om i landet. På 1910- och 1920-talen gjorde han en stor mängd skivinspelningar och var en aktiv sångare under grammofonfebern 1929. När kuplettgenren gick ur mode på slutet av 1920-talet försökte Weissman anpassa sig till schlagermusiken, men övergav inom kort den konstnärliga banan för att ägna sig åt reklamverksamhet och diverse affärer"
     wiki_str_short = "Weissman var på 1920-talet en av Finlands mest kända kuplettsångare och var en mycket aktiv skådespelare med både operetter och lustspel på sin repertoar."
     wiki_str_short2 = "The building began as a movie theater in 1973, was converted into the Jet Set nightclub in 1994, and underwent renovations in 2010 and 2015"
-    prompt = wiki_str_short2.encode('utf-8')
+    prompt = wiki_str_short2.encode("utf-8")
     prompt_tkn = np.asarray(model.tokenize(prompt))
     print(len(prompt_tkn), " symbols")
     encoded_bin = encode(prompt_tkn, model)
     print(len(encoded_bin), " bits in encoding")
     decoded = decode(encoded_bin, model, len(prompt_tkn))
     outstr = model.detokenize(decoded)
-    print(outstr.decode('utf-8'))
+    print(outstr.decode("utf-8"))
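
For orientation, the 0.1.2 encode/decode API above round-trips like this minimal sketch. It assumes the package's import name is llm_compression and that StaticModel, encode, and decode are re-exported at the package root via the star imports in __init__.py shown earlier; neither name is confirmed by this diff.

    import numpy as np
    from llm_compression import StaticModel, encode, decode  # assumed import name

    # Three-symbol source with probabilities 0.4 / 0.3 / 0.3
    model = StaticModel(3, ["a", "b", "c"], [0.4, 0.3, 0.3])
    symbols = np.asarray(list("abcabc"))

    bits = encode(symbols, model)                 # list of 0/1 ints
    restored = decode(bits, model, len(symbols))  # np.ndarray of symbols
    print("".join(restored))                      # expect: abcabc
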
llama_model.py
@@ -2,10 +2,35 @@ from llama_cpp import Llama
 import numpy as np
 import time
 
-from probability_model import ProbabilityModel
+from .probability_model import ProbabilityModel
+
 
 class LlamaModel(ProbabilityModel):
-    def __init__(self, model_path: str = "Llama-3.2-1B-Instruct-Q4_K_M.gguf", top_p: float = 0.99, max_context: int = 50):
+    def __init__(
+        self,
+        model_path: str,
+        top_p: float = 0.99,
+        max_context: int = 50,
+    ):
+        """
+        Initialize a LlamaModel.
+
+        Parameters
+        ----------
+        model_path : str
+            File path to the LLaMA model .gguf file.
+        top_p : float, optional
+            The fraction in [0, 1] of the most likely tokens to consider when computing the probability distribution.
+            Higher values generally give better compression for sequences that the LLM can easily predict.
+        max_context : int, optional
+            The maximum number of tokens to keep in the model's context. Higher values generally give better compression but slower performance.
+
+        Raises
+        ------
+        ValueError
+            If the provided max_context is too large for the model.
+        """
+
         t1 = time.perf_counter()
         self.llm = Llama(
             model_path=model_path,
@@ -15,7 +40,9 @@ class LlamaModel(ProbabilityModel):
             verbose=False,
         )
         if self.llm.n_ctx() < max_context:
-            raise ValueError(f"Provided max_context is too large for the model. Provided max_context is {max_context}, but model max context is {self.llm.n_ctx}")
+            raise ValueError(
+                f"Provided max_context is too large for the model. Provided max_context is {max_context}, but model max context is {self.llm.n_ctx()}"
+            )
         t2 = time.perf_counter()
         print(f"Model loaded in {t2 - t1} seconds")
         self.N = self.llm.n_vocab()
@@ -25,9 +52,24 @@ class LlamaModel(ProbabilityModel):
         super().__init__(self.N)
 
     def get_prob(self, prior_symbols: np.ndarray[int]) -> tuple[np.ndarray, np.ndarray]:
-
+        """
+        Get the cumulative probability distribution of the next token given the prior tokens.
+
+        Parameters
+        ----------
+        prior_symbols : np.ndarray[int]
+            The sequence of prior tokens.
+
+        Returns
+        -------
+        (tokens, cdfs)
+            tokens : np.ndarray[int]
+                The symbols in descending order of probability.
+            cdfs : np.ndarray[float]
+                The cumulative probabilities of the tokens in the same order.
+        """
         print(f"Prior symbols: {len(prior_symbols)}")
-
+
         # If no prior tokens, return uniform distribution and clear cache
         if len(prior_symbols) == 0:
             self.reset()
@@ -40,16 +82,16 @@ class LlamaModel(ProbabilityModel):
                 tokens[token_id] = token_id
                 cdfs[token_id] = cumulative
             return tokens, cdfs
-
+
         # If there are more symbols cached than context, clear oldest half of cache
         if len(self.cache) >= self.max_context:
-            #self.reset()
-            self.cache = self.cache[self.max_context // 2:]
+            # self.reset()
+            self.cache = self.cache[self.max_context // 2 :]
             self.llm.reset()
             # evaluate what is left of cache
             self.llm.eval(self.cache)
             print("Cache cleared")
-
+
         # Evaluate latest token
         t1 = time.perf_counter()
         self.llm.eval([prior_symbols[-1]])
@@ -62,12 +104,12 @@ class LlamaModel(ProbabilityModel):
         probs /= probs.sum()
 
         t4 = time.perf_counter()
-
+
         # Get cdf distribution of the top_p most likely tokens
         ts1 = time.perf_counter()
         topk = np.argsort(-probs)
         ts2 = time.perf_counter()
-
+
         tokens = np.zeros(self.N, dtype=np.int64)
         cdfs = np.zeros(self.N, dtype=np.float64)
 
@@ -76,9 +118,9 @@ class LlamaModel(ProbabilityModel):
         # Compute cumulative probabilities
         cum_probs = np.cumsum(probs_sorted)
         # Find cutoff index of top_p probability
-        cutoff_index = np.searchsorted(cum_probs, self.top_p, side='right')
+        cutoff_index = np.searchsorted(cum_probs, self.top_p, side="right")
         # Get slice of topk
-        topk_slice = topk[:cutoff_index+1]
+        topk_slice = topk[: cutoff_index + 1]
         n_topk = cutoff_index + 1
 
         tokens[:n_topk] = topk_slice
@@ -92,39 +134,58 @@ class LlamaModel(ProbabilityModel):
         n_remaining = len(remaining_tokens)
         if n_remaining > 0:
             uniform_prob = (1.0 - cum_probs[cutoff_index]) / n_remaining
-            tokens[n_topk:n_topk + n_remaining] = remaining_tokens
-            cdfs[n_topk:n_topk + n_remaining] = (
+            tokens[n_topk : n_topk + n_remaining] = remaining_tokens
+            cdfs[n_topk : n_topk + n_remaining] = (
                 uniform_prob * np.arange(1, n_remaining + 1) + cum_probs[cutoff_index]
             )
-
+
         t5 = time.perf_counter()
-
+
         # Cache new token
         self.cache.append(prior_symbols[-1])
 
         # returns sorted tokens and cdfs
         return (tokens, cdfs)
-
-    def reset(self):
+
+    def reset(self) -> None:
+        """Clear cache and reset LLM. Needed when starting a new compression/decompression."""
         self.cache = []
         self.llm.reset()
-        print("Cache cleared")
 
     def tokenize(self, text: bytes) -> list[int]:
+        """
+        Tokenize a string of bytes into a sequence of token IDs.
+
+        This function is a wrapper around Llama's `tokenize` method without adding the BOS token.
+
+        Parameters
+        ----------
+        text : bytes
+            The string of bytes to tokenize.
+
+        Returns
+        -------
+        list[int]
+            A list of token IDs.
+        """
+
         return self.llm.tokenize(text, add_bos=False)
 
     def detokenize(self, tokens: list[int]) -> bytes:
-        return self.llm.detokenize(tokens)
+        """
+        Convert a sequence of token IDs back into a string of bytes.
+
+        This function is a wrapper around Llama's `detokenize` method.
 
+        Parameters
+        ----------
+        tokens : list[int]
+            A list of token IDs to be converted back into bytes.
 
-# Testing
-if __name__ == "__main__":
-    prompt = "The capital of France is".encode('utf-8')
+        Returns
+        -------
+        bytes
+            The original string of bytes.
 
-    model = LlamaModel()
-    prompt_tkn = model.tokenize(prompt)
-    tokens, cdfs = model.get_prob(prompt_tkn)
-    #print(tokens[0])
-    for i in range(10):
-        print(tokens[i])
-        print(model.detokenize([tokens[i]]), cdfs[i])
+        return self.llm.detokenize(tokens)
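
Since 0.1.2 makes model_path a required argument with no default, callers now construct the model explicitly. A sketch of the resulting call pattern, mirroring the LLM test above, under the same assumed import name; the .gguf path is illustrative and not shipped with the package:

    import numpy as np
    from llm_compression import LlamaModel, encode, decode  # assumed import name

    model = LlamaModel(
        model_path="./Llama-3.2-1B-Instruct-Q4_K_M.gguf",  # illustrative local path
        top_p=0.99,
        max_context=50,
    )

    text = "The building began as a movie theater in 1973".encode("utf-8")
    tokens = np.asarray(model.tokenize(text))  # token IDs, BOS not added

    bits = encode(tokens, model)               # get_prob resets state on the first (empty-prior) call
    decoded = decode(bits, model, len(tokens))
    print(model.detokenize(decoded).decode("utf-8"))
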
probability_model.py
@@ -1,14 +1,15 @@
 import numpy as np
 
+
 class ProbabilityModel:
     def __init__(self, N):
         self.N = N
-
+
     def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         """
         Args:
             prior_symbols: numpy array of previous symbols
-
+
         Returns:
             tokens: numpy array of symbols in descending order of probability
             cdfs: numpy array of the cumulative probabilities of the tokens in the same order
@@ -24,39 +25,42 @@ class StaticModel(ProbabilityModel):
         probs = np.array(probs)
         symbols = np.array(symbols)
         # sort in descending order of probability
-        sorted_indices = np.argsort(-probs)
+        sorted_indices = np.argsort(-probs)
         self.symbols = symbols[sorted_indices]
         self.probs = probs[sorted_indices]
-
+
     def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         cdfs = np.cumsum(self.probs)
         # Ensure cdfs sum to 1
         cdfs /= cdfs[-1]
         return (np.array(self.symbols), cdfs)
 
+
 # Simple adaptive model - places higher probability on symbols that appear more often
 class AdaptiveModel(ProbabilityModel):
     def __init__(self, N, symbols):
         super().__init__(N)
         self.symbols = symbols
-
+
     def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         probs = np.zeros(self.N)
-
+
         for i in range(len(probs)):
             probs[i] = np.sum(prior_symbols == self.symbols[i]) + 0.10
         probs /= probs.sum()
-
-        combined_sort = sorted(zip(self.symbols, probs), key=lambda x: x[1], reverse=True)
+
+        combined_sort = sorted(
+            zip(self.symbols, probs), key=lambda x: x[1], reverse=True
+        )
         tokens = [x[0] for x in combined_sort]
         sorted_probs = [x[1] for x in combined_sort]
-
+
         cdfs = np.zeros(self.N)
         cumulative = 0
         for i in range(len(sorted_probs)):
             cumulative += sorted_probs[i]
             cdfs[i] = cumulative
-
+
         cdfs /= cdfs[-1]
 
-        return (np.array(tokens), cdfs)
+        return (np.array(tokens), cdfs)
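
The get_prob contract documented above (symbols in descending order of probability, with cumulative probabilities in the same order, ending at 1.0) is all that encode and decode rely on. A minimal sketch of a custom subclass; UniformModel is a hypothetical name, not part of the package:

    import numpy as np
    from llm_compression import ProbabilityModel  # assumed import name

    class UniformModel(ProbabilityModel):
        """Hypothetical example: every symbol equally likely, order fixed."""

        def __init__(self, symbols):
            super().__init__(len(symbols))
            self.symbols = np.asarray(symbols)

        def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
            # Uniform probabilities: cdfs are 1/N, 2/N, ..., 1.0
            cdfs = np.arange(1, self.N + 1, dtype=np.float64) / self.N
            return (self.symbols, cdfs)
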
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llm-compression"
-version = "0.1.0"
+version = "0.1.2"
 description = "Arithmetic compression using Llama LLM models"
 authors = ["OdoctorG <grenoscar@gmail.com>"]
 license = "GPL3"