llm-compression 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llm-compression
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: Arithmetic compression using Llama LLM models
5
5
  License: GPL3
6
6
  Author: OdoctorG
@@ -1,7 +1,7 @@
1
1
  import numpy as np
2
2
  import math
3
3
 
4
- from probability_model import ProbabilityModel
4
+ from .probability_model import ProbabilityModel
5
5
 
6
6
 
7
7
  def encode(input_arr: np.ndarray, model: ProbabilityModel) -> list[int]:
@@ -2,7 +2,7 @@ from llama_cpp import Llama
2
2
  import numpy as np
3
3
  import time
4
4
 
5
- from probability_model import ProbabilityModel
5
+ from .probability_model import ProbabilityModel
6
6
 
7
7
 
8
8
  class LlamaModel(ProbabilityModel):
@@ -20,8 +20,8 @@ class LlamaModel(ProbabilityModel):
20
20
  model_path : str
21
21
  File path to the LLaMA model .gguf file.
22
22
  top_p : float, optional
23
- The top [0, 1] percentage of the most likely tokens to consider when computing the probability distribution.
24
- Higher values will generally result in better compression for sequences that the LLM can easily predict.
23
+ The top [0, 1] percentage of the most likely tokens to consider when computing the probability distribution.
24
+ Higher values will generally result in better compression for sequences that the LLM can easily predict.
25
25
  max_context : int, optional
26
26
  The maximum number of tokens to keep in the model's context. Higher values will generally lead to better compression but slower performance.
27
27
 
@@ -58,14 +58,14 @@ class LlamaModel(ProbabilityModel):
58
58
  Parameters
59
59
  ----------
60
60
  prior_symbols : np.ndarray[int]
61
- The sequence of prior tokens.
61
+ The sequence of prior tokens.
62
62
 
63
63
  Returns
64
64
  -------
65
65
  (tokens, cdfs)
66
66
  tokens : np.ndarray[int]
67
67
  The symbols in descending order of probability.
68
- cdfs : np.ndarray[float]
68
+ cdfs : np.ndarray[float]
69
69
  The cumulative probabilities of the tokens in the same order.
70
70
  """
71
71
  print(f"Prior symbols: {len(prior_symbols)}")
@@ -148,7 +148,7 @@ class LlamaModel(ProbabilityModel):
148
148
  return (tokens, cdfs)
149
149
 
150
150
  def reset(self) -> None:
151
- """Clear cache and reset LLM. Needed when starting a new compression/decompression"""
151
+ """ Clear cache and reset LLM. Needed when starting a new compression/decompression """
152
152
  self.cache = []
153
153
  self.llm.reset()
154
154
 
@@ -172,4 +172,20 @@ class LlamaModel(ProbabilityModel):
172
172
  return self.llm.tokenize(text, add_bos=False)
173
173
 
174
174
  def detokenize(self, tokens: list[int]) -> bytes:
175
- return self.llm.detokenize(tokens)
175
+ """
176
+ Convert a sequence of token IDs back into a string of bytes.
177
+
178
+ This function is a wrapper around Llama's `detokenize` method.
179
+
180
+ Parameters
181
+ ----------
182
+ tokens : list[int]
183
+ A list of token IDs to be converted back into bytes.
184
+
185
+ Returns
186
+ -------
187
+ bytes
188
+ The original string of bytes.
189
+ """
190
+
191
+ return self.llm.detokenize(tokens)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "llm-compression"
3
- version = "0.1.1"
3
+ version = "0.1.2"
4
4
  description = "Arithmetic compression using Llama LLM models"
5
5
  authors = ["OdoctorG <grenoscar@gmail.com>"]
6
6
  license = "GPL3"
File without changes