llm-compression 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-compression
-Version: 0.1.0
+Version: 0.1.2
 Summary: Arithmetic compression using Llama LLM models
 License: GPL3
 Author: OdoctorG
__init__.py
@@ -1,3 +1,3 @@
 from .arithmetic_coding import *
 from .llama_model import *
-from .probability_model import *
+from .probability_model import *
arithmetic_coding.py
@@ -1,14 +1,26 @@
 import numpy as np
 import math
 
-from probability_model import ProbabilityModel
+from .probability_model import ProbabilityModel
+
+
+def encode(input_arr: np.ndarray, model: ProbabilityModel) -> list[int]:
+    """
+    Encodes an input array of symbols into a list of bits using arithmetic coding.
+
+    Args:
+        input_arr (np.ndarray): An array of input symbols to encode.
+        model (ProbabilityModel): The probability model used to get the symbol probabilities.
+
+    Returns:
+        list[int]: A list of bits representing the encoded input symbols.
+    """
 
-def encode(input_arr: np.ndarray, model: ProbabilityModel):
     BIT_PRECISION = 64
     MAX = (1 << BIT_PRECISION) - 1
     HALF = 1 << (BIT_PRECISION - 1)
     QUARTER = 1 << (BIT_PRECISION - 2)
-
+
     low = 0
     high = MAX
     output = []
@@ -17,18 +29,18 @@ def encode(input_arr: np.ndarray, model: ProbabilityModel):
 
     for symbol in input_arr:
         symbols, cdfs = model.get_prob(input_arr[:k])
-
-        symbol_idx = np.where(symbols == symbol)[0] #symbols.index(symbol)
+
+        symbol_idx = np.where(symbols == symbol)[0]  # symbols.index(symbol)
 
         # Calculate probability bounds using floating-point CDFs
-        cdf_low = cdfs[symbol_idx-1][0] if symbol_idx > 0 else 0.0
+        cdf_low = cdfs[symbol_idx - 1][0] if symbol_idx > 0 else 0.0
         cdf_high = cdfs[symbol_idx][0]
-
+
         # Convert to integer ranges with careful rounding
         range_size = high - low + 1
         new_low = low + math.floor(cdf_low * range_size)
         new_high = low + math.ceil(cdf_high * range_size) - 1
-
+
         low, high = new_low, new_high
 
         # Interval scaling and bit emission
@@ -64,7 +76,22 @@ def encode(input_arr: np.ndarray, model: ProbabilityModel):
 
     return output
 
-def decode(encoded_bits: np.ndarray, model: ProbabilityModel, num_symbols: int):
+
+def decode(
+    encoded_bits: np.ndarray, model: ProbabilityModel, num_symbols: int
+) -> np.ndarray:
+    """
+    Decodes a list of encoded bits into an array of symbols using arithmetic decoding.
+
+    Args:
+        encoded_bits (np.ndarray): Numpy array of bits representing the encoded input symbols.
+        model (ProbabilityModel): The probability model used to get the symbol probabilities.
+        num_symbols (int): The number of symbols encoded in the input array.
+
+    Returns:
+        np.ndarray: Array of decoded symbols.
+    """
+
     import math
     import bisect
 
@@ -135,16 +162,16 @@ def decode(encoded_bits: np.ndarray, model: ProbabilityModel, num_symbols: int):
 
     return decoded
 
+
 # Testing
 if __name__ == "__main__":
     from llama_model import LlamaModel
     from probability_model import StaticModel
 
-
-    model = StaticModel(3, ['a', 'b', 'c'], [0.4, 0.3, 0.3])
+    model = StaticModel(3, ["a", "b", "c"], [0.4, 0.3, 0.3])
     test_str = "abcabc"
     print(len(test_str), " symbols")
-
+
     test_arr = np.asarray([test_str[i] for i in range(len(test_str))])
     print(test_arr)
     encoded_bin = encode(test_arr, model)
@@ -153,18 +180,20 @@ if __name__ == "__main__":
 
     decoded = decode(encoded_bin, model, len(test_arr))
     print(decoded)
-
+
     print("LLM TEST")
 
-    model = LlamaModel(top_p=0.99, max_context=50)
+    model = LlamaModel(
+        model_path="../Llama-3.2-1B-Instruct-Q4_K_M.gguf", top_p=0.99, max_context=50
+    )
     wiki_str = "Weissman var på 1920-talet en av Finlands mest kända kuplettsångare och var en mycket aktiv skådespelare med både operetter och lustspel på sin repertoar. Hans skådespelarkarriär inleddes omkring 1913 och varade fram till 1930-talet. Under den tiden var han verksam vid flera teatrar och skådespelarensembler. Som kuplettsångare uppträdde han på biografer, kaféer och restauranger runt om i landet. På 1910- och 1920-talen gjorde han en stor mängd skivinspelningar och var en aktiv sångare under grammofonfebern 1929. När kuplettgenren gick ur mode på slutet av 1920-talet försökte Weissman anpassa sig till schlagermusiken, men övergav inom kort den konstnärliga banan för att ägna sig åt reklamverksamhet och diverse affärer"
     wiki_str_short = "Weissman var på 1920-talet en av Finlands mest kända kuplettsångare och var en mycket aktiv skådespelare med både operetter och lustspel på sin repertoar."
     wiki_str_short2 = "The building began as a movie theater in 1973, was converted into the Jet Set nightclub in 1994, and underwent renovations in 2010 and 2015"
-    prompt = wiki_str_short2.encode('utf-8')
+    prompt = wiki_str_short2.encode("utf-8")
     prompt_tkn = np.asarray(model.tokenize(prompt))
     print(len(prompt_tkn), " symbols")
     encoded_bin = encode(prompt_tkn, model)
     print(len(encoded_bin), " bits in encoding")
     decoded = decode(encoded_bin, model, len(prompt_tkn))
     outstr = model.detokenize(decoded)
-    print(outstr.decode('utf-8'))
+    print(outstr.decode("utf-8"))
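
For orientation, the 0.1.2 encode/decode API above round-trips like this minimal sketch. It assumes the package's import name is llm_compression and that StaticModel, encode, and decode are re-exported at the package root via the star imports in __init__.py shown earlier; neither name is confirmed by this diff.

    import numpy as np
    from llm_compression import StaticModel, encode, decode  # assumed import name

    # Three-symbol source with probabilities 0.4 / 0.3 / 0.3
    model = StaticModel(3, ["a", "b", "c"], [0.4, 0.3, 0.3])
    symbols = np.asarray(list("abcabc"))

    bits = encode(symbols, model)                 # list of 0/1 ints
    restored = decode(bits, model, len(symbols))  # np.ndarray of symbols
    print("".join(restored))                      # expect: abcabc
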
llama_model.py
@@ -2,10 +2,35 @@ from llama_cpp import Llama
 import numpy as np
 import time
 
-from probability_model import ProbabilityModel
+from .probability_model import ProbabilityModel
+
 
 class LlamaModel(ProbabilityModel):
-    def __init__(self, model_path: str = "Llama-3.2-1B-Instruct-Q4_K_M.gguf", top_p: float = 0.99, max_context: int = 50):
+    def __init__(
+        self,
+        model_path: str,
+        top_p: float = 0.99,
+        max_context: int = 50,
+    ):
+        """
+        Initialize a LlamaModel.
+
+        Parameters
+        ----------
+        model_path : str
+            File path to the LLaMA model .gguf file.
+        top_p : float, optional
+            The fraction in [0, 1] of the most likely tokens to consider when computing the probability distribution.
+            Higher values generally give better compression for sequences that the LLM can easily predict.
+        max_context : int, optional
+            The maximum number of tokens to keep in the model's context. Higher values generally give better compression but slower performance.
+
+        Raises
+        ------
+        ValueError
+            If the provided max_context is too large for the model.
+        """
+
         t1 = time.perf_counter()
         self.llm = Llama(
             model_path=model_path,
@@ -15,7 +40,9 @@ class LlamaModel(ProbabilityModel):
             verbose=False,
         )
         if self.llm.n_ctx() < max_context:
-            raise ValueError(f"Provided max_context is too large for the model. Provided max_context is {max_context}, but model max context is {self.llm.n_ctx}")
+            raise ValueError(
+                f"Provided max_context is too large for the model. Provided max_context is {max_context}, but model max context is {self.llm.n_ctx()}"
+            )
         t2 = time.perf_counter()
         print(f"Model loaded in {t2 - t1} seconds")
         self.N = self.llm.n_vocab()
@@ -25,9 +52,24 @@ class LlamaModel(ProbabilityModel):
         super().__init__(self.N)
 
     def get_prob(self, prior_symbols: np.ndarray[int]) -> tuple[np.ndarray, np.ndarray]:
-
+        """
+        Get the cumulative probability distribution of the next token given the prior tokens.
+
+        Parameters
+        ----------
+        prior_symbols : np.ndarray[int]
+            The sequence of prior tokens.
+
+        Returns
+        -------
+        (tokens, cdfs)
+            tokens : np.ndarray[int]
+                The symbols in descending order of probability.
+            cdfs : np.ndarray[float]
+                The cumulative probabilities of the tokens in the same order.
+        """
         print(f"Prior symbols: {len(prior_symbols)}")
-
+
         # If no prior tokens, return uniform distribution and clear cache
         if len(prior_symbols) == 0:
             self.reset()
@@ -40,16 +82,16 @@ class LlamaModel(ProbabilityModel):
                 tokens[token_id] = token_id
                 cdfs[token_id] = cumulative
             return tokens, cdfs
-
+
         # If there are more symbols cached than context, clear oldest half of cache
         if len(self.cache) >= self.max_context:
-            #self.reset()
-            self.cache = self.cache[self.max_context // 2:]
+            # self.reset()
+            self.cache = self.cache[self.max_context // 2 :]
             self.llm.reset()
             # evaluate what is left of cache
             self.llm.eval(self.cache)
             print("Cache cleared")
-
+
         # Evaluate latest token
         t1 = time.perf_counter()
         self.llm.eval([prior_symbols[-1]])
@@ -62,12 +104,12 @@ class LlamaModel(ProbabilityModel):
         probs /= probs.sum()
 
         t4 = time.perf_counter()
-
+
         # Get cdf distribution of the top_p most likely tokens
         ts1 = time.perf_counter()
         topk = np.argsort(-probs)
         ts2 = time.perf_counter()
-
+
         tokens = np.zeros(self.N, dtype=np.int64)
         cdfs = np.zeros(self.N, dtype=np.float64)
 
@@ -76,9 +118,9 @@ class LlamaModel(ProbabilityModel):
         # Compute cumulative probabilities
         cum_probs = np.cumsum(probs_sorted)
         # Find cutoff index of top_p probability
-        cutoff_index = np.searchsorted(cum_probs, self.top_p, side='right')
+        cutoff_index = np.searchsorted(cum_probs, self.top_p, side="right")
         # Get slice of topk
-        topk_slice = topk[:cutoff_index+1]
+        topk_slice = topk[: cutoff_index + 1]
         n_topk = cutoff_index + 1
 
         tokens[:n_topk] = topk_slice
@@ -92,39 +134,58 @@ class LlamaModel(ProbabilityModel):
         n_remaining = len(remaining_tokens)
         if n_remaining > 0:
             uniform_prob = (1.0 - cum_probs[cutoff_index]) / n_remaining
-            tokens[n_topk:n_topk + n_remaining] = remaining_tokens
-            cdfs[n_topk:n_topk + n_remaining] = (
+            tokens[n_topk : n_topk + n_remaining] = remaining_tokens
+            cdfs[n_topk : n_topk + n_remaining] = (
                 uniform_prob * np.arange(1, n_remaining + 1) + cum_probs[cutoff_index]
             )
-
+
         t5 = time.perf_counter()
-
+
         # Cache new token
         self.cache.append(prior_symbols[-1])
 
         # returns sorted tokens and cdfs
         return (tokens, cdfs)
-
-    def reset(self):
+
+    def reset(self) -> None:
+        """Clear cache and reset LLM. Needed when starting a new compression/decompression."""
         self.cache = []
         self.llm.reset()
-        print("Cache cleared")
 
     def tokenize(self, text: bytes) -> list[int]:
+        """
+        Tokenize a string of bytes into a sequence of token IDs.
+
+        This function is a wrapper around Llama's `tokenize` method without adding the BOS token.
+
+        Parameters
+        ----------
+        text : bytes
+            The string of bytes to tokenize.
+
+        Returns
+        -------
+        list[int]
+            A list of token IDs.
+        """
+
         return self.llm.tokenize(text, add_bos=False)
 
     def detokenize(self, tokens: list[int]) -> bytes:
-        return self.llm.detokenize(tokens)
+        """
+        Convert a sequence of token IDs back into a string of bytes.
+
+        This function is a wrapper around Llama's `detokenize` method.
 
+        Parameters
+        ----------
+        tokens : list[int]
+            A list of token IDs to be converted back into bytes.
 
-# Testing
-if __name__ == "__main__":
-    prompt = "The capital of France is".encode('utf-8')
+        Returns
+        -------
+        bytes
+            The original string of bytes.
 
-    model = LlamaModel()
-    prompt_tkn = model.tokenize(prompt)
-    tokens, cdfs = model.get_prob(prompt_tkn)
-    #print(tokens[0])
-    for i in range(10):
-        print(tokens[i])
-        print(model.detokenize([tokens[i]]), cdfs[i])
+        return self.llm.detokenize(tokens)
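
Since 0.1.2 makes model_path a required argument with no default, callers now construct the model explicitly. A sketch of the resulting call pattern, mirroring the LLM test above, under the same assumed import name; the .gguf path is illustrative and not shipped with the package:

    import numpy as np
    from llm_compression import LlamaModel, encode, decode  # assumed import name

    model = LlamaModel(
        model_path="./Llama-3.2-1B-Instruct-Q4_K_M.gguf",  # illustrative local path
        top_p=0.99,
        max_context=50,
    )

    text = "The building began as a movie theater in 1973".encode("utf-8")
    tokens = np.asarray(model.tokenize(text))  # token IDs, BOS not added

    bits = encode(tokens, model)               # get_prob resets state on the first (empty-prior) call
    decoded = decode(bits, model, len(tokens))
    print(model.detokenize(decoded).decode("utf-8"))
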
probability_model.py
@@ -1,14 +1,15 @@
 import numpy as np
 
+
 class ProbabilityModel:
     def __init__(self, N):
         self.N = N
-
+
     def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         """
         Args:
             prior_symbols: numpy array of previous symbols
-
+
         Returns:
             tokens: numpy array of symbols in descending order of probability
             cdfs: numpy array of the cumulative probabilities of the tokens in the same order
@@ -24,39 +25,42 @@ class StaticModel(ProbabilityModel):
         probs = np.array(probs)
         symbols = np.array(symbols)
         # sort in descending order of probability
-        sorted_indices = np.argsort(-probs)
+        sorted_indices = np.argsort(-probs)
         self.symbols = symbols[sorted_indices]
         self.probs = probs[sorted_indices]
-
+
     def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         cdfs = np.cumsum(self.probs)
         # Ensure cdfs sum to 1
         cdfs /= cdfs[-1]
         return (np.array(self.symbols), cdfs)
 
+
 # Simple adaptive model - places higher probability on symbols that appear more often
 class AdaptiveModel(ProbabilityModel):
     def __init__(self, N, symbols):
         super().__init__(N)
         self.symbols = symbols
-
+
     def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
         probs = np.zeros(self.N)
-
+
         for i in range(len(probs)):
             probs[i] = np.sum(prior_symbols == self.symbols[i]) + 0.10
         probs /= probs.sum()
-
-        combined_sort = sorted(zip(self.symbols, probs), key=lambda x: x[1], reverse=True)
+
+        combined_sort = sorted(
+            zip(self.symbols, probs), key=lambda x: x[1], reverse=True
+        )
         tokens = [x[0] for x in combined_sort]
         sorted_probs = [x[1] for x in combined_sort]
-
+
         cdfs = np.zeros(self.N)
         cumulative = 0
         for i in range(len(sorted_probs)):
             cumulative += sorted_probs[i]
             cdfs[i] = cumulative
-
+
         cdfs /= cdfs[-1]
 
-        return (np.array(tokens), cdfs)
+        return (np.array(tokens), cdfs)
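
The get_prob contract documented above (symbols in descending order of probability, with cumulative probabilities in the same order, ending at 1.0) is all that encode and decode rely on. A minimal sketch of a custom subclass; UniformModel is a hypothetical name, not part of the package:

    import numpy as np
    from llm_compression import ProbabilityModel  # assumed import name

    class UniformModel(ProbabilityModel):
        """Hypothetical example: every symbol equally likely, order fixed."""

        def __init__(self, symbols):
            super().__init__(len(symbols))
            self.symbols = np.asarray(symbols)

        def get_prob(self, prior_symbols: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
            # Uniform probabilities: cdfs are 1/N, 2/N, ..., 1.0
            cdfs = np.arange(1, self.N + 1, dtype=np.float64) / self.N
            return (self.symbols, cdfs)
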
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llm-compression"
-version = "0.1.0"
+version = "0.1.2"
 description = "Arithmetic compression using Llama LLM models"
 authors = ["OdoctorG <grenoscar@gmail.com>"]
 license = "GPL3"