llm-compression 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llm_compression-0.1.1 → llm_compression-0.1.2}/PKG-INFO +1 -1
- {llm_compression-0.1.1 → llm_compression-0.1.2}/llm_compression/arithmetic_coding.py +1 -1
- {llm_compression-0.1.1 → llm_compression-0.1.2}/llm_compression/llama_model.py +23 -7
- {llm_compression-0.1.1 → llm_compression-0.1.2}/pyproject.toml +1 -1
- {llm_compression-0.1.1 → llm_compression-0.1.2}/LICENSE +0 -0
- {llm_compression-0.1.1 → llm_compression-0.1.2}/README.md +0 -0
- {llm_compression-0.1.1 → llm_compression-0.1.2}/llm_compression/__init__.py +0 -0
- {llm_compression-0.1.1 → llm_compression-0.1.2}/llm_compression/probability_model.py +0 -0
@@ -2,7 +2,7 @@ from llama_cpp import Llama
|
|
2
2
|
import numpy as np
|
3
3
|
import time
|
4
4
|
|
5
|
-
from probability_model import ProbabilityModel
|
5
|
+
from .probability_model import ProbabilityModel
|
6
6
|
|
7
7
|
|
8
8
|
class LlamaModel(ProbabilityModel):
|
@@ -20,8 +20,8 @@ class LlamaModel(ProbabilityModel):
|
|
20
20
|
model_path : str
|
21
21
|
File path to the LLaMA model .gguf file.
|
22
22
|
top_p : float, optional
|
23
|
-
The top [0, 1] percentage of the most likely tokens to consider when computing the probability distribution.
|
24
|
-
Higher values will generally result in better compression for sequences that the LLM can easily predict.
|
23
|
+
The top [0, 1] percentage of the most likely tokens to consider when computing the probability distribution.
|
24
|
+
Higher values will generally result in better compression for sequences that the LLM can easily predict.
|
25
25
|
max_context : int, optional
|
26
26
|
The maximum number of tokens to keep in the model's context. Higher values will generally lead to better compression but slower performance.
|
27
27
|
|
@@ -58,14 +58,14 @@ class LlamaModel(ProbabilityModel):
|
|
58
58
|
Parameters
|
59
59
|
----------
|
60
60
|
prior_symbols : np.ndarray[int]
|
61
|
-
The sequence of prior tokens.
|
61
|
+
The sequence of prior tokens.
|
62
62
|
|
63
63
|
Returns
|
64
64
|
-------
|
65
65
|
(tokens, cdfs)
|
66
66
|
tokens : np.ndarray[int]
|
67
67
|
The symbols in descending order of probability.
|
68
|
-
cdfs : np.ndarray[float]
|
68
|
+
cdfs : np.ndarray[float]
|
69
69
|
The cumulative probabilities of the tokens in the same order.
|
70
70
|
"""
|
71
71
|
print(f"Prior symbols: {len(prior_symbols)}")
|
@@ -148,7 +148,7 @@ class LlamaModel(ProbabilityModel):
|
|
148
148
|
return (tokens, cdfs)
|
149
149
|
|
150
150
|
def reset(self) -> None:
|
151
|
-
"""Clear cache and reset LLM. Needed when starting a new compression/decompression"""
|
151
|
+
""" Clear cache and reset LLM. Needed when starting a new compression/decompression """
|
152
152
|
self.cache = []
|
153
153
|
self.llm.reset()
|
154
154
|
|
@@ -172,4 +172,20 @@ class LlamaModel(ProbabilityModel):
|
|
172
172
|
return self.llm.tokenize(text, add_bos=False)
|
173
173
|
|
174
174
|
def detokenize(self, tokens: list[int]) -> bytes:
|
175
|
-
|
175
|
+
"""
|
176
|
+
Convert a sequence of token IDs back into a string of bytes.
|
177
|
+
|
178
|
+
This function is a wrapper around Llama's `detokenize` method.
|
179
|
+
|
180
|
+
Parameters
|
181
|
+
----------
|
182
|
+
tokens : list[int]
|
183
|
+
A list of token IDs to be converted back into bytes.
|
184
|
+
|
185
|
+
Returns
|
186
|
+
-------
|
187
|
+
bytes
|
188
|
+
The original string of bytes.
|
189
|
+
"""
|
190
|
+
|
191
|
+
return self.llm.detokenize(tokens)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|