divergent-beamsearch 0.1.8__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/PKG-INFO +1 -1
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/pyproject.toml +1 -1
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/src/divergent_beamsearch/algorithm.py +15 -7
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/tests/test_beamsearch.py +31 -1
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/.gitignore +0 -0
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/.python-version +0 -0
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/LICENCE +0 -0
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/README.md +0 -0
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/src/divergent_beamsearch/__init__.py +0 -0
- {divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/uv.lock +0 -0
{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: divergent-beamsearch
-Version: 0.1.8
+Version: 0.2.0
 Summary: A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject.
 License-File: LICENCE
 Requires-Python: >=3.11
{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [project]
 name = "divergent-beamsearch"
-version = "0.1.8"
+version = "0.2.0"
 description = "A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject."
 readme = "README.md"
 requires-python = ">=3.11"
{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/src/divergent_beamsearch/algorithm.py
RENAMED
@@ -35,12 +35,17 @@ def apply_mask_tokens(pred : torch.Tensor, parsers_tokens):
     return pred[~pred.isinf().all(dim=-1)]
 
 
-def batched_inference_logits(model : GPT2LMHeadModel, input_ids : torch.Tensor, attention_mask : torch.Tensor | None = None, batch_size : int = 32) -> torch.Tensor:
+def batched_inference_logits(model : GPT2LMHeadModel, input_ids : torch.Tensor,
+                             attention_mask : torch.Tensor | None = None, batch_size : int = 32,
+                             to_cpu=False) -> torch.Tensor:
     logits = []
     if attention_mask is None:
         attention_mask = torch.ones_like(input_ids)
     for i in range(0, input_ids.shape[0], batch_size):
-        logits.append(model(input_ids[i:i+batch_size], attention_mask=attention_mask[i:i+batch_size]).logits)
+        l = model(input_ids[i:i+batch_size], attention_mask=attention_mask[i:i+batch_size]).logits
+        if to_cpu:
+            l = l.cpu()
+        logits.append(l)
     return torch.cat(logits, dim=0)
 
 def select_mask(source : list, mask : list[bool]) -> list:
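
Note: the sketch below is illustrative and not part of the package. It assumes batched_inference_logits can be imported from divergent_beamsearch.algorithm (the module path shown in the header above) and uses a stock GPT-2 checkpoint. It shows what the new to_cpu flag is for: each mini-batch's logits are moved off the GPU right after the forward pass, so peak GPU memory stays on the order of one mini-batch of logits instead of the full set.

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from divergent_beamsearch.algorithm import batched_inference_logits  # import path assumed from this diff

model = GPT2LMHeadModel.from_pretrained("gpt2").eval()
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2's tokenizer has no pad token by default

enc = tokenizer(["The capital of France is", "Google is owned by"],
                return_tensors="pt", padding=True)
with torch.no_grad():
    # to_cpu=True offloads each mini-batch's logits to CPU as soon as it is computed;
    # with the model on CPU (as here) it is a no-op, with a CUDA model it bounds GPU memory.
    logits = batched_inference_logits(model, enc.input_ids, enc.attention_mask,
                                      batch_size=8, to_cpu=True)
print(logits.shape)  # (2, sequence_length, vocab_size), concatenated on CPU
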
@@ -91,7 +96,9 @@ def pad_to_same_size(tensors : list[torch.Tensor], padding_value : int) -> torch.Tensor:
     return torch.cat(padded_tensors, dim=0)
 
 @torch.no_grad()
-def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None, end_symb=DEFAULT_END_SYMB) -> tuple[torch.Tensor, torch.Tensor]:
+def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int,
+                         max_length : int, parser : Parser, pad_token_id : int, batch_size=32,
+                         num_solutions = None, end_symb=DEFAULT_END_SYMB, optimize_gpu_mem=True) -> tuple[torch.Tensor, torch.Tensor]:
     assert input_ids.shape[0] == 1, "Batch size must be 1"
     device = input_ids.device
     input_ids = input_ids.cpu()
@@ -114,7 +121,7 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int,
     for _ in range(max_length):
         if len(input_ids_unfinished) == 0:
             break
-        pred = batched_inference_logits(model, input_ids_unfinished.to(device), batch_size=batch_size)[:, -1].cpu()
+        pred = batched_inference_logits(model, input_ids_unfinished.to(device), batch_size=batch_size, to_cpu=optimize_gpu_mem)[:, -1].cpu()
         parsers_tokens, can_end = get_parsers_tokens(parsers_unfinished, end_symb)
         logprobs = torch.log_softmax(pred, dim=-1)
         logprobs_filtered = apply_mask_tokens(logprobs, parsers_tokens)
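
For orientation, a schematic call sketch (not from the package) showing where the new optimize_gpu_mem flag sits in divergent_beamsearch. Here model, tokenizer and input_ids are assumed to be set up as in the earlier sketch, and my_parser is a placeholder for whichever Parser instance the caller builds; its construction is outside this diff. All keyword names come from the signature shown above.

from divergent_beamsearch.algorithm import divergent_beamsearch  # import path assumed from this diff

# input_ids: shape (1, prompt_length); the assert above requires batch size 1.
# my_parser: placeholder for a Parser instance, not defined in this diff.
result = divergent_beamsearch(
    input_ids,
    model,
    beam_size=5,
    max_length=10,
    parser=my_parser,
    pad_token_id=tokenizer.eos_token_id,
    batch_size=32,
    num_solutions=3,
    optimize_gpu_mem=True,  # default; per-batch logits are moved to CPU during beam expansion
)
# result is a tuple of two tensors (see the return annotation above).
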
@@ -175,19 +182,20 @@ def set_slice_row(x : torch.Tensor, slices : torch.IntTensor, value) -> torch.Tensor:
 @torch.no_grad()
 def divergent_logprob(input_ids : torch.Tensor, attention_mask : torch.Tensor | None, model : GPT2LMHeadModel,
                       parsers : Parser | list[Parser] | None, batch_size=32,
-                      start : int | torch.IntTensor = None, end_symb=DEFAULT_END_SYMB) -> torch.FloatTensor:
+                      start : int | torch.IntTensor = None, end_symb=DEFAULT_END_SYMB, optimize_gpu_mem=True) -> torch.FloatTensor:
     if start is None:
-        start =
+        start = 1
     if isinstance(start, int):
         start = torch.tensor([start]*input_ids.shape[0])
     assert start.shape[0] == input_ids.shape[0]
+    assert (start > 0).all()
     # -1 because next token offset
     start = start - 1
 
     if attention_mask is None:
         attention_mask = torch.ones_like(input_ids)
 
-    logits = batched_inference_logits(model, input_ids, attention_mask, batch_size).cpu()
+    logits = batched_inference_logits(model, input_ids, attention_mask, batch_size, to_cpu=optimize_gpu_mem).cpu()
     input_ids = input_ids.cpu()
     attention_mask = attention_mask.cpu()
 
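
A schematic sketch (not from the package) of the tightened start contract in divergent_logprob: the default is now 1 and every start index must be strictly positive, presumably because the scores for position start are read from position start - 1 (the "# -1 because next token offset" line above), which requires start >= 1. Placeholders (input_ids, attention_mask, model, my_parser) are as in the previous sketch.

import torch
from divergent_beamsearch.algorithm import divergent_logprob  # import path assumed from this diff

# Default start=1: accumulation begins at the second token position.
logp = divergent_logprob(input_ids, attention_mask, model, my_parser)

# Per-sequence start indices are also accepted, but each must be > 0,
# otherwise the new `assert (start > 0).all()` fails.
starts = torch.tensor([1, 3])
logp = divergent_logprob(input_ids, attention_mask, model, my_parser,
                         start=starts, optimize_gpu_mem=True)
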
{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/tests/test_beamsearch.py
RENAMED
@@ -13,6 +13,7 @@ TEST_END_SYMBS = [DEFAULT_END_SYMB, 'tokenizer']
 def model_and_tokenizer():
     model = GPT2LMHeadModel.from_pretrained("gpt2")
     tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+    tokenizer.pad_token = tokenizer.eos_token
     return model, tokenizer
 
 @pytest.fixture
@@ -32,6 +33,7 @@ def fakemodel_and_tokenizer():
     # Instantiate a model with the custom configuration
     model = GPT2LMHeadModel(config)
     model.eval()
+    tokenizer.pad_token = tokenizer.eos_token
 
     return model, tokenizer
 
@@ -95,7 +97,6 @@ def test_divergent_logprob(fakemodel_and_tokenizer, device, end_symb):
         "The capital of France is Paris",
         "The top model Paris Hilton"
     ]
-    tokenizer.pad_token = tokenizer.eos_token
     inp = tokenizer(prompts, return_tensors="pt", padding=True)
     input_ids = inp.input_ids.to(device)
     attention_mask = inp.attention_mask.to(device)
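
The two fixture additions above move `tokenizer.pad_token = tokenizer.eos_token` out of individual tests and into the fixtures. The line is needed at all because GPT-2's tokenizer ships without a padding token, so batched encoding with padding=True raises an error until one is assigned. A minimal illustration, independent of this package:

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# tokenizer(["short", "a longer prompt"], return_tensors="pt", padding=True)
# -> raises ValueError (no padding token set)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as padding, as the fixtures now do
batch = tokenizer(["short", "a longer prompt"], return_tensors="pt", padding=True)
print(batch.input_ids.shape, batch.attention_mask.shape)
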
@@ -200,3 +201,32 @@ def test_vanilla_beamsearch(model_and_tokenizer, device):
     assert np.isclose(
         scores.cpu().numpy(), np.array([-8.1361, -8.7745, -9.1053]), atol=0.0001
     ).all()
+
+@pytest.mark.parametrize("device", ['cpu', 'cuda'])
+@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float32])
+def test_element_wise_equivalence_divergent_logprob(fakemodel_and_tokenizer, device, dtype):
+    model, tokenizer = fakemodel_and_tokenizer
+    model.to(device)
+    model.to(dtype)
+
+    texts = [
+        'My name is Roger',
+        'The capital of Morocco is Rabat',
+        'Google is owned by Alphabet'
+    ]
+
+    multi_choices_parser = MultiChoicesParser([texts])
+
+    inputs = tokenizer(texts, return_tensors='pt', padding=True).to(device)
+
+    logprobs_global = divergent_logprob(inputs.input_ids, inputs.attention_mask, model, multi_choices_parser)
+
+    logprobs_individual = []
+
+    for text in texts:
+        inputs = tokenizer(text, return_tensors='pt', padding=True).to(device)
+        input_ids, attention_mask = inputs.input_ids, inputs.attention_mask
+        logprobs_individual.append(divergent_logprob(input_ids, attention_mask, model, multi_choices_parser))
+    logprobs_individual = torch.tensor(logprobs_individual)
+
+    assert (logprobs_individual == logprobs_global).all()
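
The new test above checks that divergent_logprob is padding-invariant: scoring the three prompts together in one padded batch should give, element for element, the same values as scoring each prompt on its own, parametrized over cpu/cuda and float32/bfloat16. Assuming the usual project layout, it can be selected with something like `pytest tests/test_beamsearch.py -k element_wise_equivalence`.
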
{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/.gitignore
RENAMED
File without changes

{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/.python-version
RENAMED
File without changes

{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/LICENCE
RENAMED
File without changes

{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/README.md
RENAMED
File without changes

{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/src/divergent_beamsearch/__init__.py
RENAMED
File without changes

{divergent_beamsearch-0.1.8 → divergent_beamsearch-0.2.0}/uv.lock
RENAMED
File without changes