divergent-beamsearch 0.1.6__tar.gz → 0.1.8__tar.gz
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/PKG-INFO +1 -1
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/pyproject.toml +1 -1
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/algorithm.py +21 -3
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/tests/test_beamsearch.py +19 -10
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/.gitignore +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/.python-version +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/LICENCE +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/README.md +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/__init__.py +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/uv.lock +0 -0
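In short: 0.1.8 bumps the package version, adds two tensor helpers (index_reduce_lists, pad_to_same_size) to algorithm.py, reworks how hypotheses that can legally end are scored and how finished solutions of different lengths are padded together, and extends the beam-search test to pin down exact scores for four candidate answers.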
{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: divergent-beamsearch
-Version: 0.1.6
+Version: 0.1.8
 Summary: A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject.
 License-File: LICENCE
 Requires-Python: >=3.11
{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/pyproject.toml
RENAMED
@@ -1,6 +1,6 @@
 [project]
 name = "divergent-beamsearch"
-version = "0.1.6"
+version = "0.1.8"
 description = "A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject."
 readme = "README.md"
 requires-python = ">=3.11"
{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/algorithm.py
RENAMED
@@ -76,6 +76,20 @@ class AcceptEverythingParser(Parser):
     def copy(self):
         return self
 
+def index_reduce_lists(x : torch.Tensor, indices : list[list[int]], reduce_func=torch.sum) -> torch.Tensor:
+    values = []
+    for i, index in enumerate(indices):
+        values.append(reduce_func(x[i, index], dim=-1))
+    return torch.tensor(values, dtype=x.dtype, device=x.device, requires_grad=x.requires_grad)
+
+def pad_to_same_size(tensors : list[torch.Tensor], padding_value : int) -> torch.Tensor:
+    max_size = max(x.shape[-1] for x in tensors)
+    padded_tensors = []
+    for tensor in tensors:
+        pad = torch.full((tensor.shape[0], max_size - tensor.shape[1]), padding_value, dtype=torch.long)
+        padded_tensors.append(torch.cat([tensor, pad], dim=-1))
+    return torch.cat(padded_tensors, dim=0)
+
 @torch.no_grad()
 def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None, end_symb=DEFAULT_END_SYMB) -> tuple[torch.Tensor, torch.Tensor]:
     assert input_ids.shape[0] == 1, "Batch size must be 1"
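For orientation, the two helpers added above are small tensor utilities: index_reduce_lists applies a reduction (torch.sum by default, torch.logsumexp at the call site in the next hunk) to a per-row subset of columns, and pad_to_same_size right-pads 2-D token tensors to a common width before stacking them along the batch dimension. A minimal usage sketch, assuming the helpers are importable from divergent_beamsearch.algorithm as defined in this diff (tensor values are illustrative only):

import torch
from divergent_beamsearch.algorithm import index_reduce_lists, pad_to_same_size

x = torch.tensor([[0.1, 0.2, 0.3],
                  [0.4, 0.1, 0.1]]).log()
# Reduce a different column subset per row: row 0 over columns {0, 2},
# row 1 over column {1}, with logsumexp as in the can_end branch below.
print(index_reduce_lists(x, [[0, 2], [1]], reduce_func=torch.logsumexp))
# -> tensor([log(0.1 + 0.3), log(0.1)])

a = torch.zeros((2, 3), dtype=torch.long)
b = torch.ones((1, 5), dtype=torch.long)
# `a` is right-padded with the padding value to width 5, then the two
# tensors are concatenated along dim 0: the result has shape (3, 5).
print(pad_to_same_size([a, b], padding_value=-1).shape)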
@@ -120,11 +134,15 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
 
         scores_finished_current = scores_unfinished[can_end]
         solutions_finished_current = solutions_unfinished[can_end]
-
+        logprob_other_ans = index_reduce_lists(logprobs[can_end], select_mask(parsers_tokens, can_end), reduce_func=torch.logsumexp).squeeze(-1)
+        scores_finished_current = scores_finished_current + log1mexp(logprob_other_ans)
         scores_finished = torch.cat([scores_finished, scores_finished_current])
         if len(solutions_finished_current):
-
-
+            if len(solutions_finished):
+                solutions_finished = pad_to_same_size([solutions_finished, solutions_finished_current],
+                                                      padding_value=pad_token_id)
+            else:
+                solutions_finished = solutions_finished_current
         if solutions_finished.numel():
             # Keep num_solutions best solutions in finished
             order = scores_finished.argsort(descending=True)
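The two added lines in the can_end branch are the substantive change of this release: when the parser allows a hypothesis to end, logprob_other_ans aggregates (via logsumexp) the probability of the next tokens that would instead continue toward another accepted answer, and log1mexp converts that into the log-probability of not continuing. A finished answer is therefore scored as log P(answer) + log(1 − P(diverge)). A sketch of the arithmetic; the numerically stable log1mexp below is an assumed implementation, not necessarily the package's own helper:

import torch

def log1mexp(x: torch.Tensor) -> torch.Tensor:
    # log(1 - exp(x)) for x < 0; branching at log(1/2) is the standard
    # numerical-stability trick (assumed equivalent to the package's helper).
    return torch.where(x > -0.693, torch.log(-torch.expm1(x)), torch.log1p(-torch.exp(x)))

# Say a beam may end here, and the next-token distribution puts probability
# 0.25 on tokens that continue toward another accepted answer:
logprob_answer = torch.tensor(0.4).log()      # log P(answer so far)
logprob_other_ans = torch.tensor(0.25).log()  # log P(continue to another answer)
score = logprob_answer + log1mexp(logprob_other_ans)
# score == log(0.4 * 0.75): the probability of producing the answer AND
# stopping there rather than diverging into a longer accepted continuation.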
{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/tests/test_beamsearch.py
RENAMED
@@ -37,18 +37,18 @@ def fakemodel_and_tokenizer():
 
 @pytest.mark.parametrize("device", ['cpu', 'cuda'])
 @pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
-def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
+def test_divergent_beamsearch(model_and_tokenizer, device, end_symb):
     if device == 'cuda' and not torch.cuda.is_available():
         pytest.skip("CUDA is not available on this machine.")
-    model, tokenizer = fakemodel_and_tokenizer
+    model, tokenizer = model_and_tokenizer
     model.to(device)
     prompt = "The capital of France is"
     input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
-    beam_size =
+    beam_size = 10
     max_length = 10
     pad_token_id = tokenizer.eos_token_id
 
-    possible_answers = [' Paris', ' Paris Hilton']
+    possible_answers = [' Paris', ' Madrid', ' Paris Hilton', ' Bri bra brouuu Mario Brooos']
     tokenized_answers = tokenizer(possible_answers).input_ids
 
     if end_symb == 'tokenizer':
@@ -56,9 +56,15 @@ def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
 
     multi_choices_parser = MultiChoicesParser([tokenized_answers], end_symb=end_symb)
 
-
-
-
+    with torch.no_grad():
+        logprob_paris = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[0][0]]
+        logprob_hilton = model(torch.cat([input_ids, torch.tensor(tokenized_answers[2][0], device=device).view(1,1)], dim=-1)).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[2][1]]
+        logprob_paris_hilton = logprob_paris + logprob_hilton
+        logprob_madrid = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[1][0]]
+        logprob_paris_diverge = logprob_paris + log1mexp(logprob_hilton)
+        input_garbage = torch.tensor(input_ids.tolist()[0] + tokenized_answers[-1]).unsqueeze(0).to(device)
+        logsoftmax_garbage = model(input_garbage).logits.log_softmax(-1)
+        logprob_garbage = torch.gather(logsoftmax_garbage[:, 4:-1, :], 2, input_garbage[:, 5:, None]).squeeze(-1).sum(-1)
 
     scores, solutions = divergent_beamsearch(
         input_ids=input_ids,
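One subtlety in the reference computation above: the prompt "The capital of France is" tokenizes to 5 tokens, so the logits at position 4 (the last prompt token) predict the first appended token, and the gather over logsoftmax_garbage[:, 4:-1, :] against input_garbage[:, 5:, None] sums log P(token_i | prefix) over the appended answer. A self-contained sketch of the same pattern for a generic prompt length L, with random stand-ins for the model outputs:

import torch

# Generic form of the gather trick used in the test (L = 5 there):
# total log-probability assigned to the tokens appended after the prompt.
torch.manual_seed(0)
vocab, L, n = 50, 5, 9                 # toy sizes; full_ids = prompt + answer
full_ids = torch.randint(vocab, (1, n))
logits = torch.randn(1, n, vocab)      # stand-in for model(full_ids).logits
logprobs = logits.log_softmax(-1)
answer_logprob = torch.gather(
    logprobs[:, L-1:-1, :],            # predictions made at positions L-1 .. n-2
    2,
    full_ids[:, L:, None],             # answer tokens at positions L .. n-1
).squeeze(-1).sum(-1)
print(answer_logprob)                  # shape (1,)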
@@ -67,13 +73,16 @@ def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
         max_length=max_length,
         parser=multi_choices_parser,
         pad_token_id=pad_token_id,
-        num_solutions=
+        num_solutions=beam_size,
         end_symb=end_symb
     )
     true_solutions = torch.nn.utils.rnn.pad_sequence([torch.tensor(ans) for ans in tokenized_answers], batch_first=True, padding_value=pad_token_id)
     assert (solutions == true_solutions).all(), "Beam search did not return the expected solutions"
-    assert scores[0]
-    assert scores[1]
+    assert torch.isclose(scores[0], logprob_paris_diverge), "Beam search did not return the expected score"
+    assert torch.isclose(scores[1], logprob_madrid), "Beam search did not return the expected score"
+    assert torch.isclose(scores[2], logprob_paris_hilton), "Beam search did not return the expected score"
+    assert torch.isclose(scores[3], logprob_garbage), "Beam search did not return the expected score"
+
 
 @pytest.mark.parametrize("device", ['cpu', 'cuda'])
 @pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
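Read together with the reference values computed earlier in the test, the four assertions fix the exact ranking: scores[0] is ' Paris' with the divergence correction log P(' Paris') + log1mexp(log P(' Hilton' | ' Paris')), followed by ' Madrid', the full ' Paris Hilton', and the garbage string. A worked sketch with made-up probabilities (not the fake model's actual values) showing why that ordering can hold:

import math

# Hypothetical next-token probabilities, chosen only for illustration.
p_paris, p_madrid = 0.6, 0.3
p_hilton_given_paris = 0.1

score_paris_diverge = math.log(p_paris) + math.log(1 - p_hilton_given_paris)  # log 0.54
score_madrid = math.log(p_madrid)                                             # log 0.30
score_paris_hilton = math.log(p_paris) + math.log(p_hilton_given_paris)       # log 0.06

# Matches the order asserted in the test: scores[0] > scores[1] > scores[2].
assert score_paris_diverge > score_madrid > score_paris_hilton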
{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/.gitignore
RENAMED
File without changes

{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/.python-version
RENAMED
File without changes

{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/LICENCE
RENAMED
File without changes

{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/README.md
RENAMED
File without changes

{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/__init__.py
RENAMED
File without changes

{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.8}/uv.lock
RENAMED
File without changes