divergent-beamsearch 0.1.7__tar.gz → 0.1.8__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/PKG-INFO +1 -1
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/pyproject.toml +1 -1
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/algorithm.py +13 -2
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/tests/test_beamsearch.py +7 -3
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/.gitignore +0 -0
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/.python-version +0 -0
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/LICENCE +0 -0
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/README.md +0 -0
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/__init__.py +0 -0
- {divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/uv.lock +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: divergent-beamsearch
|
3
|
-
Version: 0.1.7
|
3
|
+
Version: 0.1.8
|
4
4
|
Summary: A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject.
|
5
5
|
License-File: LICENCE
|
6
6
|
Requires-Python: >=3.11
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "divergent-beamsearch"
|
3
|
-
version = "0.1.7"
|
3
|
+
version = "0.1.8"
|
4
4
|
description = "A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject."
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">=3.11"
|
{divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/algorithm.py
RENAMED
@@ -82,6 +82,14 @@ def index_reduce_lists(x : torch.Tensor, indices : list[list[int]], reduce_func=
|
|
82
82
|
values.append(reduce_func(x[i, index], dim=-1))
|
83
83
|
return torch.tensor(values, dtype=x.dtype, device=x.device, requires_grad=x.requires_grad)
|
84
84
|
|
85
|
+
def pad_to_same_size(tensors : list[torch.Tensor], padding_value : int) -> torch.Tensor:
    """Right-pad tensors to a common width and stack them along dim 0.

    Each tensor is extended along its last dimension with ``padding_value``
    until it is as wide as the widest tensor in ``tensors``; the padded
    tensors are then concatenated along the first dimension.

    Args:
        tensors: Tensors to merge. The padding is built with shape
            ``(rows, missing_columns)``, so 2-D inputs are expected.
        padding_value: Value written into the added columns.

    Returns:
        A single tensor containing every row of every input, all padded to
        the same width.

    Raises:
        ValueError: If ``tensors`` is empty (``max`` of an empty sequence).
    """
    max_size = max(x.shape[-1] for x in tensors)
    padded_tensors = []
    for tensor in tensors:
        missing = max_size - tensor.shape[-1]
        # Allocate the padding on the tensor's own device: torch.cat refuses
        # to mix devices, so a default-device pad breaks non-CPU inputs.
        pad = torch.full(
            (tensor.shape[0], missing),
            padding_value,
            dtype=torch.long,
            device=tensor.device,
        )
        padded_tensors.append(torch.cat([tensor, pad], dim=-1))
    return torch.cat(padded_tensors, dim=0)
|
92
|
+
|
85
93
|
@torch.no_grad()
|
86
94
|
def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None, end_symb=DEFAULT_END_SYMB) -> tuple[torch.Tensor, torch.Tensor]:
|
87
95
|
assert input_ids.shape[0] == 1, "Batch size must be 1"
|
@@ -130,8 +138,11 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
|
|
130
138
|
scores_finished_current = scores_finished_current + log1mexp(logprob_other_ans)
|
131
139
|
scores_finished = torch.cat([scores_finished, scores_finished_current])
|
132
140
|
if len(solutions_finished_current):
|
133
|
-
|
134
|
-
|
141
|
+
if len(solutions_finished):
|
142
|
+
solutions_finished = pad_to_same_size([solutions_finished, solutions_finished_current],
|
143
|
+
padding_value=pad_token_id)
|
144
|
+
else:
|
145
|
+
solutions_finished = solutions_finished_current
|
135
146
|
if solutions_finished.numel():
|
136
147
|
# Keep num_solutions best solutions in finished
|
137
148
|
order = scores_finished.argsort(descending=True)
|
@@ -44,11 +44,11 @@ def test_divergent_beamsearch(model_and_tokenizer, device, end_symb):
|
|
44
44
|
model.to(device)
|
45
45
|
prompt = "The capital of France is"
|
46
46
|
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
|
47
|
-
beam_size =
|
47
|
+
beam_size = 10
|
48
48
|
max_length = 10
|
49
49
|
pad_token_id = tokenizer.eos_token_id
|
50
50
|
|
51
|
-
possible_answers = [' Paris', ' Madrid', ' Paris Hilton']
|
51
|
+
possible_answers = [' Paris', ' Madrid', ' Paris Hilton', ' Bri bra brouuu Mario Brooos']
|
52
52
|
tokenized_answers = tokenizer(possible_answers).input_ids
|
53
53
|
|
54
54
|
if end_symb == 'tokenizer':
|
@@ -62,6 +62,9 @@ def test_divergent_beamsearch(model_and_tokenizer, device, end_symb):
|
|
62
62
|
logprob_paris_hilton = logprob_paris + logprob_hilton
|
63
63
|
logprob_madrid = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[1][0]]
|
64
64
|
logprob_paris_diverge = logprob_paris + log1mexp(logprob_hilton)
|
65
|
+
input_garbage = torch.tensor(input_ids.tolist()[0] + tokenized_answers[-1]).unsqueeze(0).to(device)
|
66
|
+
logsoftmax_garbage = model(input_garbage).logits.log_softmax(-1)
|
67
|
+
logprob_garbage = torch.gather(logsoftmax_garbage[:, 4:-1, :], 2, input_garbage[:, 5:, None]).squeeze(-1).sum(-1)
|
65
68
|
|
66
69
|
scores, solutions = divergent_beamsearch(
|
67
70
|
input_ids=input_ids,
|
@@ -70,7 +73,7 @@ def test_divergent_beamsearch(model_and_tokenizer, device, end_symb):
|
|
70
73
|
max_length=max_length,
|
71
74
|
parser=multi_choices_parser,
|
72
75
|
pad_token_id=pad_token_id,
|
73
|
-
num_solutions=
|
76
|
+
num_solutions=beam_size,
|
74
77
|
end_symb=end_symb
|
75
78
|
)
|
76
79
|
true_solutions = torch.nn.utils.rnn.pad_sequence([torch.tensor(ans) for ans in tokenized_answers], batch_first=True, padding_value=pad_token_id)
|
@@ -78,6 +81,7 @@ def test_divergent_beamsearch(model_and_tokenizer, device, end_symb):
|
|
78
81
|
assert torch.isclose(scores[0], logprob_paris_diverge), "Beam search did not return the expected score"
|
79
82
|
assert torch.isclose(scores[1], logprob_madrid), "Beam search did not return the expected score"
|
80
83
|
assert torch.isclose(scores[2], logprob_paris_hilton), "Beam search did not return the expected score"
|
84
|
+
assert torch.isclose(scores[3], logprob_garbage), "Beam search did not return the expected score"
|
81
85
|
|
82
86
|
|
83
87
|
@pytest.mark.parametrize("device", ['cpu', 'cuda'])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{divergent_beamsearch-0.1.7 → divergent_beamsearch-0.1.8}/src/divergent_beamsearch/__init__.py
RENAMED
File without changes
|
File without changes
|