divergent-beamsearch 0.1.6__tar.gz → 0.1.7__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/PKG-INFO +1 -1
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/pyproject.toml +1 -1
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/src/divergent_beamsearch/algorithm.py +8 -1
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/tests/test_beamsearch.py +13 -8
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/.gitignore +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/.python-version +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/LICENCE +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/README.md +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/src/divergent_beamsearch/__init__.py +0 -0
- {divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/uv.lock +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: divergent-beamsearch
|
3
|
-
Version: 0.1.6
|
3
|
+
Version: 0.1.7
|
4
4
|
Summary: A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject.
|
5
5
|
License-File: LICENCE
|
6
6
|
Requires-Python: >=3.11
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "divergent-beamsearch"
|
3
|
-
version = "0.1.6"
|
3
|
+
version = "0.1.7"
|
4
4
|
description = "A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject."
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">=3.11"
|
{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/src/divergent_beamsearch/algorithm.py
RENAMED
@@ -76,6 +76,12 @@ class AcceptEverythingParser(Parser):
|
|
76
76
|
def copy(self):
|
77
77
|
return self
|
78
78
|
|
79
|
+
def index_reduce_lists(x : torch.Tensor, indices : list[list[int]], reduce_func=torch.sum) -> torch.Tensor:
|
80
|
+
values = []
|
81
|
+
for i, index in enumerate(indices):
|
82
|
+
values.append(reduce_func(x[i, index], dim=-1))
|
83
|
+
return torch.tensor(values, dtype=x.dtype, device=x.device, requires_grad=x.requires_grad)
|
84
|
+
|
79
85
|
@torch.no_grad()
|
80
86
|
def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None, end_symb=DEFAULT_END_SYMB) -> tuple[torch.Tensor, torch.Tensor]:
|
81
87
|
assert input_ids.shape[0] == 1, "Batch size must be 1"
|
@@ -120,7 +126,8 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
|
|
120
126
|
|
121
127
|
scores_finished_current = scores_unfinished[can_end]
|
122
128
|
solutions_finished_current = solutions_unfinished[can_end]
|
123
|
-
|
129
|
+
logprob_other_ans = index_reduce_lists(logprobs[can_end], select_mask(parsers_tokens, can_end), reduce_func=torch.logsumexp).squeeze(-1)
|
130
|
+
scores_finished_current = scores_finished_current + log1mexp(logprob_other_ans)
|
124
131
|
scores_finished = torch.cat([scores_finished, scores_finished_current])
|
125
132
|
if len(solutions_finished_current):
|
126
133
|
pad = torch.full((len(scores_finished_current), solutions_finished_current.shape[1] - solutions_finished.shape[1]), pad_token_id, dtype=torch.long)
|
@@ -37,10 +37,10 @@ def fakemodel_and_tokenizer():
|
|
37
37
|
|
38
38
|
@pytest.mark.parametrize("device", ['cpu', 'cuda'])
|
39
39
|
@pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
|
40
|
-
def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
|
40
|
+
def test_divergent_beamsearch(model_and_tokenizer, device, end_symb):
|
41
41
|
if device == 'cuda' and not torch.cuda.is_available():
|
42
42
|
pytest.skip("CUDA is not available on this machine.")
|
43
|
-
model, tokenizer = fakemodel_and_tokenizer
|
43
|
+
model, tokenizer = model_and_tokenizer
|
44
44
|
model.to(device)
|
45
45
|
prompt = "The capital of France is"
|
46
46
|
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
|
@@ -48,7 +48,7 @@ def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
|
|
48
48
|
max_length = 10
|
49
49
|
pad_token_id = tokenizer.eos_token_id
|
50
50
|
|
51
|
-
possible_answers = [' Paris', ' Paris Hilton']
|
51
|
+
possible_answers = [' Paris', ' Madrid', ' Paris Hilton']
|
52
52
|
tokenized_answers = tokenizer(possible_answers).input_ids
|
53
53
|
|
54
54
|
if end_symb == 'tokenizer':
|
@@ -56,9 +56,12 @@ def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
|
|
56
56
|
|
57
57
|
multi_choices_parser = MultiChoicesParser([tokenized_answers], end_symb=end_symb)
|
58
58
|
|
59
|
-
|
60
|
-
|
61
|
-
|
59
|
+
with torch.no_grad():
|
60
|
+
logprob_paris = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[0][0]]
|
61
|
+
logprob_hilton = model(torch.cat([input_ids, torch.tensor(tokenized_answers[2][0], device=device).view(1,1)], dim=-1)).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[2][1]]
|
62
|
+
logprob_paris_hilton = logprob_paris + logprob_hilton
|
63
|
+
logprob_madrid = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[1][0]]
|
64
|
+
logprob_paris_diverge = logprob_paris + log1mexp(logprob_hilton)
|
62
65
|
|
63
66
|
scores, solutions = divergent_beamsearch(
|
64
67
|
input_ids=input_ids,
|
@@ -72,8 +75,10 @@ def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
|
|
72
75
|
)
|
73
76
|
true_solutions = torch.nn.utils.rnn.pad_sequence([torch.tensor(ans) for ans in tokenized_answers], batch_first=True, padding_value=pad_token_id)
|
74
77
|
assert (solutions == true_solutions).all(), "Beam search did not return the expected solutions"
|
75
|
-
assert scores[0]
|
76
|
-
assert scores[1]
|
78
|
+
assert torch.isclose(scores[0], logprob_paris_diverge), "Beam search did not return the expected score"
|
79
|
+
assert torch.isclose(scores[1], logprob_madrid), "Beam search did not return the expected score"
|
80
|
+
assert torch.isclose(scores[2], logprob_paris_hilton), "Beam search did not return the expected score"
|
81
|
+
|
77
82
|
|
78
83
|
@pytest.mark.parametrize("device", ['cpu', 'cuda'])
|
79
84
|
@pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{divergent_beamsearch-0.1.6 → divergent_beamsearch-0.1.7}/src/divergent_beamsearch/__init__.py
RENAMED
File without changes
|
File without changes
|