divergent-beamsearch 0.1.4__tar.gz → 0.1.6__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/PKG-INFO +2 -2
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/pyproject.toml +2 -2
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/src/divergent_beamsearch/algorithm.py +11 -8
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/tests/test_beamsearch.py +45 -11
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/uv.lock +19 -19
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/.gitignore +0 -0
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/.python-version +0 -0
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/LICENCE +0 -0
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/README.md +0 -0
- {divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/src/divergent_beamsearch/__init__.py +0 -0
@@ -1,10 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: divergent-beamsearch
|
3
|
-
Version: 0.1.4
|
3
|
+
Version: 0.1.6
|
4
4
|
Summary: A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject.
|
5
5
|
License-File: LICENCE
|
6
6
|
Requires-Python: >=3.11
|
7
|
-
Requires-Dist: multi-choices-parser>=0.9.
|
7
|
+
Requires-Dist: multi-choices-parser>=0.9.61
|
8
8
|
Requires-Dist: torch>=2.0.0
|
9
9
|
Requires-Dist: transformers>=4.47.1
|
10
10
|
Description-Content-Type: text/markdown
|
@@ -1,11 +1,11 @@
|
|
1
1
|
[project]
|
2
2
|
name = "divergent-beamsearch"
|
3
|
-
version = "0.1.4"
|
3
|
+
version = "0.1.6"
|
4
4
|
description = "A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject."
|
5
5
|
readme = "README.md"
|
6
6
|
requires-python = ">=3.11"
|
7
7
|
dependencies = [
|
8
|
-
"multi-choices-parser>=0.9.
|
8
|
+
"multi-choices-parser>=0.9.61",
|
9
9
|
"torch>=2.0.0",
|
10
10
|
"transformers>=4.47.1",
|
11
11
|
]
|
{divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/src/divergent_beamsearch/algorithm.py
RENAMED
@@ -1,7 +1,7 @@
|
|
1
1
|
import math
|
2
2
|
import torch
|
3
3
|
from transformers import GPT2LMHeadModel
|
4
|
-
from multi_choices_parser import
|
4
|
+
from multi_choices_parser import DEFAULT_END_SYMB
|
5
5
|
|
6
6
|
|
7
7
|
class Parser:
|
@@ -14,15 +14,15 @@ class Parser:
|
|
14
14
|
def copy(self):
|
15
15
|
raise NotImplementedError
|
16
16
|
|
17
|
-
def get_parsers_tokens(parsers : list[Parser]) -> tuple[list, list[int]]:
|
17
|
+
def get_parsers_tokens(parsers : list[Parser], end_symb) -> tuple[list, list[int]]:
|
18
18
|
parsers_tokens = []
|
19
19
|
can_end = []
|
20
20
|
for parser in parsers:
|
21
21
|
tokens = list(parser.next())
|
22
|
-
|
23
|
-
can_end.append(True)
|
22
|
+
try:
|
24
23
|
tokens.remove(end_symb)
|
25
|
-
|
24
|
+
can_end.append(True)
|
25
|
+
except ValueError:
|
26
26
|
can_end.append(False)
|
27
27
|
parsers_tokens.append(tokens)
|
28
28
|
return parsers_tokens, can_end
|
@@ -77,7 +77,7 @@ class AcceptEverythingParser(Parser):
|
|
77
77
|
return self
|
78
78
|
|
79
79
|
@torch.no_grad()
|
80
|
-
def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None) -> tuple[torch.Tensor, torch.Tensor]:
|
80
|
+
def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None, end_symb=DEFAULT_END_SYMB) -> tuple[torch.Tensor, torch.Tensor]:
|
81
81
|
assert input_ids.shape[0] == 1, "Batch size must be 1"
|
82
82
|
device = input_ids.device
|
83
83
|
input_ids = input_ids.cpu()
|
@@ -101,7 +101,7 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
|
|
101
101
|
if len(input_ids_unfinished) == 0:
|
102
102
|
break
|
103
103
|
pred = batched_inference_logits(model, input_ids_unfinished.to(device), batch_size=batch_size)[:, -1].cpu()
|
104
|
-
parsers_tokens, can_end = get_parsers_tokens(parsers_unfinished)
|
104
|
+
parsers_tokens, can_end = get_parsers_tokens(parsers_unfinished, end_symb)
|
105
105
|
logprobs = torch.log_softmax(pred, dim=-1)
|
106
106
|
logprobs_filtered = apply_mask_tokens(logprobs, parsers_tokens)
|
107
107
|
if len(logprobs_filtered):
|
@@ -140,6 +140,7 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
|
|
140
140
|
parser.step(token)
|
141
141
|
|
142
142
|
# Special case of vanilla beam search where all answers are valid
|
143
|
+
# Warning : In this case model will not stop on end_of_sentence token
|
143
144
|
if vanilla:
|
144
145
|
order = scores_unfinished.argsort(descending=True)
|
145
146
|
scores_finished = scores_unfinished[order][:num_solutions]
|
@@ -154,7 +155,9 @@ def set_slice_row(x : torch.Tensor, slices : torch.IntTensor, value) -> torch.Te
|
|
154
155
|
x[i].index_fill_(0, indices[i], 0)
|
155
156
|
|
156
157
|
@torch.no_grad()
|
157
|
-
def divergent_logprob(input_ids : torch.Tensor, attention_mask : torch.Tensor | None, model : GPT2LMHeadModel,
|
158
|
+
def divergent_logprob(input_ids : torch.Tensor, attention_mask : torch.Tensor | None, model : GPT2LMHeadModel,
|
159
|
+
parsers : Parser | list[Parser] | None, batch_size=32,
|
160
|
+
start : int | torch.IntTensor = None, end_symb=DEFAULT_END_SYMB) -> torch.FloatTensor:
|
158
161
|
if start is None:
|
159
162
|
start = 0
|
160
163
|
if isinstance(start, int):
|
@@ -1,10 +1,13 @@
|
|
1
1
|
import numpy as np
|
2
2
|
import pytest
|
3
3
|
import torch
|
4
|
-
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
4
|
+
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
|
5
5
|
from multi_choices_parser import MultiChoicesParser
|
6
6
|
from divergent_beamsearch.algorithm import divergent_beamsearch, divergent_logprob, log1mexp
|
7
|
-
from multi_choices_parser import MultiChoicesParser
|
7
|
+
from multi_choices_parser import MultiChoicesParser, DEFAULT_END_SYMB
|
8
|
+
|
9
|
+
|
10
|
+
TEST_END_SYMBS = [DEFAULT_END_SYMB, 'tokenizer']
|
8
11
|
|
9
12
|
@pytest.fixture
|
10
13
|
def model_and_tokenizer():
|
@@ -12,11 +15,32 @@ def model_and_tokenizer():
|
|
12
15
|
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
13
16
|
return model, tokenizer
|
14
17
|
|
18
|
+
@pytest.fixture
|
19
|
+
def fakemodel_and_tokenizer():
|
20
|
+
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
|
21
|
+
|
22
|
+
# Define a small GPT-2 configuration
|
23
|
+
config = GPT2Config(
|
24
|
+
vocab_size=tokenizer.vocab_size, # Use the default GPT-2 tokenizer vocab size
|
25
|
+
n_positions=64, # Maximum sequence length
|
26
|
+
n_ctx=64, # Context window size
|
27
|
+
n_embd=8, # Size of the embeddings
|
28
|
+
n_layer=1, # Number of layers
|
29
|
+
n_head=2, # Number of attention heads
|
30
|
+
)
|
31
|
+
|
32
|
+
# Instantiate a model with the custom configuration
|
33
|
+
model = GPT2LMHeadModel(config)
|
34
|
+
model.eval()
|
35
|
+
|
36
|
+
return model, tokenizer
|
37
|
+
|
15
38
|
@pytest.mark.parametrize("device", ['cpu', 'cuda'])
|
16
|
-
|
39
|
+
@pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
|
40
|
+
def test_divergent_beamsearch(fakemodel_and_tokenizer, device, end_symb):
|
17
41
|
if device == 'cuda' and not torch.cuda.is_available():
|
18
42
|
pytest.skip("CUDA is not available on this machine.")
|
19
|
-
model, tokenizer = model_and_tokenizer
|
43
|
+
model, tokenizer = fakemodel_and_tokenizer
|
20
44
|
model.to(device)
|
21
45
|
prompt = "The capital of France is"
|
22
46
|
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
|
@@ -26,7 +50,11 @@ def test_divergent_beamsearch(model_and_tokenizer, device):
|
|
26
50
|
|
27
51
|
possible_answers = [' Paris', ' Paris Hilton']
|
28
52
|
tokenized_answers = tokenizer(possible_answers).input_ids
|
29
|
-
|
53
|
+
|
54
|
+
if end_symb == 'tokenizer':
|
55
|
+
end_symb = tokenizer.eos_token_id
|
56
|
+
|
57
|
+
multi_choices_parser = MultiChoicesParser([tokenized_answers], end_symb=end_symb)
|
30
58
|
|
31
59
|
logprob_paris = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[0][0]]
|
32
60
|
logprob_hilton = model(torch.cat([input_ids, torch.tensor(tokenized_answers[1][0], device=device).view(1,1)], dim=-1)).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[1][1]]
|
@@ -39,7 +67,8 @@ def test_divergent_beamsearch(model_and_tokenizer, device):
|
|
39
67
|
max_length=max_length,
|
40
68
|
parser=multi_choices_parser,
|
41
69
|
pad_token_id=pad_token_id,
|
42
|
-
num_solutions=10
|
70
|
+
num_solutions=10,
|
71
|
+
end_symb=end_symb
|
43
72
|
)
|
44
73
|
true_solutions = torch.nn.utils.rnn.pad_sequence([torch.tensor(ans) for ans in tokenized_answers], batch_first=True, padding_value=pad_token_id)
|
45
74
|
assert (solutions == true_solutions).all(), "Beam search did not return the expected solutions"
|
@@ -47,10 +76,11 @@ def test_divergent_beamsearch(model_and_tokenizer, device):
|
|
47
76
|
assert scores[1] == logprob_paris_hilton, "Beam search did not return the expected score"
|
48
77
|
|
49
78
|
@pytest.mark.parametrize("device", ['cpu', 'cuda'])
|
50
|
-
|
79
|
+
@pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
|
80
|
+
def test_divergent_logprob(fakemodel_and_tokenizer, device, end_symb):
|
51
81
|
if device == 'cuda' and not torch.cuda.is_available():
|
52
82
|
pytest.skip("CUDA is not available on this machine.")
|
53
|
-
model, tokenizer = model_and_tokenizer
|
83
|
+
model, tokenizer = fakemodel_and_tokenizer
|
54
84
|
model.to(device)
|
55
85
|
prompts = [
|
56
86
|
"The capital of France is Paris",
|
@@ -63,10 +93,14 @@ def test_divergent_logprob(model_and_tokenizer, device):
|
|
63
93
|
|
64
94
|
possible_answers = [' Paris', ' Paris Hilton']
|
65
95
|
tokenized_answers = tokenizer(possible_answers).input_ids
|
66
|
-
|
96
|
+
|
97
|
+
if end_symb == 'tokenizer':
|
98
|
+
end_symb = tokenizer.eos_token_id
|
99
|
+
|
100
|
+
multi_choices_parser = MultiChoicesParser([tokenized_answers], end_symb=end_symb)
|
67
101
|
|
68
102
|
input_len = attention_mask.sum(-1).cpu()
|
69
|
-
probs = divergent_logprob(input_ids, attention_mask, model, multi_choices_parser, start=input_len - torch.tensor([1,2]))
|
103
|
+
probs = divergent_logprob(input_ids, attention_mask, model, multi_choices_parser, start=input_len - torch.tensor([1,2]), end_symb=end_symb)
|
70
104
|
|
71
105
|
input_ids_1st = tokenizer("The capital of France is Paris Hilton", return_tensors='pt').input_ids.to(device)
|
72
106
|
logprobs_1st = model(input_ids_1st).logits.cpu().log_softmax(dim=-1)
|
@@ -156,4 +190,4 @@ def test_vanilla_beamsearch(model_and_tokenizer, device):
|
|
156
190
|
]
|
157
191
|
assert np.isclose(
|
158
192
|
scores.cpu().numpy(), np.array([-8.1361, -8.7745, -9.1053]), atol=0.0001
|
159
|
-
).all()
|
193
|
+
).all()
|
@@ -73,7 +73,7 @@ wheels = [
|
|
73
73
|
|
74
74
|
[[package]]
|
75
75
|
name = "divergent-beamsearch"
|
76
|
-
version = "0.1.
|
76
|
+
version = "0.1.5"
|
77
77
|
source = { editable = "." }
|
78
78
|
dependencies = [
|
79
79
|
{ name = "multi-choices-parser" },
|
@@ -88,7 +88,7 @@ dev = [
|
|
88
88
|
|
89
89
|
[package.metadata]
|
90
90
|
requires-dist = [
|
91
|
-
{ name = "multi-choices-parser", specifier = ">=0.9.
|
91
|
+
{ name = "multi-choices-parser", specifier = ">=0.9.61" },
|
92
92
|
{ name = "torch", specifier = ">=2.0.0" },
|
93
93
|
{ name = "transformers", specifier = ">=4.47.1" },
|
94
94
|
]
|
@@ -221,11 +221,11 @@ wheels = [
|
|
221
221
|
|
222
222
|
[[package]]
|
223
223
|
name = "multi-choices-parser"
|
224
|
-
version = "0.9.
|
224
|
+
version = "0.9.61"
|
225
225
|
source = { registry = "https://pypi.org/simple" }
|
226
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
226
|
+
sdist = { url = "https://files.pythonhosted.org/packages/e2/17/90a6125a2145c03e39c3e7f78f65121eb14dedb9de8b40aee3c8a24a709b/multi_choices_parser-0.9.61.tar.gz", hash = "sha256:be932cac4aeabe9ee057c6d4592ea4325a0a92e52758d77a9e08bafa2cd23294", size = 7889 }
|
227
227
|
wheels = [
|
228
|
-
{ url = "https://files.pythonhosted.org/packages/
|
228
|
+
{ url = "https://files.pythonhosted.org/packages/3c/4f/c5a514a510779202ff37505220edfba9154ceff31958ed71fa1878781af9/multi_choices_parser-0.9.61-py3-none-any.whl", hash = "sha256:36bc367bceb66bbfb1bea26d9a38aa9cd10273b54cef331dd7c69da582fb9c2a", size = 6870 },
|
229
229
|
]
|
230
230
|
|
231
231
|
[[package]]
|
@@ -618,21 +618,21 @@ dependencies = [
|
|
618
618
|
{ name = "fsspec" },
|
619
619
|
{ name = "jinja2" },
|
620
620
|
{ name = "networkx" },
|
621
|
-
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and
|
622
|
-
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and
|
623
|
-
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and
|
624
|
-
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and
|
625
|
-
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and
|
626
|
-
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and
|
627
|
-
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and
|
628
|
-
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and
|
629
|
-
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and
|
630
|
-
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and
|
631
|
-
{ name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and
|
632
|
-
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and
|
621
|
+
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
622
|
+
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
623
|
+
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
624
|
+
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
625
|
+
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
626
|
+
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
627
|
+
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
628
|
+
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
629
|
+
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
630
|
+
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
631
|
+
{ name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
632
|
+
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
633
633
|
{ name = "setuptools", marker = "python_full_version >= '3.12'" },
|
634
634
|
{ name = "sympy" },
|
635
|
-
{ name = "triton", marker = "python_full_version < '3.13' and platform_machine == 'x86_64' and
|
635
|
+
{ name = "triton", marker = "python_full_version < '3.13' and platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
636
636
|
{ name = "typing-extensions" },
|
637
637
|
]
|
638
638
|
wheels = [
|
@@ -652,7 +652,7 @@ name = "tqdm"
|
|
652
652
|
version = "4.67.1"
|
653
653
|
source = { registry = "https://pypi.org/simple" }
|
654
654
|
dependencies = [
|
655
|
-
{ name = "colorama", marker = "
|
655
|
+
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
656
656
|
]
|
657
657
|
sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
|
658
658
|
wheels = [
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{divergent_beamsearch-0.1.4 → divergent_beamsearch-0.1.6}/src/divergent_beamsearch/__init__.py
RENAMED
File without changes
|