divergent-beamsearch 0.1.0.tar.gz → 0.1.1.tar.gz

@@ -1,4 +1,5 @@
 **/__pycache__
 .pytest_cache
 .vscode
-.venv
+.venv
+dist
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: divergent-beamsearch
-Version: 0.1.0
+Version: 0.1.1
 Summary: A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject.
 License-File: LICENCE
 Requires-Python: >=3.11
@@ -1,6 +1,6 @@
 [project]
 name = "divergent-beamsearch"
-version = "0.1.0"
+version = "0.1.1"
 description = "A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -47,12 +47,29 @@ def log1mexp(x: torch.Tensor) -> torch.Tensor:
         (-x.exp()).log1p(),
     )
 
+class AcceptEverythingParser:
+    def __init__(self, vocab_size : int):
+        self.vocab_size = vocab_size
+        self.tokens = tuple(range(vocab_size))
+
+    def step(self, token):
+        pass
+
+    def next(self):
+        return self.tokens
+
+    def copy(self):
+        return self
+
 @torch.no_grad()
 def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, multi_choices_parser : MultiChoicesParser, pad_token_id : int, batch_size=32, num_solutions = None) -> tuple[torch.Tensor, torch.Tensor]:
     assert input_ids.shape[0] == 1, "Batch size must be 1"
 
     if num_solutions is None:
         num_solutions = beam_size
+    vanilla = multi_choices_parser is None
+    if vanilla:
+        multi_choices_parser = AcceptEverythingParser(model.config.vocab_size)
 
     parsers_unfinished = [multi_choices_parser]
     scores_finished = torch.tensor([], dtype=torch.float)
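
Note on the new AcceptEverythingParser: it mirrors the small parser interface that divergent_beamsearch drives (step, next, copy) while accepting every token in the vocabulary, which is what lets multi_choices_parser=None fall back to unconstrained decoding. A minimal sketch of that implicit contract (the Protocol below is illustrative, not part of the package):

    from typing import Protocol

    class TokenParser(Protocol):
        def step(self, token: int) -> None: ...   # consume one generated token
        def next(self) -> tuple[int, ...]: ...    # tokens allowed at the next position
        def copy(self) -> "TokenParser": ...      # fork parser state for each beam

    # AcceptEverythingParser satisfies this trivially: next() always returns
    # the whole vocabulary, step() ignores its input, and copy() can return
    # self because the parser carries no mutable state.
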
@@ -73,9 +90,10 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
         logprobs_filtered = apply_mask_tokens(logprobs, parsers_tokens)
         if len(logprobs_filtered):
             topk = torch.topk(logprobs_filtered, beam_size, dim=-1) # shape (batch_size, beam_size)
-            topk_global = topk.values.flatten().topk(beam_size)
+            values = topk.values + scores_unfinished.unsqueeze(-1)
+            topk_global = values.flatten().topk(beam_size)
             best_tokens_row = topk_global.indices // beam_size
-            best_tokens, best_tokens_logprobs = topk.indices[best_tokens_row, topk_global.indices % beam_size], topk_global.values
+            best_tokens, best_tokens_logprobs = topk.indices[best_tokens_row, topk_global.indices % beam_size], topk.values[best_tokens_row, topk_global.indices % beam_size]
             notinf = ~best_tokens_logprobs.isinf()
             best_tokens, best_tokens_row, best_tokens_logprobs = best_tokens[notinf], best_tokens_row[notinf], best_tokens_logprobs[notinf]
         else:
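
This hunk fixes the beam ranking: candidates are now compared by cumulative beam score (the beam's running score plus the per-step log-probability) rather than by the per-step log-probability alone, and best_tokens_logprobs now stores the per-step value for the selected cells instead of the global ranking score. A self-contained illustration with made-up numbers:

    import torch

    # Two beams: beam 0 has a much better running score than beam 1.
    scores_unfinished = torch.tensor([-0.1, -2.0])             # cumulative beam scores
    topk_values = torch.tensor([[-1.5, -1.6],                  # per-step logprobs,
                                [-0.2, -0.3]])                 # shape (beams, k)

    # Old ranking: per-step logprobs only -> beam 1 takes both slots.
    old = topk_values.flatten().topk(2).indices                # tensor([2, 3])

    # Fixed ranking: cumulative scores -> beam 0's continuations win.
    values = topk_values + scores_unfinished.unsqueeze(-1)
    new = values.flatten().topk(2).indices                     # tensor([0, 1])
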
@@ -104,9 +122,11 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
         parsers_unfinished = [parsers_unfinished[row].copy() for row in best_tokens_row]
         for parser, token in zip(parsers_unfinished, best_tokens.tolist()):
             parser.step(token)
+
+    # Special case of vanilla beam search where all answers are valid
+    if vanilla:
+        order = scores_unfinished.argsort(descending=True)
+        scores_finished = scores_unfinished[order][:num_solutions]
+        solutions_finished = solutions_unfinished[order][:num_solutions]
 
     return scores_finished, solutions_finished
-
-
-
-
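
The final block handles the vanilla case: with the accept-everything parser no beam ever reaches an accepting state inside the loop, so scores_finished would stay empty; the best unfinished beams are promoted at the end instead. A standalone sketch of that selection step (tensor values are illustrative):

    import torch

    scores_unfinished = torch.tensor([-5.2, -3.1, -4.0])
    solutions_unfinished = torch.tensor([[1, 2], [3, 4], [5, 6]])
    num_solutions = 2

    # Sort beams by score, keep the top num_solutions.
    order = scores_unfinished.argsort(descending=True)               # tensor([1, 2, 0])
    scores_finished = scores_unfinished[order][:num_solutions]       # tensor([-3.1, -4.0])
    solutions_finished = solutions_unfinished[order][:num_solutions]
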
@@ -0,0 +1,118 @@
+import numpy as np
+import pytest
+import torch
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+from multi_choices_parser import MultiChoicesParser
+from divergent_beamsearch.algorithm import divergent_beamsearch, log1mexp
+from multi_choices_parser import MultiChoicesParser
+
+@pytest.fixture
+def model_and_tokenizer():
+    model = GPT2LMHeadModel.from_pretrained("gpt2")
+    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+    return model, tokenizer
+
+def test_divergent_beamsearch(model_and_tokenizer):
+    model, tokenizer = model_and_tokenizer
+    prompt = "The capital of France is"
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    beam_size = 5
+    max_length = 10
+    pad_token_id = tokenizer.eos_token_id
+
+    possible_answers = [' Paris', ' Paris Hilton']
+    tokenized_answers = tokenizer(possible_answers).input_ids
+    multi_choices_parser = MultiChoicesParser([tokenized_answers])
+
+    logprob_paris = model(input_ids).logits.log_softmax(dim=-1)[0, -1, tokenized_answers[0][0]]
+    logprob_hilton = model(torch.cat([input_ids, torch.tensor(tokenized_answers[1][0]).view(1,1)], dim=-1)).logits.log_softmax(dim=-1)[0, -1, tokenized_answers[1][1]]
+    logprob_paris_hilton = logprob_paris + logprob_hilton
+
+    scores, solutions = divergent_beamsearch(
+        input_ids=input_ids,
+        model=model,
+        beam_size=beam_size,
+        max_length=max_length,
+        multi_choices_parser=multi_choices_parser,
+        pad_token_id=pad_token_id,
+        num_solutions=10
+    )
+    true_solutions = torch.nn.utils.rnn.pad_sequence([torch.tensor(ans) for ans in tokenized_answers], batch_first=True, padding_value=pad_token_id)
+    assert (solutions == true_solutions).all(), "Beam search did not return the expected solutions"
+    assert scores[0] == logprob_paris + log1mexp(logprob_hilton), "Beam search did not return the expected score"
+    assert scores[1] == logprob_paris_hilton, "Beam search did not return the expected score"
+
+def test_vanilla_beamsearch(model_and_tokenizer):
+    # Verify that divergent beam search where all answers are valid is equivalent to vanilla beam search
+    # Results of beam search were compared with huggingface implementation (https://huggingface.co/spaces/m-ric/beam_search_visualizer)
+    model, tok = model_and_tokenizer
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.eval()
+    prompt = "The capital of France is"
+    input_ids = tok(prompt, return_tensors="pt").input_ids.to(device)
+    scores, sequences = divergent_beamsearch(
+        input_ids, model, beam_size=3, max_length=1, pad_token_id=tok.eos_token_id, num_solutions=3, multi_choices_parser=None
+    )
+    sequences = [tok.decode(s) for s in sequences]
+    assert sequences == [" the", " now", " a"]
+    assert np.isclose(
+        scores.cpu().numpy(), np.array([-2.4699, -3.0377, -3.0756]), atol=0.0001
+    ).all()
+
+    scores, sequences = divergent_beamsearch(
+        input_ids, model, beam_size=3, max_length=2, pad_token_id=tok.eos_token_id, num_solutions=3, multi_choices_parser=None
+    )
+    sequences = [tok.decode(s) for s in sequences]
+    assert sequences == [" the capital", " now home", " now the"]
+    assert np.isclose(
+        scores.cpu().numpy(), np.array([-4.2437, -5.3013, -5.3408]), atol=0.0001
+    ).all()
+
+    scores, sequences = divergent_beamsearch(
+        input_ids, model, beam_size=3, max_length=3, pad_token_id=tok.eos_token_id, num_solutions=3, multi_choices_parser=None
+    )
+    sequences = [tok.decode(s) for s in sequences]
+    assert sequences == [" the capital of", " now home to", " now the capital"]
+    assert np.isclose(
+        scores.cpu().numpy(), np.array([-4.3194, -5.3057, -7.7173]), atol=0.0001
+    ).all()
+
+    scores, sequences = divergent_beamsearch(
+        input_ids, model, beam_size=3, max_length=4, pad_token_id=tok.eos_token_id, num_solutions=3, multi_choices_parser=None
+    )
+    sequences = [tok.decode(s) for s in sequences]
+    assert sequences == [
+        " the capital of the",
+        " the capital of France",
+        " the capital of a",
+    ]
+    assert np.isclose(
+        scores.cpu().numpy(), np.array([-5.5825, -5.9150, -7.1716]), atol=0.0001
+    ).all()
+
+    scores, sequences = divergent_beamsearch(
+        input_ids, model, beam_size=3, max_length=5, pad_token_id=tok.eos_token_id, num_solutions=3, multi_choices_parser=None
+    )
+    sequences = [tok.decode(s) for s in sequences]
+    assert sequences == [
+        " the capital of France,",
+        " the capital of France.",
+        " the capital of the French",
+    ]
+    assert np.isclose(
+        scores.cpu().numpy(), np.array([-6.9453, -7.1549, -7.5727]), atol=0.0001
+    ).all()
+
+
+    scores, sequences = divergent_beamsearch(
+        input_ids, model, beam_size=3, max_length=6, pad_token_id=tok.eos_token_id, num_solutions=3, multi_choices_parser=None
+    )
+    sequences = [tok.decode(s) for s in sequences]
+    assert sequences == [
+        " the capital of France, and",
+        " the capital of the French Republic",
+        " the capital of France. It",
+    ]
+    assert np.isclose(
+        scores.cpu().numpy(), np.array([-8.1361, -8.7745, -9.1053]), atol=0.0001
+    ).all()
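
The first score assertion in test_divergent_beamsearch captures the algorithm's core idea: the score of ' Paris' as a final answer is log P(' Paris') plus the log-probability of not continuing into the longer candidate ' Paris Hilton', i.e. log(1 - exp(logprob_hilton)), evaluated stably by log1mexp. A quick numerical sketch of that helper (the branch threshold follows the standard trick and may differ from the package's exact constant):

    import torch

    def log1mexp(x: torch.Tensor) -> torch.Tensor:
        # Stable log(1 - exp(x)) for x < 0: use log1p(-exp(x)) when exp(x)
        # is small, and log(-expm1(x)) when x is close to 0.
        return torch.where(
            x < -0.693,  # roughly -log(2)
            (-x.exp()).log1p(),
            (-x.expm1()).log(),
        )

    x = torch.tensor([-10.0, -1e-4])
    print(log1mexp(x))                  # approx. tensor([-4.54e-05, -9.2104])
    print(torch.log(1 - torch.exp(x)))  # naive form loses precision near zero
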
@@ -73,7 +73,7 @@ wheels = [
 
 [[package]]
 name = "divergent-beamsearch"
-version = "0.1.0"
+version = "0.1.1"
 source = { editable = "." }
 dependencies = [
     { name = "multi-choices-parser" },
@@ -1,41 +0,0 @@
-import pytest
-import torch
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
-from multi_choices_parser import MultiChoicesParser
-from divergent_beamsearch.algorithm import divergent_beamsearch, log1mexp
-
-@pytest.fixture
-def model_and_tokenizer():
-    model = GPT2LMHeadModel.from_pretrained("gpt2")
-    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
-    return model, tokenizer
-
-def test_divergent_beamsearch(model_and_tokenizer):
-    model, tokenizer = model_and_tokenizer
-    prompt = "The capital of France is"
-    input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    beam_size = 5
-    max_length = 10
-    pad_token_id = tokenizer.eos_token_id
-
-    possible_answers = [' Paris', ' Paris Hilton']
-    tokenized_answers = tokenizer(possible_answers).input_ids
-    multi_choices_parser = MultiChoicesParser([tokenized_answers])
-
-    logprob_paris = model(input_ids).logits.log_softmax(dim=-1)[0, -1, tokenized_answers[0][0]]
-    logprob_hilton = model(torch.cat([input_ids, torch.tensor(tokenized_answers[1][0]).view(1,1)], dim=-1)).logits.log_softmax(dim=-1)[0, -1, tokenized_answers[1][1]]
-    logprob_paris_hilton = logprob_paris + logprob_hilton
-
-    scores, solutions = divergent_beamsearch(
-        input_ids=input_ids,
-        model=model,
-        beam_size=beam_size,
-        max_length=max_length,
-        multi_choices_parser=multi_choices_parser,
-        pad_token_id=pad_token_id,
-        num_solutions=10
-    )
-    true_solutions = torch.nn.utils.rnn.pad_sequence([torch.tensor(ans) for ans in tokenized_answers], batch_first=True, padding_value=pad_token_id)
-    assert (solutions == true_solutions).all(), "Beam search did not return the expected solutions"
-    assert scores[0] == logprob_paris + log1mexp(logprob_hilton), "Beam search did not return the expected score"
-    assert scores[1] == logprob_paris_hilton, "Beam search did not return the expected score"