PyPI - divergent-beamsearch - Versions diffs - 0.1.5__tar.gz → 0.1.7__tar.gz - Mend

divergent-beamsearch 0.1.5__tar.gz → 0.1.7__tar.gz

Files changed (10) hide show

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/PKG-INFO RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.4
 Name: divergent-beamsearch
-Version: 0.1.5
+Version: 0.1.7
 Summary: A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject.
 License-File: LICENCE
 Requires-Python: >=3.11
-Requires-Dist: multi-choices-parser>=0.9.57
+Requires-Dist: multi-choices-parser>=0.9.61
 Requires-Dist: torch>=2.0.0
 Requires-Dist: transformers>=4.47.1
 Description-Content-Type: text/markdown

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/pyproject.toml RENAMED Viewed

@@ -1,11 +1,11 @@
 [project]
 name = "divergent-beamsearch"
-version = "0.1.5"
+version = "0.1.7"
 description = "A variant of the beam search algorithm that focuses on finding answers that maximize the probability of generating an answer before diverging into another subject."
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
-    "multi-choices-parser>=0.9.57",
+    "multi-choices-parser>=0.9.61",
     "torch>=2.0.0",
     "transformers>=4.47.1",
 ]

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/src/divergent_beamsearch/algorithm.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import math
 import torch
 from transformers import GPT2LMHeadModel
-from multi_choices_parser import MultiChoicesParser, end_symb
+from multi_choices_parser import DEFAULT_END_SYMB
 class Parser:
@@ -19,10 +19,10 @@ def get_parsers_tokens(parsers : list[Parser], end_symb) -> tuple[list, list[int
     can_end = []
     for parser in parsers:
         tokens = list(parser.next())
-        if end_symb in tokens:
-            can_end.append(True)
+        try:
             tokens.remove(end_symb)
-        else:
+            can_end.append(True)
+        except ValueError:
             can_end.append(False)
         parsers_tokens.append(tokens)
     return parsers_tokens, can_end
@@ -76,8 +76,14 @@ class AcceptEverythingParser(Parser):
     def copy(self):
         return self
+def index_reduce_lists(x : torch.Tensor, indices : list[list[int]], reduce_func=torch.sum) -> torch.Tensor:
+    values = []
+    for i, index in enumerate(indices):
+        values.append(reduce_func(x[i, index], dim=-1))
+    return torch.tensor(values, dtype=x.dtype, device=x.device, requires_grad=x.requires_grad)
 @torch.no_grad()
-def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None, end_symb=end_symb) -> tuple[torch.Tensor, torch.Tensor]:
+def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam_size : int, max_length : int, parser : Parser, pad_token_id : int, batch_size=32, num_solutions = None, end_symb=DEFAULT_END_SYMB) -> tuple[torch.Tensor, torch.Tensor]:
     assert input_ids.shape[0] == 1, "Batch size must be 1"
     device = input_ids.device
     input_ids = input_ids.cpu()
@@ -120,7 +126,8 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
         scores_finished_current = scores_unfinished[can_end]
         solutions_finished_current = solutions_unfinished[can_end]
-        scores_finished_current = scores_finished_current + log1mexp(logprobs[can_end, select_mask(parsers_tokens, can_end)].logsumexp(dim=-1)).squeeze(-1)
+        logprob_other_ans = index_reduce_lists(logprobs[can_end], select_mask(parsers_tokens, can_end), reduce_func=torch.logsumexp).squeeze(-1)
+        scores_finished_current = scores_finished_current + log1mexp(logprob_other_ans)
         scores_finished = torch.cat([scores_finished, scores_finished_current])
         if len(solutions_finished_current):
             pad = torch.full((len(scores_finished_current), solutions_finished_current.shape[1] - solutions_finished.shape[1]), pad_token_id, dtype=torch.long)
@@ -140,6 +147,7 @@ def divergent_beamsearch(input_ids : torch.Tensor, model : GPT2LMHeadModel, beam
             parser.step(token)
     # Special case of vanilla beam search where all answers are valid
+    # Warning : In this case model will not stop on end_of_sentence token
     if vanilla:
         order = scores_unfinished.argsort(descending=True)
         scores_finished = scores_unfinished[order][:num_solutions]
@@ -154,7 +162,9 @@ def set_slice_row(x : torch.Tensor, slices : torch.IntTensor, value) -> torch.Te
         x[i].index_fill_(0, indices[i], 0)
 @torch.no_grad()
-def divergent_logprob(input_ids : torch.Tensor, attention_mask : torch.Tensor | None, model : GPT2LMHeadModel, parsers : Parser | list[Parser] | None, batch_size=32, start : int | torch.IntTensor = None) -> torch.FloatTensor:
+def divergent_logprob(input_ids : torch.Tensor, attention_mask : torch.Tensor | None, model : GPT2LMHeadModel,
+                      parsers : Parser | list[Parser] | None, batch_size=32,
+                      start : int | torch.IntTensor = None, end_symb=DEFAULT_END_SYMB) -> torch.FloatTensor:
     if start is None:
         start = 0
     if isinstance(start, int):

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/tests/test_beamsearch.py RENAMED Viewed

@@ -1,10 +1,13 @@
 import numpy as np
 import pytest
 import torch
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
+from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
 from multi_choices_parser import MultiChoicesParser
 from divergent_beamsearch.algorithm import divergent_beamsearch, divergent_logprob, log1mexp
-from multi_choices_parser import MultiChoicesParser
+from multi_choices_parser import MultiChoicesParser, DEFAULT_END_SYMB
+TEST_END_SYMBS = [DEFAULT_END_SYMB, 'tokenizer']
 @pytest.fixture
 def model_and_tokenizer():
@@ -12,8 +15,29 @@ def model_and_tokenizer():
     tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     return model, tokenizer
+@pytest.fixture
+def fakemodel_and_tokenizer():
+    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+    # Define a small GPT-2 configuration
+    config = GPT2Config(
+        vocab_size=tokenizer.vocab_size,  # Use the default GPT-2 tokenizer vocab size
+        n_positions=64,  # Maximum sequence length
+        n_ctx=64,  # Context window size
+        n_embd=8,  # Size of the embeddings
+        n_layer=1,  # Number of layers
+        n_head=2,  # Number of attention heads
+    )
+    # Instantiate a model with the custom configuration
+    model = GPT2LMHeadModel(config)
+    model.eval()
+    return model, tokenizer
 @pytest.mark.parametrize("device", ['cpu', 'cuda'])
-def test_divergent_beamsearch(model_and_tokenizer, device):
+@pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
+def test_divergent_beamsearch(model_and_tokenizer, device, end_symb):
     if device == 'cuda' and not torch.cuda.is_available():
         pytest.skip("CUDA is not available on this machine.")
     model, tokenizer = model_and_tokenizer
@@ -24,13 +48,20 @@ def test_divergent_beamsearch(model_and_tokenizer, device):
     max_length = 10
     pad_token_id = tokenizer.eos_token_id
-    possible_answers = [' Paris', ' Paris Hilton']
+    possible_answers = [' Paris', ' Madrid', ' Paris Hilton']
     tokenized_answers = tokenizer(possible_answers).input_ids
-    multi_choices_parser = MultiChoicesParser([tokenized_answers])
-    logprob_paris = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[0][0]]
-    logprob_hilton = model(torch.cat([input_ids, torch.tensor(tokenized_answers[1][0], device=device).view(1,1)], dim=-1)).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[1][1]]
-    logprob_paris_hilton = logprob_paris + logprob_hilton
+    if end_symb == 'tokenizer':
+        end_symb = tokenizer.eos_token_id
+    multi_choices_parser = MultiChoicesParser([tokenized_answers], end_symb=end_symb)
+    with torch.no_grad():
+        logprob_paris = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[0][0]]
+        logprob_hilton = model(torch.cat([input_ids, torch.tensor(tokenized_answers[2][0], device=device).view(1,1)], dim=-1)).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[2][1]]
+        logprob_paris_hilton = logprob_paris + logprob_hilton
+        logprob_madrid = model(input_ids).logits.cpu().log_softmax(dim=-1)[0, -1, tokenized_answers[1][0]]
+        logprob_paris_diverge = logprob_paris + log1mexp(logprob_hilton)
     scores, solutions = divergent_beamsearch(
         input_ids=input_ids,
@@ -39,18 +70,22 @@ def test_divergent_beamsearch(model_and_tokenizer, device):
         max_length=max_length,
         parser=multi_choices_parser,
         pad_token_id=pad_token_id,
-        num_solutions=10
+        num_solutions=10,
+        end_symb=end_symb
     )
     true_solutions = torch.nn.utils.rnn.pad_sequence([torch.tensor(ans) for ans in tokenized_answers], batch_first=True, padding_value=pad_token_id)
     assert (solutions == true_solutions).all(), "Beam search did not return the expected solutions"
-    assert scores[0] == logprob_paris + log1mexp(logprob_hilton), "Beam search did not return the expected score"
-    assert scores[1] == logprob_paris_hilton, "Beam search did not return the expected score"
+    assert torch.isclose(scores[0], logprob_paris_diverge), "Beam search did not return the expected score"
+    assert torch.isclose(scores[1], logprob_madrid), "Beam search did not return the expected score"
+    assert torch.isclose(scores[2], logprob_paris_hilton), "Beam search did not return the expected score"
 @pytest.mark.parametrize("device", ['cpu', 'cuda'])
-def test_divergent_logprob(model_and_tokenizer, device):
+@pytest.mark.parametrize("end_symb", TEST_END_SYMBS)
+def test_divergent_logprob(fakemodel_and_tokenizer, device, end_symb):
     if device == 'cuda' and not torch.cuda.is_available():
         pytest.skip("CUDA is not available on this machine.")
-    model, tokenizer = model_and_tokenizer
+    model, tokenizer = fakemodel_and_tokenizer
     model.to(device)
     prompts = [
         "The capital of France is Paris",
@@ -63,10 +98,14 @@ def test_divergent_logprob(model_and_tokenizer, device):
     possible_answers = [' Paris', ' Paris Hilton']
     tokenized_answers = tokenizer(possible_answers).input_ids
-    multi_choices_parser = MultiChoicesParser([tokenized_answers])
+    if end_symb == 'tokenizer':
+        end_symb = tokenizer.eos_token_id
+    multi_choices_parser = MultiChoicesParser([tokenized_answers], end_symb=end_symb)
     input_len = attention_mask.sum(-1).cpu()
-    probs = divergent_logprob(input_ids, attention_mask, model, multi_choices_parser, start=input_len - torch.tensor([1,2]))
+    probs = divergent_logprob(input_ids, attention_mask, model, multi_choices_parser, start=input_len - torch.tensor([1,2]), end_symb=end_symb)
     input_ids_1st = tokenizer("The capital of France is Paris Hilton", return_tensors='pt').input_ids.to(device)
     logprobs_1st = model(input_ids_1st).logits.cpu().log_softmax(dim=-1)
@@ -156,4 +195,4 @@ def test_vanilla_beamsearch(model_and_tokenizer, device):
     ]
     assert np.isclose(
         scores.cpu().numpy(), np.array([-8.1361, -8.7745, -9.1053]), atol=0.0001
-    ).all()
+    ).all()

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/uv.lock RENAMED Viewed

@@ -73,7 +73,7 @@ wheels = [
 [[package]]
 name = "divergent-beamsearch"
-version = "0.1.1"
+version = "0.1.5"
 source = { editable = "." }
 dependencies = [
     { name = "multi-choices-parser" },
@@ -88,7 +88,7 @@ dev = [
 [package.metadata]
 requires-dist = [
-    { name = "multi-choices-parser", specifier = ">=0.9.57" },
+    { name = "multi-choices-parser", specifier = ">=0.9.61" },
     { name = "torch", specifier = ">=2.0.0" },
     { name = "transformers", specifier = ">=4.47.1" },
 ]
@@ -221,11 +221,11 @@ wheels = [
 [[package]]
 name = "multi-choices-parser"
-version = "0.9.57"
+version = "0.9.61"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/69/55/e2228a3839d46282947f4383bc0a588751b164a24c162e4642a65ffe906f/multi_choices_parser-0.9.57.tar.gz", hash = "sha256:f4f42c4a6abbaa5a2529b976d6a4d756edb8fa422a59b98f30fd2a4331995600", size = 7662 }
+sdist = { url = "https://files.pythonhosted.org/packages/e2/17/90a6125a2145c03e39c3e7f78f65121eb14dedb9de8b40aee3c8a24a709b/multi_choices_parser-0.9.61.tar.gz", hash = "sha256:be932cac4aeabe9ee057c6d4592ea4325a0a92e52758d77a9e08bafa2cd23294", size = 7889 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/4d/bf/e8d829acca04bc1429ca15582321c3b4702db0e071dbddcf3246f1895956/multi_choices_parser-0.9.57-py3-none-any.whl", hash = "sha256:ff58ac7c440d3129ffe89420039c4ddd6483e54b0526f68d25933e4f62d3c8d2", size = 6702 },
+    { url = "https://files.pythonhosted.org/packages/3c/4f/c5a514a510779202ff37505220edfba9154ceff31958ed71fa1878781af9/multi_choices_parser-0.9.61-py3-none-any.whl", hash = "sha256:36bc367bceb66bbfb1bea26d9a38aa9cd10273b54cef331dd7c69da582fb9c2a", size = 6870 },
 ]
 [[package]]
@@ -618,21 +618,21 @@ dependencies = [
     { name = "fsspec" },
     { name = "jinja2" },
     { name = "networkx" },
-    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
     { name = "setuptools", marker = "python_full_version >= '3.12'" },
     { name = "sympy" },
-    { name = "triton", marker = "python_full_version < '3.13' and platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "triton", marker = "python_full_version < '3.13' and platform_machine == 'x86_64' and platform_system == 'Linux'" },
     { name = "typing-extensions" },
 ]
 wheels = [
@@ -652,7 +652,7 @@ name = "tqdm"
 version = "4.67.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "colorama", marker = "platform_system == 'Windows'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
 wheels = [

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/.gitignore RENAMED Viewed

File without changes

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/.python-version RENAMED Viewed

File without changes

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/LICENCE RENAMED Viewed

File without changes

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/README.md RENAMED Viewed

File without changes

{divergent_beamsearch-0.1.5 → divergent_beamsearch-0.1.7}/src/divergent_beamsearch/__init__.py RENAMED Viewed

File without changes

divergent-beamsearch 0.1.5__tar.gz → 0.1.7__tar.gz

divergent-beamsearch 0.1.5__tar.gz → 0.1.7__tar.gz