robo-lib 0.0.11 (py3-none-any.whl) → 1.0.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- robo_lib/__init__.py +2 -3
- robo_lib/components.py +246 -269
- {robo_lib-0.0.11.dist-info → robo_lib-1.0.1.dist-info}/METADATA +8 -17
- robo_lib-1.0.1.dist-info/RECORD +6 -0
- robo_lib-0.0.11.dist-info/RECORD +0 -6
- {robo_lib-0.0.11.dist-info → robo_lib-1.0.1.dist-info}/WHEEL +0 -0
- {robo_lib-0.0.11.dist-info → robo_lib-1.0.1.dist-info}/licenses/LICENSE +0 -0
robo_lib/__init__.py
CHANGED
@@ -1,8 +1,7 @@
 from .components import TokenizerConstructor as TokenizerConstructor
 from .components import create_mask as create_mask
-from .components import
-from .components import
-from .components import scan_max_block_size as scan_max_block_size
+from .components import pre_process_data as pre_process_data
+from .components import safe_stack as safe_stack
 from .components import DataProcessor as DataProcessor
 from .components import get_valid_samples as get_valid_samples
 from .components import get_batch as get_batch
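For orientation, the exports visible in this hunk can be imported directly from the package. A minimal sketch (the full `__init__.py` may re-export more than is shown in the hunk above):

```
# exports visible in the hunk above (the package may also re-export others, e.g. RoboConstructor)
from robo_lib import TokenizerConstructor, create_mask, pre_process_data, safe_stack
from robo_lib import DataProcessor, get_valid_samples, get_batch
```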
robo_lib/components.py
CHANGED
@@ -6,30 +6,28 @@ import numpy as np
 import random
 import pickle
 import itertools
+from pathlib import Path
+import os
+from typing import List, Literal
+
+pre_tokenizers = Literal["Whitespace", "IndividualDigit", "Digits", "BertPreTokenizer", "ByteLevel", "Metaspace", "Punctuation", "UnicodeScripts", "WhitespaceSplit"]
 
 class TokenizerConstructor:
     '''
-
     simple assembler for tokenizer using the tokenizers library
-    tokenizer parameters can be set using strings and list[
+    tokenizer parameters can be set using strings and list[str]s
     strings used for tokenizer_type, pre_tokenizers, normalizers arguments are the names of those present in the
     tokenizers library. Additionally "IndividualDigits" can be used in normalizers for tokenizers.pre_tokenizers.Digits(individual_digits=True)
 
-    train([paths]) function points to text files to be used for training the tokenizer instance
-
-    encode(string) function encodes string using trained tokenizer instance
-
-    decode(list[int]) function decodes list of tokenz using trained tokenizer instance
-
     vocab_size attribute returns the tokenizer instance's vocab_size (untrained tokenizer will have vocab_size=None)
 
-
     '''
     def __init__(self,
                  min_frequency:int=2,
-                 tokenizer_type:
-                 pre_tokenizers:
+                 tokenizer_type:Literal["BPE", "WordLevel", "WordPiece", "Unigram"] = "BPE",
+                 pre_tokenizers: pre_tokenizers|List[pre_tokenizers]=["Whitespace"],
                  normalizers:list[str]|str=["Lowercase", "NFD", "StripAccents", "Strip"],
+                 vocab:dict[str,int] = {},
                  special_tokens:list[str]|str=[],
                  unknown_token_string:str="<unk>",
                  start_token_string:str="<sos>",
@@ -42,25 +40,29 @@ class TokenizerConstructor:
 
         if isinstance(special_tokens, str):
             special_tokens = [special_tokens]
-        self.special_tokens = special_tokens + [token for token in [unknown_token_string, start_token_string, end_token_string, pad_token_string, new_line_token_string] if token not in special_tokens and token
-        self.unknown_token = self.special_tokens.index(unknown_token_string) if unknown_token_string
-        self.start_token = self.special_tokens.index(start_token_string) if start_token_string
-        self.end_token = self.special_tokens.index(end_token_string) if end_token_string
-        self.pad_token = self.special_tokens.index(pad_token_string) if pad_token_string
-        self.
+        self.special_tokens = special_tokens + [token for token in [unknown_token_string, start_token_string, end_token_string, pad_token_string, new_line_token_string] if token not in special_tokens and token is not None]
+        self.unknown_token = self.special_tokens.index(unknown_token_string) if unknown_token_string is not None else None
+        self.start_token = self.special_tokens.index(start_token_string) if start_token_string is not None else None
+        self.end_token = self.special_tokens.index(end_token_string) if end_token_string is not None else None
+        self.pad_token = self.special_tokens.index(pad_token_string) if pad_token_string is not None else None
+        self.pad_token_string = pad_token_string
+        self.new_line_token = self.special_tokens.index(new_line_token_string) if new_line_token_string is not None else None
 
         if tokenizer_type == "BPE":
             self.tokenizer_type = tokenizers.Tokenizer(tokenizers.models.BPE(unk_token=unknown_token_string))
             self.trainer = tokenizers.trainers.BpeTrainer(special_tokens=self.special_tokens, min_frequency=min_frequency, vocab_size=vocab_size)
         elif tokenizer_type == "WordLevel":
-            self.tokenizer_type = tokenizers.Tokenizer(tokenizers.models.WordLevel(unk_token=unknown_token_string))
+            self.tokenizer_type = tokenizers.Tokenizer(tokenizers.models.WordLevel(vocab = vocab, unk_token=unknown_token_string))
             self.trainer = tokenizers.trainers.WordLevelTrainer(special_tokens=self.special_tokens, min_frequency=min_frequency, vocab_size=vocab_size)
         elif tokenizer_type == "WordPiece":
-            self.tokenizer_type = tokenizers.Tokenizer(tokenizers.models.WordPiece(unk_token=unknown_token_string))
+            self.tokenizer_type = tokenizers.Tokenizer(tokenizers.models.WordPiece(vocab = vocab, unk_token=unknown_token_string))
            self.trainer = tokenizers.trainers.WordPieceTrainer(special_tokens=self.special_tokens, min_frequency=min_frequency, vocab_size=vocab_size)
         elif tokenizer_type == "Unigram":
-            self.tokenizer_type = tokenizers.Tokenizer(tokenizers.models.Unigram(
+            self.tokenizer_type = tokenizers.Tokenizer(tokenizers.models.Unigram())
             self.trainer = tokenizers.trainers.UnigramTrainer(special_tokens=self.special_tokens, min_frequency=min_frequency, vocab_size=vocab_size)
+
+        if self.pad_token is not None:
+            self.tokenizer_type.enable_padding(pad_id=self.pad_token, pad_token=pad_token_string)
 
         if isinstance(pre_tokenizers, str):
             pre_tokenizers = [pre_tokenizers]
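A minimal usage sketch of the constructor options shown above (the training file path is hypothetical; `vocab` is only passed to the WordLevel and WordPiece models, as the hunk shows):

```
from robo_lib import TokenizerConstructor

# BPE tokenizer with the default pre-tokenizer/normalizer stack
tok = TokenizerConstructor(tokenizer_type="BPE", pre_tokenizers=["Whitespace"], min_frequency=2)
tok.train("data/corpus.txt")          # hypothetical path; accepts a str or list[str]
ids = tok.encode("hello world")       # list[int]
text = tok.decode(ids)                # str
```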
@@ -114,79 +116,76 @@ class TokenizerConstructor:
 
 
     def train(self, training_paths:list[str]|str) -> None:
+        '''
+        points to text files to be used for training the tokenizer instance
+        '''
         if isinstance(training_paths, str):
             training_paths = [training_paths]
         self.tokenizer_type.train(training_paths, trainer=self.trainer)
         self.vocab_size = self.tokenizer_type.get_vocab_size()
 
     def encode(self, inp:str) -> list[int]:
+        '''
+        encodes string using trained tokenizer instance
+        '''
         return self.tokenizer_type.encode(inp).ids
 
+    def encode_batch(self, inp:list[str], max_length:int=None) -> list[list[int]]:
+        '''
+        encodes strings in parallel and truncates entries with length > max_length
+        '''
+        if max_length is not None:
+            self.tokenizer_type.enable_truncation(max_length=max_length)
+            self.tokenizer_type.enable_padding(pad_id=self.pad_token, pad_token=self.pad_token_string, length=max_length)
+        out = [row.ids for row in self.tokenizer_type.encode_batch(inp)]
+        self.tokenizer_type.no_truncation()
+        self.tokenizer_type.enable_padding(pad_id=self.pad_token, pad_token=self.pad_token_string)
+        return out
+
     def decode(self, inp:list[int]) -> str:
+        '''
+        decodes list of tokenz using trained tokenizer instance
+        '''
         return self.tokenizer_type.decode(inp)
 
 
 
 def create_mask(row:list, block_size:int) -> list[bool]:
     '''
-
     creates a mask list of length block_size for row, asuming mask does cover the entire row input
-
     '''
     mask = [1]*len(row) + [0]*(block_size - len(row))
     return mask
 
-def
-    '''
-
-    returns padded row. Row is padded until length block_size with specified pad_token value
-
-    '''
-    row.extend([pad_token]*(block_size - len(row)))
-    return row
-
-def process_row(row:str, tokenizer:TokenizerConstructor) -> list[int]:
+def pre_process_data(data:str, start_token_string:str, end_token_string:str) -> list[int]:
     '''
-
-    returns tokenized row using specified tokenizer, and adds the tokenizer's start and end tokens if they exist
-
+    returns data with the tokenizer's start and end tokens added to each row if they exist
     '''
-
-
-
-
-
-
-
+    if start_token_string is None and end_token_string is None:
+        return data
+    else:
+        for i in range(len(data)):
+            if start_token_string is not None:
+                data[i] = start_token_string + data[i]
+            if end_token_string is not None:
+                data[i] = data[i] + end_token_string
+
+        return data
 
-def
+def safe_stack(tensor_list:list[torch.tensor]) -> torch.tensor:
     '''
-
-    returns
-
+    torch stack with check to ensure tensors are valid in input list
+    returns torch.stack(out_list) for all valid torch tensors in tensor_list. raises error if no valid tensors
     '''
-
-
-
+    out_list = [row for row in tensor_list if isinstance(row, torch.Tensor)]
+    if len(out_list) == 0:
+        raise ValueError("no valid tensors in list.")
+    return torch.stack(out_list)
 
 
 class DataProcessor:
     '''
-
     data processor can be instantiated by specifying the tokenizer(s) for decoder and encoder data
-
-    process_list() function processes raw data in the form of list[str] or str for decoder and encoder simultaneously and
-    saves them to save_path as .pt files.
-    - encoder and decoder input data should have matching input and outputs so enc_data[n] should have its corresponding
-    decoder data at dec_data[n].
-    - max block size can be specified for both input and output, default takes the max
-    block size provided in the data respectively.
-    - if enc/dec_block_size is specified and enc/dec_block_size_exceeded_policy is not, an error will occur if a piece
-    of data larger than enc/dec_block_size is encountered. enc/dec_block_size_exceeded_policy can be set to "skip" or
-    "trim" to skip rows larger than enc/dec_block_size or truncate the row to specified enc/dec_block_size respectively.
-    - enc/dec_create_masks saves masks tensors to save_path as .pt files.
-
-
     '''
     def __init__(self,
                  dec_tokenizer:TokenizerConstructor,
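A short sketch of the helpers introduced above, using a tokenizer trained as before (input strings are illustrative):

```
import torch
from robo_lib import pre_process_data, safe_stack

texts = pre_process_data(["bonjour", "merci"], "<sos>", "<eos>")   # adds start/end token strings to each row
batch_ids = tok.encode_batch(texts, max_length=16)                 # rows padded/truncated to max_length

rows = [torch.tensor(ids, dtype=torch.long) for ids in batch_ids]
stacked = safe_stack(rows)                                         # raises ValueError if no valid tensors
```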
@@ -196,165 +195,153 @@ class DataProcessor:
         self.enc_tokenizer = enc_tokenizer
 
     def process_list(self,
-                     save_path:str,
                      dec_data:list[str]|str,
                      dec_max_block_size:int=None,
                      dec_create_masks:bool=True,
-                     dec_block_size_exceeded_policy:str=None,
                      enc_data:list[str]=None,
                      enc_max_block_size:int=None,
                      enc_create_masks:bool=True,
-
+                     save_path:str = "."
                      ) -> None:
+        '''
+        processes raw data in the form of list[str] or str for decoder and encoder simultaneously and
+        saves them to save_path as .pt files.
+        - encoder and decoder input data should have matching input and outputs so enc_data[n] should have its corresponding
+        decoder data at dec_data[n].
+        - max block size can be specified for both input and output, default takes the max
+        block size provided in the data respectively. If data length > max_length, the data is trimmed.
+        '''
 
         if isinstance(dec_data, str):
             dec_data = [dec_data]
         dec_data_length = len(dec_data)
-        save_path = save_path.replace(".pt", "")
-
-        if dec_max_block_size == None:
-            dec_max_block_size = scan_max_block_size(dec_data, self.dec_tokenizer)
 
-        if enc_data
-
+        if enc_data is not None:
+            if self.enc_tokenizer is None:
+                self.enc_tokenizer = self.dec_tokenizer
 
             enc_data_length = len(enc_data)
             if dec_data_length != enc_data_length:
-                raise Exception(f"decoder and encoder lengths do not match. decoder_data_length is {dec_data_length}, encoder_data_length is {enc_data_length}")
+                raise Exception(f"decoder data and encoder data lengths do not match. decoder_data_length is {dec_data_length}, encoder_data_length is {enc_data_length}")
 
-
-
-
-            enc_out_list = [[]]*enc_data_length
-            enc_mask_list = [[]]*enc_data_length if enc_create_masks else []
-        else:
-            enc_out_list = []
-            enc_mask_list = []
-
-        dec_out_list = [[]]*dec_data_length
-        dec_mask_list = [[]]*dec_data_length if dec_create_masks else []
-        for index in range(len(dec_out_list)):
-            dec_processed_item = process_row(dec_data[index], self.dec_tokenizer)
-            if dec_max_block_size != None and len(dec_processed_item) > dec_max_block_size:
-                if dec_block_size_exceeded_policy == "trim":
-                    dec_processed_item = dec_processed_item[:dec_max_block_size]
-                elif dec_block_size_exceeded_policy == "skip":
-                    continue
-                elif dec_block_size_exceeded_policy == None:
-                    raise Exception(f"encountered item in dec_data larger than maximum block size ({dec_max_block_size})")
-            if dec_create_masks:
-                dec_mask = create_mask(dec_processed_item, dec_max_block_size)
-            dec_processed_item = pad(dec_processed_item, dec_max_block_size, self.dec_tokenizer.pad_token)
-
-            if enc_data != None:
-                enc_processed_item = process_row(enc_data[index], self.enc_tokenizer)
-                if enc_max_block_size != None and len(enc_processed_item) > enc_max_block_size:
-                    if enc_block_size_exceeded_policy == "trim":
-                        enc_processed_item = enc_processed_item[:enc_max_block_size]
-                    elif enc_block_size_exceeded_policy == "skip":
-                        continue
-                    elif enc_block_size_exceeded_policy == None:
-                        raise Exception(f"encountered item in enc_data larger than maximum block size ({enc_max_block_size})")
-                if enc_create_masks:
-                    enc_mask = create_mask(enc_processed_item, enc_max_block_size)
-                enc_processed_item = pad(enc_processed_item, enc_max_block_size, self.enc_tokenizer.pad_token)
-
-            dec_out_list[index] = torch.tensor(dec_processed_item, dtype=torch.long)
-            if dec_create_masks:
-                dec_mask_list[index] = torch.tensor(dec_mask, dtype=torch.bool)
-
-            if enc_data != None:
-                enc_out_list[index] = torch.tensor(enc_processed_item, dtype=torch.long)
-                if enc_create_masks:
-                    enc_mask_list[index] = torch.tensor(enc_mask, dtype=torch.bool)
-
-        dec_out_list = torch.stack([row for row in dec_out_list if row != []])
-        torch.save(dec_out_list, save_path + "_decoder_data.pt")
+        print("processing data")
+        dec_out_list = self.dec_tokenizer.encode_batch(dec_data, max_length=dec_max_block_size)
         if dec_create_masks:
-
-
-
-
-
+            mask_tokenizer = TokenizerConstructor(min_frequency=1, tokenizer_type="WordLevel", vocab={str(self.dec_tokenizer.pad_token): 0, "<unk>": 1}, special_tokens=["<pad>", "<unk>"], unknown_token_string="<unk>", start_token_string=None, end_token_string=None, pad_token_string=None)
+            dec_mask_list = mask_tokenizer.encode_batch([str(i).replace("[", "").replace("]", "").replace(",", "") for i in dec_out_list])
+
+        if enc_data is not None:
+            enc_out_list = self.enc_tokenizer.encode_batch(enc_data, max_length=enc_max_block_size)
             if enc_create_masks:
-
-
+                mask_tokenizer = TokenizerConstructor(min_frequency=1, tokenizer_type="WordLevel", vocab={str(self.enc_tokenizer.pad_token): 0, "<unk>": 1}, special_tokens=["<pad>", "<unk>"], unknown_token_string="<unk>", start_token_string=None, end_token_string=None, pad_token_string=None)
+                enc_mask_list = mask_tokenizer.encode_batch([str(i).replace("[", "").replace("]", "").replace(",", "") for i in enc_out_list])
 
+        dec_out_list = torch.tensor(dec_out_list, dtype=torch.long)
+        Path(save_path).mkdir(parents=True, exist_ok=True)
+        torch.save(dec_out_list, os.path.join(save_path, "decoder_data.pt"))
+        if dec_create_masks:
+            dec_mask_list = torch.tensor(dec_mask_list, dtype=torch.long)
+            torch.save(dec_mask_list, os.path.join(save_path, "decoder_mask_data.pt"))
+        if enc_data is not None:
+            enc_out_list = torch.tensor(enc_out_list, dtype=torch.long)
+            torch.save(enc_out_list, os.path.join(save_path, "encoder_data.pt"))
+            if enc_create_masks:
+                enc_mask_list = torch.tensor(enc_mask_list, dtype=torch.long)
+                torch.save(enc_mask_list, os.path.join(save_path, "encoder_mask_data.pt"))
 
-
-
+
+def get_valid_samples(random_samples:torch.Tensor,
+                      masks:torch.Tensor,
                      block_size:int
                      ) -> list[int]:
    '''
-
    returns list of len(random_samples) with values corresponding to index values of masks that ensure minimum masked
    values when taking sample of length block_size
-
    '''
    valid_samples = [0 if sum(masks[row_num]) <= block_size else random.randint(0, sum(masks[row_num]) - block_size) for row_num in random_samples]
    return valid_samples
 
-def get_batch(data:torch.
-              random_samples:torch.
-              masks:torch.
+def get_batch(data:torch.Tensor,
+              random_samples:torch.Tensor,
+              masks:torch.Tensor=None,
              block_size:int=None,
              get_offset:bool=True
              ) -> tuple[torch.tensor]:
    '''
-
    returns random batches from data tensor using random sample for data selection.
    - returns corresponding batch offset by 1 unless get_offset=False
    - returns corresponding masks batch if masks data is specified
-
    '''
    batch_size = len(random_samples)
-    if block_size
+    if block_size is not None and block_size != data.shape[1]:
        if block_size >= data.shape[1]:
            raise Exception(f"specified block size ({block_size}) is larger than input tensor length ({data.shape[1]})")
 
-        if masks
+        if masks is not None:
            random_point = get_valid_samples(random_samples, masks, block_size)
        else:
            random_point = torch.randint(data.shape[1] - block_size, (batch_size,))
        batch_in = torch.stack([data[random_samples[i]][random_point[i]:random_point[i]+block_size-int(get_offset)] for i in range(batch_size)])
-        masks_in = torch.stack([masks[random_samples[i]][random_point[i]:random_point[i]+block_size-int(get_offset)] for i in range(batch_size)]) if masks
+        masks_in = torch.stack([masks[random_samples[i]][random_point[i]:random_point[i]+block_size-int(get_offset)] for i in range(batch_size)]) if masks is not None else None
        batch_out = torch.stack([data[random_samples[i]][1+random_point[i]:random_point[i]+block_size] for i in range(batch_size)]) if get_offset else None
    else:
        block_size = data.shape[1]
        batch_in = torch.stack([data[row_num][:block_size-int(get_offset)] for row_num in random_samples])
-        masks_in = torch.stack([masks[row_num][:block_size-int(get_offset)] for row_num in random_samples]) if masks
+        masks_in = torch.stack([masks[row_num][:block_size-int(get_offset)] for row_num in random_samples]) if masks is not None else None
        batch_out = torch.stack([data[row_num][1:block_size] for row_num in random_samples]) if get_offset else None
 
    return batch_in, batch_out, masks_in
 
-def top_kp_filter(logits:torch.
-                  top_k:int,
-                  top_p:float=None
-                  ) -> torch.
+def top_kp_filter(logits: torch.Tensor,
+                  top_k: int = None,
+                  top_p: float = None
+                  ) -> torch.Tensor:
    '''
+    Returns predicted token by filtering output logits using top_k and/or top_p (nucleus) filtering.
 
-
-
+    Args:
+        logits: (batch_size, vocab_size) tensor of raw logits.
+        top_k: keep only top_k tokens with highest logits.
+        top_p: keep the smallest set of tokens with cumulative probability >= top_p.
    '''
-
-
-
-
-
-
-
-    indices_to_remove = filter.scatter(1, sorted_indices, filter)
-    logits[indices_to_remove] = float("-inf")
+    logits = logits.clone()  # avoid modifying input logits in-place
+
+    # Apply top-p filtering if specified
+    if top_p is not None:
+        sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1)
+        probs = F.softmax(sorted_logits, dim=-1)
+        cumulative_probs = torch.cumsum(probs, dim=-1)
 
-
-
+        # Remove tokens with cumulative probability above threshold (except first token)
+        sorted_mask = cumulative_probs > top_p
+        sorted_mask[..., 1:] = sorted_mask[..., :-1].clone()
+        sorted_mask[..., 0] = False
 
-
-
-
-    sorted_logits[0][0] += 1 - sum(sorted_logits[0])
+        # Mask tokens to remove by setting logits to -inf
+        indices_to_remove = sorted_mask.scatter(1, sorted_indices, sorted_mask)
+        logits[indices_to_remove] = float('-inf')
 
-
+    # Apply top-k filtering if specified
+    if top_k is not None:
+        top_k = min(top_k, logits.size(-1))  # safety check
+        topk_logits, topk_indices = torch.topk(logits, top_k, dim=-1)
+        topk_probs = F.softmax(topk_logits, dim=-1).cpu().numpy()
+
+        # For each batch, sample from top_k candidates
+        selected = []
+        for i in range(topk_probs.shape[0]):
+            candidate = np.random.choice(topk_indices[i].cpu().numpy(), 1, p=topk_probs[i])
+            selected.append(candidate[0])
+        selected = torch.tensor(selected, dtype=torch.long)
+
+    else:
+        # If only top_p is specified, sample from entire filtered logits
+        probs = F.softmax(logits, dim=-1).cpu().numpy()
+        selected = []
+        for i in range(probs.shape[0]):
+            candidate = np.random.choice(len(probs[i]), 1, p=probs[i])
+            selected.append(candidate[0])
+        selected = torch.tensor(selected, dtype=torch.long)
 
    return selected
 
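A hedged sketch of the reworked pipeline above: `process_list` now writes fixed-name `.pt` files into a `save_path` directory, and `get_batch` draws training batches from the saved tensors. File names follow the hunk above; the data and directory names are illustrative, and `enc_tokenizer` is assumed optional for a decoder-only setup:

```
import torch
from robo_lib import DataProcessor, get_batch

proc = DataProcessor(dec_tokenizer=tok)                     # tok: a trained TokenizerConstructor
proc.process_list(dec_data=["some text", "more text"],
                  dec_max_block_size=32,
                  save_path="data/training")                # writes data/training/decoder_data.pt (+ mask file)

data = torch.load("data/training/decoder_data.pt", weights_only=True)
masks = torch.load("data/training/decoder_mask_data.pt", weights_only=True)
samples = torch.randint(data.shape[0], (8,))
batch_in, batch_out, masks_in = get_batch(data, samples, masks=masks, block_size=16)
```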
@@ -362,10 +349,8 @@ def top_kp_filter(logits:torch.tensor,
 
 class SelfAttention(nn.Module):
     '''
-
     single self attention block of size head_size.
     triangle_mask=True to apply look-ahead mask of size block_size.
-
     '''
     def __init__(self,
                  head_size:int,
@@ -387,16 +372,14 @@ class SelfAttention(nn.Module):
         self.dropout = nn.Dropout(dropout)
 
     def forward(self,
-                k:torch.
-                q:torch.
-                v:torch.
-                mask:torch.
+                k:torch.Tensor,
+                q:torch.Tensor,
+                v:torch.Tensor,
+                mask:torch.Tensor=None
                 ) -> torch.tensor:
         '''
-
         k, q and v are key, tensors to get key, query and value tensors.
         custom mask tensor can be applied.
-
         '''
         _,T,_ = k.shape
 
@@ -406,7 +389,7 @@ class SelfAttention(nn.Module):
         wei = q @ k.transpose(-2,-1) * k.shape[-1]**-0.5
         if self.triangle_mask and self.block_size >= 0:
             wei = wei.masked_fill(self.tril[:T, :T] == 0, float("-inf"))
-        if mask
+        if mask is not None:
             wei = wei.masked_fill(mask.unsqueeze(1)==0, float("-inf"))
         wei = F.softmax(wei, dim=-1)
         wei = self.dropout(wei)
@@ -417,12 +400,10 @@ class SelfAttention(nn.Module):
 
 class MultiHeadAttention(nn.Module):
     '''
-
     multi-head attention block consisting of num_heads SelfAttention blocks and a linear layer to
     rejoin outputs.
     specified head_size, n_embed, dropout, block_size and triangle_mask values are passed through to
     SelfAttention blocks
-
     '''
     def __init__(self,
                  num_heads:int,
@@ -438,16 +419,14 @@ class MultiHeadAttention(nn.Module):
         self.dropout = nn.Dropout(dropout)
 
     def forward(self,
-                k:torch.
-                q:torch.
-                v:torch.
-                mask:torch.
+                k:torch.Tensor,
+                q:torch.Tensor,
+                v:torch.Tensor,
+                mask:torch.Tensor=None
                 ) -> torch.tensor:
         '''
-
         k, q and v are key, tensors to get key, query and value tensors.
         custom mask tensor can be applied.
-
         '''
         out = torch.cat([h(k, q, v, mask=mask) for h in self.heads], dim=-1)
         out = self.dropout(self.proj(out))
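The forward signatures above now take explicit torch.Tensor arguments with an optional mask. A minimal self-attention call might look like the sketch below; shapes are illustrative, and the constructor parameter names beyond `head_size` are assumed from the MultiHeadAttention docstring rather than confirmed by this hunk:

```
import torch

# hypothetical sizes: batch of 4, sequence length 8, embedding width 32
x = torch.randn(4, 8, 32)
mask = torch.ones(4, 8, dtype=torch.bool)            # optional padding mask

sa = SelfAttention(head_size=16, n_embed=32, dropout=0.1, block_size=8, triangle_mask=True)
out = sa(x, x, x, mask=mask)                         # self-attention: k, q and v are all x
```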
@@ -455,11 +434,9 @@ class MultiHeadAttention(nn.Module):
 
 class FeedForward(nn.Module):
     '''
-
     feed forward layer used after multi-head attention consisting of 2 lieanr layers with
     a ReLU in between. Linear layers expand from n_embed to n_embed * expansion_factor and
     back to n_embed.
-
     '''
     def __init__(self,
                  n_embed:int,
@@ -475,16 +452,14 @@ class FeedForward(nn.Module):
         )
 
     def forward(self,
-                x:torch.
+                x:torch.Tensor
                 ) -> torch.tensor:
         return self.net(x)
 
 class EncoderBlock(nn.Module):
     '''
-
     encoder block consists of a sequence of multi-head attention, LayerNorm, feed-forward, LayerNorm
     head_size is calculated from n_embed // n_head
-
     '''
     def __init__(self,
                  n_embed:int,
@@ -500,8 +475,8 @@ class EncoderBlock(nn.Module):
         self.ln2 = nn.LayerNorm(n_embed)
 
     def forward(self,
-                x:torch.
-                mask:torch.
+                x:torch.Tensor,
+                mask:torch.Tensor=None
                 ) -> tuple[torch.tensor]:
         att = self.sa(x, x, x, mask=mask)
         x = self.ln1(att + x)
@@ -512,13 +487,11 @@ class EncoderBlock(nn.Module):
 
 class DecoderBlock(nn.Module):
     '''
-
     decoder block consists of a sequence of multi-head attention, LayerNorm, feed-forward, LayerNorm
     if cross-attention is True, a multi-head attention block and layerNorm is added before feed-forward
     taking specified enc_k and enc_v tensors as value and key tensors. These values should be the output
     of an encoder block.
     head_size is calculated from n_embed // n_head
-
     '''
     def __init__(self,
                  n_embed:int,
@@ -541,15 +514,15 @@ class DecoderBlock(nn.Module):
             self.ca = None
 
     def forward(self,
-                x:torch.
-                enc_k:torch.
-                enc_v:torch.
+                x:torch.Tensor,
+                enc_k:torch.Tensor,
+                enc_v:torch.Tensor,
                 mask_out:bool=None,
-                mask_in:torch.
+                mask_in:torch.Tensor=None
                 ) -> tuple[torch.tensor]:
         att = self.sa(x, x, x, mask=mask_out)
         x = self.ln1(att + x)
-        if self.ca
+        if self.ca is not None:
             catt = self.ca(enc_k, x, enc_v, mask=mask_in)
             x = self.ln3(catt + x)
         ff = self.ffwd(x)
@@ -558,9 +531,7 @@ class DecoderBlock(nn.Module):
 
 class MySequential(nn.Sequential):
     '''
-
     MySequential serves the same purpose as nn.Sequential but allows for multiple inputs and outputs
-
     '''
     def forward(self, *input):
         for module in self._modules.values():
@@ -569,39 +540,12 @@ class MySequential(nn.Sequential):
 
 class RoboConstructor(nn.Module):
     '''
-
     RoboConstructor assembles an encoder-decoder or decoder-only transformer.
     if the enc_* variables are not specified, or enc_n_blocks==0, the transformer will be decoder-only.
     - if any of the dec_* variables are not specified (except dec_expansion_factor) an error will occur.
     - if enc_n_blocks > 0 and any of the enc_* variables are not specified (except enc_expansion_factor and enc_block_size) an error will occur.
     dropout can be specified, default=0.1.
     if device is not specified, device will default to first available among ("cuda", "mps", "cpu")
-
-    prep_data() function returns a batch of specified batch_size, from dec_data (and dec_masks, enc_data and enc_masks if specified)
-    - if encoder is configured in this instance, enc_data must be specified.
-    - dec_block_size must be specified.
-    - if enc_block_size is not specified, the entire block_size of enc_data will be used.
-    this function is for use in train_robo()
-
-    train_robo() function trains the RoboConstructor instance transformer.
-    - training parameters can be specified such as max_iters, eval_interval, batch_size, eval_iters, learning_rate, label_smoothing.
-    - paths must be specified for decoder training data (and encoder training data if encoder-decoder transformer)
-    - optional paths to specify: decoder and encoder masks, decoder and encoder validation data, decoder and encoder validation masks data
-    - if neither pad_token or tokenizer is specified (or tokenizer has no pad_token), any padding in labels will contribute towards the loss
-    which may cause unwanted results. Specifying pad_token and/or tokenizer allows loss to be calculated while ignoring any padding in labels
-    - specify save_path to save the model as a .pkl file every eval_interval iterations using the save_component function.
-
-    generate() function uses the tranformer model from the RoboConstructor instance to generate an output from an input.
-    - input can be in the form of a string if input tokenizer is specified (enc_tokenizer for encoder-decoder and dec_tokenizder for decoder-only),
-    otherwise, it must be in the form of a list of tokens.
-    - if dec_tokenizer is specified, output will be a string.
-    - new tokens are generated until the dec_end_token (or dec_tokenizer.end_token) is generated, or the number of tokens generated == max_new_tokens.
-    - if input tokenizer is not specified, or input tokenizer.start_token is None, enc_start_token must be specified for an encoder-decoder model.
-    - separator_token is used to separate the input and generated tokens for a decoder-only model. If this value is not specified, there
-    will be no distinction between input tokens and generated tokens to the transformer, even if dec_tokenizer is specified.
-    - if new_line_token is not specified, output will be returned in one line, without any "\n" line separators.
-    - temperature, top_k and top_p can be specified to adjust the output.
-
     '''
     def __init__(self,
                  n_embed:int,
@@ -628,7 +572,7 @@ class RoboConstructor(nn.Module):
         self.dec_expansion_factor = dec_expansion_factor
         self.dropout = dropout
 
-        if device
+        if device is None:
             self.device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
         else:
             self.device = device
@@ -673,13 +617,13 @@ class RoboConstructor(nn.Module):
             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
 
     def forward(self,
-                dec_in:torch.
-                dec_mask:torch.
-                enc_in:torch.
-                enc_mask:torch.
+                dec_in:torch.Tensor,
+                dec_mask:torch.Tensor=None,
+                enc_in:torch.Tensor=None,
+                enc_mask:torch.Tensor=None
                 ) -> torch.tensor:
         _, dec_T = dec_in.shape
-        if enc_in
+        if enc_in is not None:
             _, enc_T = enc_in.shape
 
         dec_tok_emb = self.dec_token_embedding_table(dec_in)
@@ -714,17 +658,24 @@ class RoboConstructor(nn.Module):
                   enc_block_size:int=None,
                   enc_masks:str=None
                   ) -> tuple[torch.tensor]:
+        '''
+        returns a batch of specified batch_size, from dec_data (and dec_masks, enc_data and enc_masks if specified)
+        - if encoder is configured in this instance, enc_data must be specified.
+        - dec_block_size must be specified.
+        - if enc_block_size is not specified, the entire block_size of enc_data will be used.
+        this method is for use in train_robo()
+        '''
         random_samples = torch.randint(dec_data.shape[0], (batch_size,))
 
         dec_train_batch_in, dec_train_batch_out, dec_train_masks_in = get_batch(dec_data, random_samples, masks=dec_masks, block_size=dec_block_size, get_offset=True)
         dec_train_batch_in = dec_train_batch_in.to(self.device)
-        dec_train_batch_out = dec_train_batch_out.to(self.device) if dec_train_batch_out
-        dec_train_masks_in = dec_train_masks_in.to(self.device) if dec_train_masks_in
+        dec_train_batch_out = dec_train_batch_out.to(self.device) if dec_train_batch_out is not None else None
+        dec_train_masks_in = dec_train_masks_in.to(self.device) if dec_train_masks_in is not None else None
 
         if self.cross_attention:
             enc_train_batch_in, _, enc_train_masks_in = get_batch(enc_data, random_samples, masks=enc_masks, block_size=enc_block_size, get_offset=False)
             enc_train_batch_in = enc_train_batch_in.to(self.device)
-            enc_train_masks_in = enc_train_masks_in.to(self.device) if enc_train_masks_in
+            enc_train_masks_in = enc_train_masks_in.to(self.device) if enc_train_masks_in is not None else None
         else:
             enc_train_batch_in = None
             enc_train_masks_in = None
@@ -736,14 +687,8 @@ class RoboConstructor(nn.Module):
                    max_iters:int,
                    eval_interval:int,
                    batch_size:int,
-
-
-                   dec_training_masks_path:str=None,
-                   dec_eval_masks_path:str=None,
-                   enc_training_path:str=None,
-                   enc_eval_path:str=None,
-                   enc_training_masks_path:str=None,
-                   enc_eval_masks_path:str=None,
+                   training_dir_path:str,
+                   eval_dir_path:str=None,
                    eval_iters:int=3,
                    learning_rate:float=1e-4,
                    pad_token:int=None,
@@ -751,22 +696,46 @@ class RoboConstructor(nn.Module):
                    save_path:str=None,
                    label_smoothing:float=0.1
                    ) -> None:
+        '''
+        trains the RoboConstructor instance transformer.
+        - training parameters can be specified such as max_iters, eval_interval, batch_size, eval_iters, learning_rate, label_smoothing.
+        - paths must be specified for decoder training data (and encoder training data if encoder-decoder transformer)
+        - optional paths to specify: decoder and encoder masks, decoder and encoder validation data, decoder and encoder validation masks data
+        - if neither pad_token or tokenizer is specified (or tokenizer has no pad_token), any padding in labels will contribute towards the loss
+        which may cause unwanted results. Specifying pad_token and/or tokenizer allows loss to be calculated while ignoring any padding in labels
+        - specify save_path to save the model as a .pkl file every eval_interval iterations using the save_component function.
+        '''
 
+        dec_training_path = os.path.join(training_dir_path, "decoder_data.pt")
         dec_training_data = torch.load(dec_training_path, weights_only=True)
-
-
-
-
-
-
-
-
-
+
+        dec_eval_path = os.path.join(eval_dir_path, "decoder_data.pt")
+        dec_eval_data = torch.load(dec_eval_path, weights_only=True) if os.path.isfile(dec_eval_path) else None
+
+        dec_training_masks_path = os.path.join(training_dir_path, "decoder_mask_data.pt")
+        dec_training_masks_data = torch.load(dec_training_masks_path, weights_only=True) if os.path.isfile(dec_training_masks_path) else None
+
+        dec_eval_masks_path = os.path.join(eval_dir_path, "decoder_mask_data.pt")
+        dec_eval_masks_data = torch.load(dec_eval_masks_path, weights_only=True) if os.path.isfile(dec_eval_masks_path) else None
+
+        enc_training_path = os.path.join(training_dir_path, "encoder_data.pt")
+        enc_training_data = torch.load(enc_training_path, weights_only=True) if os.path.isfile(enc_training_path) else None
+
+        enc_eval_path = os.path.join(eval_dir_path, "encoder_data.pt")
+        enc_eval_data = torch.load(enc_eval_path, weights_only=True) if os.path.isfile(enc_eval_path) else None
+
+        enc_training_masks_path = os.path.join(training_dir_path, "encoder_mask_data.pt")
+        enc_training_masks_data = torch.load(enc_training_masks_path, weights_only=True) if os.path.isfile(enc_training_masks_path) else None
+
+        enc_eval_masks_path = os.path.join(eval_dir_path, "encoder_mask_data.pt")
+        enc_eval_masks_data = torch.load(enc_eval_masks_path, weights_only=True) if os.path.isfile(enc_eval_masks_path) else None
+
+        if pad_token is None and dec_tokenizer is not None:
             pad_token = dec_tokenizer.pad_token
 
         self.to(self.device)
 
-        if pad_token
+        if pad_token is not None:
             loss_fn = nn.CrossEntropyLoss(ignore_index=pad_token, label_smoothing=label_smoothing).to(self.device)
         else:
             loss_fn = nn.CrossEntropyLoss(label_smoothing=label_smoothing).to(self.device)
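The hunks above replace the per-file path arguments of `train_robo` with two directory arguments, matching the file names written by `DataProcessor.process_list` in this version. A call in the new style (mirroring the README example later in this diff; directory names are illustrative):

```
robo.train_robo(
    max_iters=20000,
    eval_interval=200,
    batch_size=128,
    training_dir_path="data/training",   # must contain decoder_data.pt; mask and encoder files are loaded if present
    eval_dir_path="data/validation",     # same file names; missing files are skipped
    dec_tokenizer=decoder_tok,
    save_path="models/eng_to_fr_robo.pkl"
)
```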
@@ -782,7 +751,7 @@ class RoboConstructor(nn.Module):
                 proj_output = self.forward(dec_x, dec_mask, enc_x, enc_mask)
                 losses[k] = loss_fn(proj_output.view(-1, self.dec_vocab_size), dec_y.view(-1))
             out["train"] = losses.mean()
-            if dec_eval_data
+            if dec_eval_data is not None:
                 for k in range(eval_iters):
                     dec_x, dec_y, dec_mask, enc_x, enc_mask = self.prep_data(batch_size, dec_eval_data, dec_masks=dec_eval_masks_data, dec_block_size=self.dec_block_size, enc_data=enc_eval_data, enc_masks=enc_eval_masks_data, enc_block_size=self.enc_block_size)
                     proj_output = self.forward(dec_x, dec_mask, enc_x, enc_mask)
@@ -798,7 +767,7 @@ class RoboConstructor(nn.Module):
             if iter % eval_interval == 0 or iter == max_iters-1:
                 losses = estimate_loss()
                 print(f"step {iter}: train loss {losses['train']:.4f}, eval loss {losses['eval']:.4f}")
-                if save_path
+                if save_path is not None:
                     save_component(self, save_path=save_path)
 
             dec_x, dec_y, dec_mask, enc_x, enc_mask = self.prep_data(batch_size, dec_training_data, dec_masks=dec_training_masks_data, dec_block_size=self.dec_block_size, enc_data=enc_training_data, enc_masks=enc_training_masks_data, enc_block_size=self.enc_block_size)
@@ -825,25 +794,37 @@ class RoboConstructor(nn.Module):
                  top_k:int=None,
                  top_p:float=None
                  ) -> list[int]|str:
-
+        '''
+        uses the tranformer model from the RoboConstructor instance to generate an output from an input.
+        - input can be in the form of a string if input tokenizer is specified (enc_tokenizer for encoder-decoder and dec_tokenizder for decoder-only),
+        otherwise, it must be in the form of a list of tokens.
+        - if dec_tokenizer is specified, output will be a string.
+        - new tokens are generated until the dec_end_token (or dec_tokenizer.end_token) is generated, or the number of tokens generated == max_new_tokens.
+        - if input tokenizer is not specified, or input tokenizer.start_token is None, enc_start_token must be specified for an encoder-decoder model.
+        - separator_token is used to separate the input and generated tokens for a decoder-only model. If this value is not specified, there
+        will be no distinction between input tokens and generated tokens to the transformer, even if dec_tokenizer is specified.
+        - if new_line_token is not specified, output will be returned in one line, without any "\n" line separators.
+        - temperature, top_k and top_p can be specified to adjust the output.
+        '''
+        max_new_tokens = self.dec_block_size if max_new_tokens is None else max_new_tokens
 
         if self.cross_attention:
-            if enc_tokenizer
-                if enc_start_token
+            if enc_tokenizer is not None:
+                if enc_start_token is None:
                     enc_start_token = enc_tokenizer.start_token
-                if enc_end_token
+                if enc_end_token is None:
                     enc_end_token = enc_tokenizer.end_token
                 if isinstance(inputs, str):
                     inputs = enc_tokenizer.encode(inputs)
 
-        if dec_tokenizer
-            if dec_start_token
+        if dec_tokenizer is not None:
+            if dec_start_token is None:
                 dec_start_token = dec_tokenizer.start_token
-            if dec_end_token
+            if dec_end_token is None:
                 dec_end_token = dec_tokenizer.end_token
-            if new_line_token
+            if new_line_token is None:
                 new_line_token = dec_tokenizer.new_line_token
-            if self.cross_attention
+            if not self.cross_attention and isinstance(inputs, str):
                 inputs = dec_tokenizer.encode(inputs)
 
 
@@ -852,7 +833,7 @@ class RoboConstructor(nn.Module):
             idx = torch.tensor([[dec_start_token]], dtype=torch.long, device=self.device)
         else:
             enc_input = None
-            if separator_token
+            if separator_token is not None:
                 idx = torch.tensor([[dec_start_token] + inputs + [separator_token]], dtype=torch.long, device=self.device)
             else:
                 idx = torch.tensor([[dec_start_token] + inputs], dtype=torch.long, device=self.device)
@@ -866,7 +847,7 @@ class RoboConstructor(nn.Module):
             logits = proj_output[:, -1, :]
             probabilities = F.log_softmax(logits/temperature, dim=-1)
 
-            if top_k
+            if top_k is None and top_p is None:
                 idx_next = torch.max(probabilities, dim=-1).indices.unsqueeze(0)
             else:
                 idx_next = top_kp_filter(probabilities, top_k=top_k, top_p=top_p).unsqueeze(0).to(self.device)
@@ -874,10 +855,10 @@ class RoboConstructor(nn.Module):
             if idx_next[0] == dec_end_token:
                 break
 
-        if dec_tokenizer
+        if dec_tokenizer is None:
             return idx[0].tolist()
         else:
-            if new_line_token
+            if new_line_token is not None:
                 return "\n".join([dec_tokenizer.decode(list(y)) for x, y in itertools.groupby(idx[0].tolist(), lambda z: z == 0) if not x])
             else:
                 return dec_tokenizer.decode(idx[0].tolist())
@@ -885,9 +866,7 @@ class RoboConstructor(nn.Module):
 
 def save_component(component, save_path:str) -> None:
     '''
-
     saves component (such as TokenizerConstructor or RoboConstructor) as .pkl file.
-
     '''
     save_path = save_path + ".pkl" if save_path[-4:] != ".pkl" else save_path
     with open(save_path, "wb") as comp:
@@ -895,9 +874,7 @@ def save_component(component, save_path:str) -> None:
 
 def load_component(load_path:str):
     '''
-
     loads saved .pkl file into variable.
-
     '''
     load_path = load_path + ".pkl" if load_path[-4:] != ".pkl" else load_path
     with open(load_path, "rb") as comp:
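A hedged end-to-end sketch of the inference-side API shown above. Paths and tokenizer variables are illustrative, keyword names beyond those visible in the hunks are assumed, and the helpers are assumed to be importable at package level (otherwise import them from `robo_lib.components`):

```
from robo_lib import load_component, save_component

robo = load_component("models/eng_to_fr_robo.pkl")         # ".pkl" is appended if missing
out = robo.generate("hello, how are you?",
                    enc_tokenizer=encoder_tok,             # tokenizers created or loaded elsewhere
                    dec_tokenizer=decoder_tok,
                    top_k=50)
print(out)                                                  # decoded string when dec_tokenizer is given
save_component(robo, "models/eng_to_fr_robo")               # written back as models/eng_to_fr_robo.pkl
```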
{robo_lib-0.0.11.dist-info → robo_lib-1.0.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: robo_lib
-Version:
+Version: 1.0.1
 Summary: A package to create, configure, and train transformer models.
 Project-URL: Homepage, https://github.com/hamburgerfish/robo_pack
 Project-URL: Issues, https://github.com/hamburgerfish/robo_pack/issues
@@ -13,6 +13,7 @@ Requires-Python: >=3.8
 Requires-Dist: numpy
 Requires-Dist: tokenizers
 Requires-Dist: torch
+Requires-Dist: typing
 Description-Content-Type: text/markdown
 
 # robo-lib
@@ -83,10 +84,8 @@ proc.process_list(
     save_path="data/training",
     dec_data=french_train,
     dec_max_block_size=100,
-    dec_block_size_exceeded_policy="skip",
     enc_data=english_train,
-    enc_max_block_size=100
-    enc_block_size_exceeded_policy="skip"
+    enc_max_block_size=100
 )
 
 # process and save validation data as data/validation*.pt
@@ -94,10 +93,8 @@ proc.process_list(
     save_path="data/validation",
     dec_data=french_val,
     dec_max_block_size=100,
-    dec_block_size_exceeded_policy="skip",
     enc_data=english_val,
-    enc_max_block_size=100
-    enc_block_size_exceeded_policy="skip"
+    enc_max_block_size=100
 )
 ```
 - The `RoboConstructor` class is used to create and configure transformer models before trainin.
@@ -128,14 +125,8 @@ robo.train_robo(
     max_iters=20000,
     eval_interval=200,
     batch_size=128,
-
-
-    dec_training_masks_path="data/training_decoder_mask_data.pt",
-    dec_eval_masks_path="data/validation_decoder_mask_data.pt",
-    enc_training_path="data/training_encoder_data.pt",
-    enc_eval_path="data/validation_encoder_data.pt",
-    enc_training_masks_path="data/training_encoder_mask_data.pt",
-    enc_eval_masks_path="data/validation_encoder_mask_data.pt",
+    training_dir_path="data/training",
+    eval_dir_path="data/validation",
     dec_tokenizer=decoder_tok,
     save_path="models/eng_to_fr_robo.pkl"
 )
@@ -223,8 +214,8 @@ robo.train(
     max_iters=20000,
     eval_interval=200,
     batch_size=64,
-
-
+    training_dir_path="data/shakespeare_train",
+    eval_dir_path="data/shakespeare_valid",
     dec_tokenizer=tok,
     save_path="models/shakespeare_robo.pkl"
 )
robo_lib-1.0.1.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
+robo_lib/__init__.py,sha256=NnzWHWwpFcSJD_XRMWKKPQFAIrRBFYiCFN0pgUGPygc,968
+robo_lib/components.py,sha256=mfvdNC77d1k1vmlNwG3ri2MbfmEn3haACAnRf56b_c4,43164
+robo_lib-1.0.1.dist-info/METADATA,sha256=4CG07VLULgAcGlfNeNXS9Pjzs7SXP5gNf95ddgGbWqc,9051
+robo_lib-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+robo_lib-1.0.1.dist-info/licenses/LICENSE,sha256=4XzkkpFqPzH0GH3zxOqRTqc7xUKSEe7dWPOuJYW95ac,1089
+robo_lib-1.0.1.dist-info/RECORD,,
robo_lib-0.0.11.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
-robo_lib/__init__.py,sha256=iVOAsANj0lScVW9KKMxCULYmpp0cv4sv1k3sHjBSlE0,1012
-robo_lib/components.py,sha256=L_GUEHdKC_-Xn56ObQ9-DH8T1ywaz0M8jlWv227gZBs,42591
-robo_lib-0.0.11.dist-info/METADATA,sha256=ePF06l2FXzo0qjK8v9Vob4WnOQ61KVd0mUqd7JVG7j4,9634
-robo_lib-0.0.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-robo_lib-0.0.11.dist-info/licenses/LICENSE,sha256=4XzkkpFqPzH0GH3zxOqRTqc7xUKSEe7dWPOuJYW95ac,1089
-robo_lib-0.0.11.dist-info/RECORD,,
{robo_lib-0.0.11.dist-info → robo_lib-1.0.1.dist-info}/WHEEL
File without changes
{robo_lib-0.0.11.dist-info → robo_lib-1.0.1.dist-info}/licenses/LICENSE
File without changes