biolmai 0.1.7__py2.py3-none-any.whl → 0.1.9__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of biolmai might be problematic.

biolmai/__init__.py CHANGED
@@ -1,7 +1,8 @@
 """Top-level package for BioLM AI."""
 __author__ = """Nikhil Haas"""
 __email__ = "nikhil@biolm.ai"
-__version__ = '0.1.7'
+__version__ = '0.1.9'
 
+from biolmai.cls import *
 
 __all__ = []
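
Since 0.1.9 re-exports everything from biolmai.cls at the package root, the model classes shown later in this diff can be imported directly from biolmai. A minimal usage sketch, assuming an API token is configured and that predict() still accepts a plain list of sequences (handled by the convert_input decorator):

    from biolmai import ESMFoldSingleChain  # re-exported via `from biolmai.cls import *`

    model = ESMFoldSingleChain()
    # Sequence borrowed from the ESM2 docstring example further down in this diff.
    results = model.predict(["MSILVTRPSPAGEELVSRLRTLGQVAWHFPLIEFSPGQQLPQ"])
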
biolmai/api.py CHANGED
@@ -15,7 +15,7 @@ import biolmai.auth
 from biolmai.asynch import async_api_call_wrapper
 from biolmai.biolmai import log
 from biolmai.const import MULTIPROCESS_THREADS
-from biolmai.payloads import INST_DAT_TXT, predict_resp_many_in_one_to_many_singles
+from biolmai.payloads import INST_DAT_TXT, PARAMS_ITEMS, predict_resp_many_in_one_to_many_singles
 
 
 @lru_cache(maxsize=64)
@@ -35,65 +35,82 @@ def text_validator(text, c):
     except Exception as e:
         return str(e)
 
+def combine_validation(x, y):
+    if x is None and y is None:
+        return None
+    elif isinstance(x, str) and y is None:
+        return x
+    elif x is None and isinstance(y, str):
+        return y
+    elif isinstance(x, str) and isinstance(y, str):
+        return f"{x}\n{y}"
+
+
+def validate_action(action):
+    def validate(f):
+        def wrapper(*args, **kwargs):
+            # Get class instance at runtime, so you can access not just
+            # APIEndpoints, but any *parent* classes of that,
+            # like ESMFoldSinglechain.
+            class_obj_self = args[0]
+            try:
+                is_method = inspect.getfullargspec(f)[0][0] == "self"
+            except Exception:
+                is_method = False
 
-def validate(f):
-    def wrapper(*args, **kwargs):
-        # Get class instance at runtime, so you can access not just
-        # APIEndpoints, but any *parent* classes of that,
-        # like ESMFoldSinglechain.
-        class_obj_self = args[0]
-        try:
-            is_method = inspect.getfullargspec(f)[0][0] == "self"
-        except Exception:
-            is_method = False
-
-        # Is the function we decorated a class method?
-        if is_method:
-            name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
-        else:
-            name = f"{f.__module__}.{f.__name__}"
-
-        if is_method:
-            # Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
-            action_method_name = name.split(".")[-1]
-            validate_endpoint_action(
-                class_obj_self.action_class_strings,
-                action_method_name,
-                class_obj_self.__class__.__name__,
-            )
-
-        input_data = args[1]
-        # Validate each row's text/input based on class attribute `seq_classes`
-        for c in class_obj_self.seq_classes:
-            # Validate input data against regex
-            if class_obj_self.multiprocess_threads:
-                validation = input_data.text.apply(text_validator, args=(c,))
+            # Is the function we decorated a class method?
+            if is_method:
+                name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
             else:
-                validation = input_data.text.apply(text_validator, args=(c,))
-            if "validation" not in input_data.columns:
-                input_data["validation"] = validation
-            else:
-                input_data["validation"] = input_data["validation"].str.cat(
-                    validation, sep="\n", na_rep=""
+                name = f"{f.__module__}.{f.__name__}"
+
+            if is_method:
+                # Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
+                action_method_name = name.split(".")[-1]
+                validate_endpoint_action(
+                    class_obj_self.action_class_strings,
+                    action_method_name,
+                    class_obj_self.__class__.__name__,
                 )
 
-        # Mark your batches, excluding invalid rows
-        valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
-        N = class_obj_self.batch_size  # N rows will go per API request
-        # JOIN back, which is by index
-        if valid_dat.shape[0] != input_data.shape[0]:
-            valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
-            input_data = input_data.merge(
-                valid_dat.batch, left_index=True, right_index=True, how="left"
-            )
-        else:
-            input_data["batch"] = np.arange(input_data.shape[0]) // N
-
-        res = f(class_obj_self, input_data, **kwargs)
-        return res
-
-    return wrapper
+            input_data = args[1]
+            # Validate each row's text/input based on class attribute `seq_classes`
+            if action == "predict":
+                input_classes = class_obj_self.predict_input_classes
+            elif action == "encode":
+                input_classes = class_obj_self.encode_input_classes
+            elif action == "generate":
+                input_classes = class_obj_self.generate_input_classes
+            elif action == "transform":
+                input_classes = class_obj_self.transform_input_classes
+            for c in input_classes:
+                # Validate input data against regex
+                if class_obj_self.multiprocess_threads:
+                    validation = input_data.text.apply(text_validator, args=(c,))
+                else:
+                    validation = input_data.text.apply(text_validator, args=(c,))
+                if "validation" not in input_data.columns:
+                    input_data["validation"] = validation
+                else:
+                    # masking and loc may be more performant option
+                    input_data["validation"] = input_data["validation"].combine(validation, combine_validation)
+
+            # Mark your batches, excluding invalid rows
+            valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
+            N = class_obj_self.batch_size  # N rows will go per API request
+            # JOIN back, which is by index
+            if valid_dat.shape[0] != input_data.shape[0]:
+                valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
+                input_data = input_data.merge(
+                    valid_dat.batch, left_index=True, right_index=True, how="left"
+                )
+            else:
+                input_data["batch"] = np.arange(input_data.shape[0]) // N
+            res = f(class_obj_self, input_data, **kwargs)
+            return res
 
+        return wrapper
+    return validate
 
 def convert_input(f):
     def wrapper(*args, **kwargs):
@@ -123,7 +140,20 @@ def convert_input(f):
 
 
 class APIEndpoint:
-    batch_size = 3  # Overwrite in parent classes as needed
+    # Overwrite in parent classes as needed
+    batch_size = 3
+    params = None
+    action_classes = ()
+    api_version = 2
+
+    predict_input_key = "sequence"
+    encode_input_key = "sequence"
+    generate_input_key = "context"
+
+    predict_input_classes = ()
+    encode_input_classes = ()
+    generate_input_classes = ()
+    transform_input_classes = ()
 
     def __init__(self, multiprocess_threads=None):
        # Check for instance-specific threads, otherwise read from env var
@@ -137,7 +167,7 @@ class APIEndpoint:
            [c.__name__.replace("Action", "").lower() for c in self.action_classes]
        )
 
-    def post_batches(self, dat, slug, action, payload_maker, resp_key):
+    def post_batches(self, dat, slug, action, payload_maker, resp_key, key="sequence", params=None):
        keep_batches = dat.loc[~dat.batch.isnull(), ["text", "batch"]]
        if keep_batches.shape[0] == 0:
            pass  # Do nothing - we made nice JSON errors to return in the DF
@@ -145,7 +175,7 @@ class APIEndpoint:
            # raise AssertionError(err)
        if keep_batches.shape[0] > 0:
            api_resps = async_api_call_wrapper(
-                keep_batches, slug, action, payload_maker, resp_key
+                keep_batches, slug, action, payload_maker, resp_key, api_version=self.api_version, key=key, params=params,
            )
            if isinstance(api_resps, pd.DataFrame):
                batch_res = api_resps.explode("api_resp")  # Should be lists of results
@@ -170,11 +200,11 @@ class APIEndpoint:
        dat["api_resp"] = None
        return dat
 
-    def unpack_local_validations(self, dat):
+    def unpack_local_validations(self, dat, response_key):
        dat.loc[dat.api_resp.isnull(), "api_resp"] = (
            dat.loc[~dat.validation.isnull(), "validation"]
            .apply(
-                predict_resp_many_in_one_to_many_singles, args=(None, None, True, None)
+                predict_resp_many_in_one_to_many_singles, args=(None, None, True, None), response_key=response_key
            )
            .explode()
        )
@@ -182,39 +212,46 @@ class APIEndpoint:
        return dat
 
     @convert_input
-    @validate
-    def predict(self, dat):
-        dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
-        dat = self.unpack_local_validations(dat)
+    @validate_action("predict")
+    def predict(self, dat, params=None):
+        if self.api_version == 1:
+            dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
+            dat = self.unpack_local_validations(dat, "predictions")
+        else:
+            dat = self.post_batches(dat, self.slug, "predict", PARAMS_ITEMS, "results", key=self.predict_input_key, params=params)
+            dat = self.unpack_local_validations(dat,"results")
        return dat.api_resp.replace(np.nan, None).tolist()
 
-    def infer(self, dat):
-        return self.predict(dat)
+    def infer(self, dat, params=None):
+        return self.predict(dat, params)
 
     @convert_input
-    @validate
+    @validate_action("transform")  # api v1 legacy action
     def transform(self, dat):
        dat = self.post_batches(
            dat, self.slug, "transform", INST_DAT_TXT, "predictions"
        )
-        dat = self.unpack_local_validations(dat)
+        dat = self.unpack_local_validations(dat,"predictions")
        return dat.api_resp.replace(np.nan, None).tolist()
 
-    # @convert_input
-    # @validate
-    # def encode(self, dat):
-    #     # NOTE: we defined this for the specific case of ESM2
-    #     # TODO: this will be need again in v2 of API contract
-    #     dat = self.post_batches(dat, self.slug, "transform",
-    #                             INST_DAT_TXT, "embeddings")
-    #     dat = self.unpack_local_validations(dat)
-    #     return dat.api_resp.replace(np.nan, None).tolist()
+    @convert_input
+    @validate_action("encode")
+    def encode(self, dat, params=None):
+
+        dat = self.post_batches(dat, self.slug, "encode", PARAMS_ITEMS, "results", key=self.encode_input_key, params=params)
+        dat = self.unpack_local_validations(dat, "results")
+        return dat.api_resp.replace(np.nan, None).tolist()
 
     @convert_input
-    @validate
-    def generate(self, dat):
-        dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
-        dat = self.unpack_local_validations(dat)
+    @validate_action("generate")
+    def generate(self, dat, params=None):
+        if self.api_version == 1:
+            dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
+            dat = self.unpack_local_validations(dat, "predictions")
+        else:
+            dat = self.post_batches(dat, self.slug, "generate", PARAMS_ITEMS, "results", key=self.generate_input_key, params=params)
+            dat = self.unpack_local_validations(dat, "results")
+
        return dat.api_resp.replace(np.nan, None).tolist()
 
 
@@ -290,9 +327,9 @@ class TransformAction:
        return "TransformAction"
 
 
-# class EncodeAction:
-#     def __str__(self):
-#         return "EncodeAction"
+class EncodeAction:
+    def __str__(self):
+        return "EncodeAction"
 
 
 class ExplainAction:
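
The old module-level validate decorator is replaced by validate_action(action), which selects the validator tuple matching the method being called (predict_input_classes, encode_input_classes, generate_input_classes, or transform_input_classes), and per-row error strings from successive validators are now merged with pandas Series.combine instead of str.cat. A small self-contained sketch of that merge behaviour, using toy error strings rather than real validator output:

    import pandas as pd

    def combine_validation(x, y):
        # Mirrors biolmai.api.combine_validation: keep whichever error exists,
        # or join both when both validators produced one.
        if x is None and y is None:
            return None
        elif isinstance(x, str) and y is None:
            return x
        elif x is None and isinstance(y, str):
            return y
        elif isinstance(x, str) and isinstance(y, str):
            return f"{x}\n{y}"

    first = pd.Series([None, "bad residue 'J'"])     # errors from validator 1
    second = pd.Series(["missing '<mask>'", None])   # errors from validator 2
    print(first.combine(second, combine_validation).tolist())
    # ["missing '<mask>'", "bad residue 'J'"]
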
biolmai/asynch.py CHANGED
@@ -7,7 +7,7 @@ import aiohttp.resolver
 from aiohttp import ClientSession
 
 from biolmai.auth import get_user_auth_header
-from biolmai.const import BASE_API_URL, MULTIPROCESS_THREADS
+from biolmai.const import BASE_API_URL, BASE_API_URL_V1, MULTIPROCESS_THREADS
 aiohttp.resolver.DefaultResolver = aiohttp.resolver.AsyncResolver
 
 
@@ -146,11 +146,14 @@ async def async_main(urls, concurrency) -> list:
     return await get_all(urls, concurrency)
 
 
-async def async_api_calls(model_name, action, headers, payloads, response_key=None):
+async def async_api_calls(model_name, action, headers, payloads, response_key=None, api_version=2):
     """Hit an arbitrary BioLM model inference API."""
     # Normally would POST multiple sequences at once for greater efficiency,
     # but for simplicity sake will do one at at time right now
-    url = f"{BASE_API_URL}/models/{model_name}/{action}/"
+    if api_version == 1:
+        url = f"{BASE_API_URL_V1}/models/{model_name}/{action}/"
+    else:
+        url = f"{BASE_API_URL}/{model_name}/{action}/"
 
     if not isinstance(payloads, (list, dict)):
         err = "API request payload must be a list or dict, got {}"
@@ -180,15 +183,20 @@ async def async_api_calls(model_name, action, headers, payloads, response_key=No
     # return response
 
 
-def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key):
+def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key, api_version=2, key="sequence", params=None):
     """Wrap API calls to assist with sequence validation as a pre-cursor to
     each API call.
     """
     model_name = slug
     # payload = payload_maker(grouped_df)
-    init_ploads = grouped_df.groupby("batch").apply(
-        payload_maker, include_batch_size=True
-    )
+    if api_version == 1:
+        init_ploads = grouped_df.groupby("batch").apply(
+            payload_maker, include_batch_size=True
+        )
+    else:
+        init_ploads = grouped_df.groupby("batch").apply(
+            payload_maker, key=key, params=params, include_batch_size=True
+        )
     ploads = init_ploads.to_list()
     init_ploads = init_ploads.to_frame(name="pload")
     init_ploads["batch"] = init_ploads.index
@@ -208,7 +216,7 @@ def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key
     #     "https://python.org",
     # ]
     # concurrency = 3
-    api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key))
+    api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key, api_version))
     api_resp = [item for sublist in api_resp for item in sublist]
     api_resp = sorted(api_resp, key=lambda x: x["batch_id"])
     # print(api_resp)
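
With api_version now threaded through async_api_call_wrapper and async_api_calls, the request URL depends on the endpoint's API version: version 1 keeps the old /api/v1/models/<slug>/<action>/ path, while the new default builds /api/v2/<slug>/<action>/. A rough illustration of the two shapes (the domain below is assumed for the example; the real value comes from BASE_DOMAIN in biolmai/const.py):

    BASE_DOMAIN = "https://biolm.ai"  # assumed here; biolmai/const.py supplies the real value
    BASE_API_URL_V1 = f"{BASE_DOMAIN}/api/v1"
    BASE_API_URL = f"{BASE_DOMAIN}/api/v2"

    def build_url(model_name, action, api_version=2):
        # Mirrors the branch added to async_api_calls().
        if api_version == 1:
            return f"{BASE_API_URL_V1}/models/{model_name}/{action}/"
        return f"{BASE_API_URL}/{model_name}/{action}/"

    print(build_url("esmfold-singlechain", "predict"))         # .../api/v2/esmfold-singlechain/predict/
    print(build_url("biolmtox_v1", "predict", api_version=1))  # .../api/v1/models/biolmtox_v1/predict/
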
biolmai/cls.py CHANGED
@@ -1,97 +1,176 @@
 """API inference classes."""
-from biolmai.api import APIEndpoint, GenerateAction, PredictAction, TransformAction
-from biolmai.validate import ExtendedAAPlusExtra, SingleOccurrenceOf, UnambiguousAA
+from biolmai.api import APIEndpoint, GenerateAction, PredictAction, TransformAction, EncodeAction
+from biolmai.validate import (AAExtended,
+                              AAExtendedPlusExtra,
+                              AAUnambiguous,
+                              AAUnambiguousPlusExtra,
+                              DNAUnambiguous,
+                              SingleOrMoreOccurrencesOf,
+                              SingleOccurrenceOf,
+                              PDB,
+                              AAUnambiguousEmpty
+                              )
 
 
 class ESMFoldSingleChain(APIEndpoint):
     slug = "esmfold-singlechain"
     action_classes = (PredictAction,)
-    seq_classes = (UnambiguousAA(),)
+    predict_input_classes = (AAUnambiguous(),)
     batch_size = 2
 
 
 class ESMFoldMultiChain(APIEndpoint):
     slug = "esmfold-multichain"
     action_classes = (PredictAction,)
-    seq_classes = (ExtendedAAPlusExtra(extra=[":"]),)
+    predict_input_classes = (AAExtendedPlusExtra(extra=[":"]),)
     batch_size = 2
 
 
-class ESM2Embeddings(APIEndpoint):
+class ESM2(APIEndpoint):
     """Example.
 
     .. highlight:: python
     .. code-block:: python
 
         {
-            "instances": [{
-                "data": {"text": "MSILVTRPSPAGEELVSRLRTLGQVAWHFPLIEFSPGQQLPQ"}
+            "items": [{
+                "sequence": "MSILVTRPSPAGEELVSRLRTLGQVAWHFPLIEFSPGQQLPQ"
             }]
         }
     """
 
-    slug = "esm2_t33_650M_UR50D"
-    action_classes = (TransformAction,)
-    seq_classes = (UnambiguousAA(),)
+    action_classes = (EncodeAction, PredictAction, )
+    encode_input_classes = (AAUnambiguous(),)
+    predict_input_classes = (SingleOrMoreOccurrencesOf(token="<mask>"), AAExtendedPlusExtra(extra=["<mask>"]))
     batch_size = 1
 
+class ESM2_8M(ESM2):
+    slug = "esm2-8m"
+
+class ESM2_35M(ESM2):
+    slug = "esm2-35m"
+
+class ESM2_150M(ESM2):
+    slug = "esm2-150m"
+
+class ESM2_650M(ESM2):
+    slug = "esm2-650m"
+
+class ESM2_3B(ESM2):
+    slug = "esm2-3b"
 
-class ESM1v1(APIEndpoint):
+class ESM1v(APIEndpoint):
     """Example.
 
     .. highlight:: python
     .. code-block:: python
 
         {
-            "instances": [{
-                "data": {"text": "QERLEUTGR<mask>SLGYNIVAT"}
+            "items": [{
+                "sequence": "QERLEUTGR<mask>SLGYNIVAT"
             }]
         }
     """
-
-    slug = "esm1v_t33_650M_UR90S_1"
     action_classes = (PredictAction,)
-    seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
+    predict_input_classes = (SingleOccurrenceOf("<mask>"), AAExtendedPlusExtra(extra=["<mask>"]))
     batch_size = 5
 
+class ESM1v1(ESM1v):
+    slug = "esm1v-n1"
 
-class ESM1v2(APIEndpoint):
-    slug = "esm1v_t33_650M_UR90S_2"
-    action_classes = (PredictAction,)
-    seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
-    batch_size = 5
+class ESM1v2(ESM1v):
+    slug = "esm1v-n2"
 
+class ESM1v3(ESM1v):
+    slug = "esm1v-n3"
 
-class ESM1v3(APIEndpoint):
-    slug = "esm1v_t33_650M_UR90S_3"
-    action_classes = (PredictAction,)
-    seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
-    batch_size = 5
+class ESM1v4(ESM1v):
+    slug = "esm1v-n4"
 
+class ESM1v5(ESM1v):
+    slug = "esm1v-n5"
 
-class ESM1v4(APIEndpoint):
-    slug = "esm1v_t33_650M_UR90S_4"
-    action_classes = (PredictAction,)
-    seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
-    batch_size = 5
-
-
-class ESM1v5(APIEndpoint):
-    slug = "esm1v_t33_650M_UR90S_5"
-    action_classes = (PredictAction,)
-    seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
-    batch_size = 5
-
+class ESM1vAll(ESM1v):
+    slug = "esm1v-all"
 
 class ESMIF1(APIEndpoint):
-    slug = "esmif1"
+    slug = "esm-if1"
     action_classes = (GenerateAction,)
-    seq_classes = ()
+    generate_input_classes = PDB
     batch_size = 2
+    generate_input_key = "pdb"
 
 
-class Progen2(APIEndpoint):
-    slug = "progen2"
+class ProGen2(APIEndpoint):
     action_classes = (GenerateAction,)
-    seq_classes = ()
+    generate_input_classes = (AAUnambiguousEmpty(),)
     batch_size = 1
+
+class ProGen2Oas(ProGen2):
+    slug = "progen2-oas"
+
+class ProGen2Medium(ProGen2):
+    slug = "progen2-medium"
+
+class ProGen2Large(ProGen2):
+    slug = "progen2-large"
+
+class ProGen2BFD90(ProGen2):
+    slug = "progen2-bfd90"
+
+class AbLang(APIEndpoint):
+    action_classes = (PredictAction, EncodeAction, GenerateAction,)
+    predict_input_classes = (AAUnambiguous(),)
+    encode_input_classes = (AAUnambiguous(),)
+    generate_input_classes = (SingleOrMoreOccurrencesOf(token="*"), AAUnambiguousPlusExtra(extra=["*"]))
+    batch_size = 32
+    generate_input_key = "sequence"
+
+class AbLangHeavy(AbLang):
+    slug = "ablang-heavy"
+
+class AbLangLight(AbLang):
+    slug = "ablang-light"
+
+class DNABERT(APIEndpoint):
+    slug = "dnabert"
+    action_classes = (EncodeAction,)
+    encode_input_classes = (DNAUnambiguous(),)
+    batch_size = 10
+
+class DNABERT2(APIEndpoint):
+    slug = "dnabert2"
+    action_classes = (EncodeAction,)
+    encode_input_classes = (DNAUnambiguous(),)
+    batch_size = 10
+
+class BioLMToxV1(APIEndpoint):
+    """Example.
+
+    .. highlight:: python
+    .. code-block:: python
+
+        {
+            "instances": [{
+                "data": {"text": "MSILVTRPSPAGEELVSRLRTLGQVAWHFPLIEFSPGQQLPQ"}
+            }]
+        }
+    """
+
+    slug = "biolmtox_v1"
+    action_classes = (TransformAction, PredictAction,)
+    predict_input_classes = (AAUnambiguous(),)
+    transform_input_classes = (AAUnambiguous(),)
+    batch_size = 1
+    api_version = 1
+
+class ProteInfer(APIEndpoint):
+    action_classes = (PredictAction,)
+    predict_input_classes = (AAExtended(),)
+    batch_size = 64
+
+class ProteInferEC(ProteInfer):
+    slug = "proteinfer-ec"
+
+class ProteInferGO(ProteInfer):
+    slug = "proteinfer-go"
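
The rewritten cls.py exposes one class per hosted model variant (the ESM2 and ESM1v size/seed families, ESM-IF1, ProGen2, AbLang, DNABERT, BioLMToxV1, ProteInfer), each carrying its slug, per-action validators, and batch size. A hedged usage sketch, assuming a configured API token and the list-style inputs handled by convert_input:

    from biolmai import ESM2_650M, ESM1v1

    # Embeddings from ESM-2 650M; encode() validates unambiguous residues only.
    esm2 = ESM2_650M()
    embeddings = esm2.encode(["MSILVTRPSPAGEELVSRLRTLGQVAWHFPLIEFSPGQQLPQ"])

    # ESM-1v masked-token scoring; exactly one <mask> is required per sequence.
    esm1v = ESM1v1()
    scores = esm1v.predict(["QERLEUTGR<mask>SLGYNIVAT"])
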
biolmai/const.py CHANGED
@@ -26,4 +26,5 @@ if int(MULTIPROCESS_THREADS) > max_threads or int(MULTIPROCESS_THREADS) > 128:
 elif int(MULTIPROCESS_THREADS) <= 0:
     err = "Environment variable BIOLMAI_THREADS must be a positive integer."
     raise ValueError(err)
-BASE_API_URL = f"{BASE_DOMAIN}/api/v1"
+BASE_API_URL_V1 = f"{BASE_DOMAIN}/api/v1"
+BASE_API_URL = f"{BASE_DOMAIN}/api/v2"
biolmai/payloads.py CHANGED
@@ -7,11 +7,22 @@ def INST_DAT_TXT(batch, include_batch_size=False):
         d["batch_size"] = len(d["instances"])
     return d
 
+def PARAMS_ITEMS(batch, key="sequence", params=None, include_batch_size=False):
+    d = {"items": []}
+    for _, row in batch.iterrows():
+        inst = {key: row.text}
+        d["items"].append(inst)
+    if include_batch_size is True:
+        d["batch_size"] = len(d["items"])
+    if isinstance(params, dict):
+        d["params"] = params
+    return d
+
 
 
 def predict_resp_many_in_one_to_many_singles(
-    resp_json, status_code, batch_id, local_err, batch_size
+    resp_json, status_code, batch_id, local_err, batch_size, response_key = "results"
 ):
-    expected_root_key = "predictions"
+    expected_root_key = response_key
     to_ret = []
     if not local_err and status_code and status_code == 200:
         list_of_individual_seq_results = resp_json[expected_root_key]
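
PARAMS_ITEMS is the v2 counterpart of INST_DAT_TXT: instead of {"instances": [{"data": {"text": ...}}]} it builds an {"items": [...]} payload keyed by the endpoint's input key, with an optional params dict passed through. A quick sketch of the payload it produces for a toy batch (the params value here is purely hypothetical; real options depend on the endpoint):

    import pandas as pd
    from biolmai.payloads import PARAMS_ITEMS  # added in 0.1.9

    batch = pd.DataFrame({"text": ["MSILVTRP", "SPAGEELV"]})  # toy batch
    payload = PARAMS_ITEMS(batch, key="sequence",
                           params={"some_param": 123},  # hypothetical
                           include_batch_size=True)
    print(payload)
    # {'items': [{'sequence': 'MSILVTRP'}, {'sequence': 'SPAGEELV'}], 'batch_size': 2, 'params': {'some_param': 123}}
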
biolmai/validate.py CHANGED
@@ -1,4 +1,5 @@
 import re
+from typing import List
 
 UNAMBIGUOUS_AA = (
     "A",
@@ -22,113 +23,137 @@ UNAMBIGUOUS_AA = (
     "W",
     "Y",
 )
-AAs = "".join(UNAMBIGUOUS_AA)
-# Let's use extended list for ESM-1v
-AAs_EXTENDED = "ACDEFGHIKLMNPQRSTVWYBXZJUO"
+aa_unambiguous = "ACDEFGHIKLMNPQRSTVWY"
+aa_extended = aa_unambiguous + "BXZUO"
 
-
-UNAMBIGUOUS_DNA = ("A", "C", "T", "G")
-AMBIGUOUS_DNA = ("A", "C", "T", "G", "X", "N", "U")
+dna_unambiguous = "ACTG"
+dna_ambiguous = dna_unambiguous + "XNU"
 
 
 regexes = {
-    "empty_or_unambiguous_aa_validator": re.compile(f"^[{AAs}]*$"),
-    "empty_or_unambiguous_dna_validator": re.compile(r"^[ACGT]*$"),
-    "extended_aa_validator": re.compile(f"^[{AAs_EXTENDED}]+$"),
-    "unambiguous_aa_validator": re.compile(f"^[{AAs}]+$"),
-    "unambiguous_dna_validator": re.compile(r"^[ACGT]+$"),
+    "empty_or_aa_unambiguous_validator": re.compile(f"^[{aa_unambiguous}]*$"),
+    "aa_extended_validator": re.compile(f"^[{aa_extended}]+$"),
+    "aa_unambiguous_validator": re.compile(f"^[{aa_unambiguous}]+$"),
+    "empty_or_dna_unambiguous_validator": re.compile(f"^[{dna_unambiguous}]*$"),
+    "dna_unambiguous_validator": re.compile(f"^[{dna_unambiguous}]+$"),
 }
 
 
-def empty_or_unambiguous_aa_validator(txt):
-    r = regexes["empty_or_unambiguous_aa_validator"]
-    if not bool(r.match(txt)):
-        err = f"Residues can only be represented with '{AAs}' characters"
-        raise AssertionError(err)
-    return txt
+def empty_or_aa_unambiguous_validator(text: str) -> str:
+    if not regexes["empty_or_aa_unambiguous_validator"].match(text):
+        raise ValueError(
+            f"Residues can only be represented with '{aa_unambiguous}' characters"
+        )
+    return text
 
 
-def empty_or_unambiguous_dna_validator(txt):
-    r = regexes["empty_or_unambiguous_dna_validator"]
-    if not bool(r.match(txt)):
-        err = "Nucleotides can only be represented with 'ACTG' characters"
-        raise AssertionError(err)
-    return txt
+def empty_or_dna_unambiguous_validator(text: str) -> str:
+    if not regexes["empty_or_dna_unambiguous_validator"].match(text):
+        raise ValueError(
+            f"Nucleotides can only be represented with '{dna_unambiguous}' characters"
+        )
+    return text
 
 
-def extended_aa_validator(txt):
-    r = regexes["extended_aa_validator"]
-    if not bool(r.match(txt)):
-        err = (
-            f"Extended residues can only be represented with "
-            f"'{AAs_EXTENDED}' characters"
+def aa_extended_validator(text: str) -> str:
+    if not regexes["aa_extended_validator"].match(text):
+        raise ValueError(
+            f"Residues can only be represented with '{aa_extended}' characters"
         )
-        raise AssertionError(err)
-    return txt
+    return text
 
 
-def unambiguous_aa_validator(txt):
-    r = regexes["unambiguous_aa_validator"]
-    if not bool(r.match(txt)):
-        err = (
-            f"Unambiguous residues can only be represented with '{AAs}' " f"characters"
+def aa_unambiguous_validator(text: str) -> str:
+    if not regexes["aa_unambiguous_validator"].match(text):
+        raise ValueError(
+            f"Residues can only be represented with '{aa_unambiguous}' characters"
         )
-        raise AssertionError(err)
-    return txt
+    return text
 
 
-def unambiguous_dna_validator(txt):
-    r = regexes["unambiguous_dna_validator"]
-    if not bool(r.match(txt)):
-        err = (
-            "Unambiguous nucleotides can only be represented with 'ACTG' " "characters"
+def dna_unambiguous_validator(text: str) -> str:
+    if not regexes["dna_unambiguous_validator"].match(text):
+        raise ValueError(
+            f"Nucleotides can only be represented with '{dna_unambiguous}' characters"
        )
-        raise AssertionError(err)
-    return txt
+    return text
 
+def pdb_validator(text: str) -> str:
+    if "ATOM" not in text:
+        raise ValueError("PDB string does not appear to be a valid PDB")
+    return text
 
-class UnambiguousAA:
+
+class PDB:
+    def __call__(self, value):
+        _ = pdb_validator(value)
+class AAUnambiguous:
     def __call__(self, value):
-        _ = unambiguous_aa_validator(value)
+        _ = aa_unambiguous_validator(value)
 
+class AAExtended:
+    def __call__(self, value):
+        _ = aa_extended_validator(value)
 
-class UnambiguousAAPlusExtra:
-    def __init__(self, extra=None):
-        if extra is None:
-            extra = []
-        self.extra = extra
-        assert len(extra) > 0
-        assert isinstance(extra, list)
+class DNAUnambiguous:
+    def __call__(self, value):
+        _ = dna_unambiguous_validator(value)
 
+class AAUnambiguousEmpty:
     def __call__(self, value):
-        txt_clean = value
+        _ = empty_or_aa_unambiguous_validator(value)
+
+class AAUnambiguousPlusExtra:
+    def __init__(self, extra: List[str]):
+        if not extra:
+            raise ValueError("Extra cannot be empty")
+        self.extra = extra
+
+    def __call__(self, value: str) -> str:
+        text_clean = value
         for ex in self.extra:
-            txt_clean = value.replace(ex, "")
-        _ = unambiguous_aa_validator(txt_clean)
+            text_clean = text_clean.replace(ex, "")
+        aa_unambiguous_validator(text_clean)
+        return value
 
 
-class ExtendedAAPlusExtra:
-    def __init__(self, extra=None):
-        if extra is None:
-            extra = []
+class AAExtendedPlusExtra:
+    def __init__(self, extra: List[str]):
+        if not extra:
+            raise ValueError("Extra cannot be empty")
         self.extra = extra
-        assert len(extra) > 0
-        assert isinstance(extra, list)
 
-    def __call__(self, value):
-        txt_clean = value
+    def __call__(self, value: str) -> str:
+        text_clean = value
         for ex in self.extra:
-            txt_clean = value.replace(ex, "")
-        _ = extended_aa_validator(txt_clean)
+            text_clean = text_clean.replace(ex, "")
+        aa_extended_validator(text_clean)
+        return value
 
 
 class SingleOccurrenceOf:
-    def __init__(self, single_char):
-        self.single_char = single_char
+    def __init__(self, single_token: str):
+        self.single_token = single_token
+
+    def __call__(self, value: str) -> str:
+        count = value.count(self.single_token)
+        if count != 1:
+            raise ValueError(
+                f"Expected a single occurrence of '{self.single_token}', got {count}"
+            )
+        return value
+
+
+class SingleOrMoreOccurrencesOf:
+    def __init__(self, token: str):
+        self.token = token
+
+    def __call__(self, value: str) -> str:
+        count = value.count(self.token)
+        if count < 1:
+            raise ValueError(
+                f"Expected at least one occurrence of '{self.token}', got none"
+            )
+        return value
+
 
-    def __call__(self, value):
-        s = self.single_char
-        cc = value.count(s)
-        if cc != 1:
-            err = "Expected a single occurrence of '{}', got {}"
-            raise AssertionError(err.format(s, cc))
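
The validators are now small, typed callables that raise ValueError instead of AssertionError, so the per-row error strings captured by text_validator carry consistent messages regardless of which check fails. A brief sketch of how they behave on their own:

    from biolmai.validate import AAUnambiguous, SingleOccurrenceOf

    check_aa = AAUnambiguous()
    check_mask = SingleOccurrenceOf("<mask>")

    check_aa("MSILVTRP")              # passes silently
    check_mask("QERLEUTGR<mask>SLG")  # passes: exactly one <mask>

    try:
        check_aa("MSILVTRPJ1")  # 'J' and '1' are outside the unambiguous residue set
    except ValueError as e:
        print(e)  # Residues can only be represented with 'ACDEFGHIKLMNPQRSTVWY' characters
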
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biolmai
-Version: 0.1.7
+Version: 0.1.9
 Summary: Python client and SDK for https://biolm.ai
 Home-page: https://github.com/BioLM/py-biolm
 Author: Nikhil Haas
@@ -0,0 +1,18 @@
+biolmai/__init__.py,sha256=05laq4xekEMZnrxknETvEsq9nY4Xa-CcZZs3ekK2aoA,162
+biolmai/api.py,sha256=1T38KUoOiPl8IjXfxsypIKGraLNcjtlDbtkrvohEZJU,12959
+biolmai/asynch.py,sha256=BVypJhhEEK2Bek2AhqNGn7FIRJehAbJflUdeeslbXFE,9073
+biolmai/auth.py,sha256=flI9KAD90qdXyLDnpJTrc9voKsiK0uWtD2ehsPBn8r4,6329
+biolmai/biolmai.py,sha256=xwjAvuw6AtmQdkRf_usSGUZ-k2oU-fjl82_WAgfSvVE,74
+biolmai/cli.py,sha256=bdb4q8QlN73A6Ttz0e-dBIwoct7PYqy5WSc52jCMIyU,1967
+biolmai/cls.py,sha256=Hiy_Qoj2Eb43oltnEUdJfMPCsOeFKZ-GUNljF-yShug,4287
+biolmai/const.py,sha256=vCSj-itsusZWoLR27DYQSpuq024GQz3-uKJuDUoPF0Y,1153
+biolmai/ltc.py,sha256=al7HZc5tLyUR5fmpIb95hOz5ctudVsc0xzjd_c2Ew3M,49
+biolmai/payloads.py,sha256=BOhEKl9kWkKMXy1YiNw2_eC6MJ4Dn6vKNvkhEBsM7Lw,1735
+biolmai/validate.py,sha256=58XMWrdWoDRmfiNAayWqrYaH3_bjRmEpG_yx6XSjTrM,4168
+biolmai-0.1.9.dist-info/AUTHORS.rst,sha256=TB_ACuFPgVmxn1NspYwksTdT6jdZeShcxfafmi-XWKQ,158
+biolmai-0.1.9.dist-info/LICENSE,sha256=8yt0SdP38I7a3g0zWqZjNe0VSDQhJA4bWLQSqqKtAVg,583
+biolmai-0.1.9.dist-info/METADATA,sha256=mEmPMicZdXQVKMsayll4CCaVdi2hWAxkpqL9ZbYqKKc,1929
+biolmai-0.1.9.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
+biolmai-0.1.9.dist-info/entry_points.txt,sha256=ylQnDpCYrxF1F9z_T7NRQcYMWYF5ia_KsTUuboxjEAM,44
+biolmai-0.1.9.dist-info/top_level.txt,sha256=jyQO45JN3g_jbdI8WqMnb0aEIzf4h1MrmPAZkKgfnwY,8
+biolmai-0.1.9.dist-info/RECORD,,
@@ -1,18 +0,0 @@
-biolmai/__init__.py,sha256=lJ7PiA_IyjKhz3dI8nrnqy8S_wqAHtEM3iN3v3eArr0,136
-biolmai/api.py,sha256=3DcXeTFwXdn2KpHrGPxFGN6bvzdFjK6_4KUZuaRe64w,10974
-biolmai/asynch.py,sha256=ZLCiNdGDR2XvijM6jFB2IFl3bG7ROp4PxKbo1rI5s7A,8698
-biolmai/auth.py,sha256=flI9KAD90qdXyLDnpJTrc9voKsiK0uWtD2ehsPBn8r4,6329
-biolmai/biolmai.py,sha256=xwjAvuw6AtmQdkRf_usSGUZ-k2oU-fjl82_WAgfSvVE,74
-biolmai/cli.py,sha256=bdb4q8QlN73A6Ttz0e-dBIwoct7PYqy5WSc52jCMIyU,1967
-biolmai/cls.py,sha256=yacZIwDyDq3sgU3FSc-l8uld83lkwSTh4wiS-vGNT4I,2425
-biolmai/const.py,sha256=kbpmBEm-bw7lhGIJcMFeq1pfsIYeRk01_JwBufjupXc,1111
-biolmai/ltc.py,sha256=al7HZc5tLyUR5fmpIb95hOz5ctudVsc0xzjd_c2Ew3M,49
-biolmai/payloads.py,sha256=WmFN9JUojbrdvd_By8WWURS6Gm5Bh1fPYK0UjLDCbzU,1356
-biolmai/validate.py,sha256=QdPDuZodHn85p1Y7KGkxCDMuRcXBOzAB9lkNZpigw9g,3311
-biolmai-0.1.7.dist-info/AUTHORS.rst,sha256=TB_ACuFPgVmxn1NspYwksTdT6jdZeShcxfafmi-XWKQ,158
-biolmai-0.1.7.dist-info/LICENSE,sha256=8yt0SdP38I7a3g0zWqZjNe0VSDQhJA4bWLQSqqKtAVg,583
-biolmai-0.1.7.dist-info/METADATA,sha256=S2JBm8gzzRm_Xsb0aY3LozcW9TSocbqFLZd8BsA7gQw,1929
-biolmai-0.1.7.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
-biolmai-0.1.7.dist-info/entry_points.txt,sha256=ylQnDpCYrxF1F9z_T7NRQcYMWYF5ia_KsTUuboxjEAM,44
-biolmai-0.1.7.dist-info/top_level.txt,sha256=jyQO45JN3g_jbdI8WqMnb0aEIzf4h1MrmPAZkKgfnwY,8
-biolmai-0.1.7.dist-info/RECORD,,