biolmai 0.1.4__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of biolmai has been flagged in its registry.
- {biolmai-0.1.4 → biolmai-0.1.7}/PKG-INFO +1 -1
- biolmai-0.1.7/biolmai/__init__.py +7 -0
- biolmai-0.1.7/biolmai/api.py +310 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai/asynch.py +90 -53
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai/auth.py +75 -29
- biolmai-0.1.7/biolmai/biolmai.py +5 -0
- biolmai-0.1.7/biolmai/cli.py +75 -0
- biolmai-0.1.7/biolmai/cls.py +97 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai/const.py +13 -11
- biolmai-0.1.7/biolmai/payloads.py +33 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai/validate.py +55 -28
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai.egg-info/PKG-INFO +1 -1
- biolmai-0.1.7/biolmai.egg-info/SOURCES.txt +64 -0
- biolmai-0.1.7/docs/_static/api_reference_icon.png +0 -0
- biolmai-0.1.7/docs/_static/chat_agents_icon.png +0 -0
- biolmai-0.1.7/docs/_static/jupyter_notebooks_icon.png +0 -0
- biolmai-0.1.7/docs/_static/model_docs_icon.png +0 -0
- biolmai-0.1.7/docs/_static/python_sdk_icon.png +0 -0
- biolmai-0.1.7/docs/_static/tutorials_icon.png +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/conf.py +32 -44
- biolmai-0.1.7/docs/index.rst +107 -0
- biolmai-0.1.7/docs/model-docs/DNABERT.rst +640 -0
- biolmai-0.1.7/docs/model-docs/ESM-1v.rst +362 -0
- biolmai-0.1.7/docs/model-docs/ESM2_Embeddings.rst +242 -0
- biolmai-0.1.4/docs/model-docs/esm2_fold.rst → biolmai-0.1.7/docs/model-docs/ESMFold.rst +62 -63
- biolmai-0.1.7/docs/model-docs/ESM_InverseFold.rst +278 -0
- biolmai-0.1.7/docs/model-docs/ProtGPT2.rst +609 -0
- biolmai-0.1.7/docs/model-docs/ProteInfer_EC.rst +249 -0
- biolmai-0.1.7/docs/model-docs/ProteInfer_GO.rst +329 -0
- biolmai-0.1.7/docs/model-docs/index.rst +13 -0
- biolmai-0.1.7/docs/model-docs/progen2/ProGen2_BFD90.rst +251 -0
- biolmai-0.1.7/docs/model-docs/progen2/ProGen2_Medium.rst +248 -0
- biolmai-0.1.7/docs/model-docs/progen2/ProGen2_OAS.rst +246 -0
- biolmai-0.1.7/docs/model-docs/progen2/index.rst +10 -0
- biolmai-0.1.7/docs/python-client/get_started/authorization.rst +9 -0
- {biolmai-0.1.4/docs/python-client → biolmai-0.1.7/docs/python-client/get_started}/quickstart.rst +7 -0
- biolmai-0.1.7/docs/python-client/index.rst +18 -0
- biolmai-0.1.7/docs/python-client/usage.rst +7 -0
- biolmai-0.1.7/docs/tutorials_use_cases/notebooks.rst +9 -0
- biolmai-0.1.7/pyproject.toml +44 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/setup.cfg +7 -2
- biolmai-0.1.7/setup.py +53 -0
- biolmai-0.1.7/tests/test_biolmai.py +263 -0
- biolmai-0.1.4/biolmai/__init__.py +0 -15
- biolmai-0.1.4/biolmai/api.py +0 -394
- biolmai-0.1.4/biolmai/biolmai.py +0 -153
- biolmai-0.1.4/biolmai/cli.py +0 -67
- biolmai-0.1.4/biolmai/cls.py +0 -1
- biolmai-0.1.4/biolmai/payloads.py +0 -8
- biolmai-0.1.4/biolmai.egg-info/SOURCES.txt +0 -51
- biolmai-0.1.4/docs/index.rst +0 -74
- biolmai-0.1.4/docs/model-docs/admonitions.rst +0 -39
- biolmai-0.1.4/docs/model-docs/esm2_embeddings.rst +0 -10
- biolmai-0.1.4/docs/python-client/authors.rst +0 -1
- biolmai-0.1.4/docs/python-client/contributing.rst +0 -1
- biolmai-0.1.4/docs/python-client/history.rst +0 -1
- biolmai-0.1.4/docs/python-client/readme.rst +0 -1
- biolmai-0.1.4/docs/python-client/usage.rst +0 -8
- biolmai-0.1.4/docs/tutorials_use_cases/bulk_protein_folding.rst +0 -3
- biolmai-0.1.4/docs/tutorials_use_cases/dna_tutorials.rst +0 -8
- biolmai-0.1.4/docs/tutorials_use_cases/protein_tutorials.rst +0 -15
- biolmai-0.1.4/setup.py +0 -51
- biolmai-0.1.4/tests/test_biolmai.py +0 -226
- {biolmai-0.1.4 → biolmai-0.1.7}/AUTHORS.rst +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/CONTRIBUTING.rst +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/HISTORY.rst +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/LICENSE +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/MANIFEST.in +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/README.rst +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai/ltc.py +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai.egg-info/dependency_links.txt +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai.egg-info/entry_points.txt +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai.egg-info/not-zip-safe +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai.egg-info/requires.txt +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/biolmai.egg-info/top_level.txt +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/Makefile +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/_static/biolm_docs_logo_dark.png +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/_static/biolm_docs_logo_light.png +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/biolmai.rst +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/make.bat +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/model-docs/img/book_icon.png +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/model-docs/img/esmfold_perf.png +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/docs/modules.rst +0 -0
- {biolmai-0.1.4/docs/python-client → biolmai-0.1.7/docs/python-client/get_started}/installation.rst +0 -0
- {biolmai-0.1.4 → biolmai-0.1.7}/tests/__init__.py +0 -0
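
The headline change in 0.1.7 is the rewritten biolmai/api.py, which introduces a class-based client: an APIEndpoint base class whose predict/transform/generate methods validate sequences locally, batch them, and fan the batches out to the REST API concurrently. A minimal usage sketch follows; the concrete model class name is an assumption (taken from the ESMFoldSingleChain name referenced in api.py's comments — the real exports live in biolmai/cls.py):

    # Hypothetical usage of the 0.1.7 class-based client. The class name is
    # assumed from comments in biolmai/api.py; check biolmai/cls.py for the
    # classes this release actually ships.
    import biolmai.cls

    model = biolmai.cls.ESMFoldSingleChain()  # auth headers resolved in __init__
    results = model.predict(["MSILVTRPSP", "MKTAYIAKQR"])  # str or list of str
    # One result per input row: the API response, or a local-validation error
    # for sequences that failed the regex checks before any request was made.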
biolmai-0.1.7/biolmai/api.py (new file)

@@ -0,0 +1,310 @@
+"""References to API endpoints."""
+import datetime
+import inspect
+import time
+from functools import lru_cache
+
+import numpy as np
+import pandas as pd
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+import biolmai
+import biolmai.auth
+from biolmai.asynch import async_api_call_wrapper
+from biolmai.biolmai import log
+from biolmai.const import MULTIPROCESS_THREADS
+from biolmai.payloads import INST_DAT_TXT, predict_resp_many_in_one_to_many_singles
+
+
+@lru_cache(maxsize=64)
+def validate_endpoint_action(allowed_classes, method_name, api_class_name):
+    action_method_name = method_name.split(".")[-1]
+    if action_method_name not in allowed_classes:
+        err = "Only {} supported on {}"
+        err = err.format(list(allowed_classes), api_class_name)
+        raise AssertionError(err)
+
+
+def text_validator(text, c):
+    """Validate some text against a class-based validator, returning a string
+    if invalid, or None otherwise."""
+    try:
+        c(text)
+    except Exception as e:
+        return str(e)
+
+
+def validate(f):
+    def wrapper(*args, **kwargs):
+        # Get class instance at runtime, so you can access not just
+        # APIEndpoints, but any *parent* classes of that,
+        # like ESMFoldSinglechain.
+        class_obj_self = args[0]
+        try:
+            is_method = inspect.getfullargspec(f)[0][0] == "self"
+        except Exception:
+            is_method = False
+
+        # Is the function we decorated a class method?
+        if is_method:
+            name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
+        else:
+            name = f"{f.__module__}.{f.__name__}"
+
+        if is_method:
+            # Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
+            action_method_name = name.split(".")[-1]
+            validate_endpoint_action(
+                class_obj_self.action_class_strings,
+                action_method_name,
+                class_obj_self.__class__.__name__,
+            )
+
+        input_data = args[1]
+        # Validate each row's text/input based on class attribute `seq_classes`
+        for c in class_obj_self.seq_classes:
+            # Validate input data against regex
+            if class_obj_self.multiprocess_threads:
+                validation = input_data.text.apply(text_validator, args=(c,))
+            else:
+                validation = input_data.text.apply(text_validator, args=(c,))
+            if "validation" not in input_data.columns:
+                input_data["validation"] = validation
+            else:
+                input_data["validation"] = input_data["validation"].str.cat(
+                    validation, sep="\n", na_rep=""
+                )
+
+        # Mark your batches, excluding invalid rows
+        valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
+        N = class_obj_self.batch_size  # N rows will go per API request
+        # JOIN back, which is by index
+        if valid_dat.shape[0] != input_data.shape[0]:
+            valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
+            input_data = input_data.merge(
+                valid_dat.batch, left_index=True, right_index=True, how="left"
+            )
+        else:
+            input_data["batch"] = np.arange(input_data.shape[0]) // N
+
+        res = f(class_obj_self, input_data, **kwargs)
+        return res
+
+    return wrapper
+
+
+def convert_input(f):
+    def wrapper(*args, **kwargs):
+        # Get the user-input data argument to the decorated function
+        # class_obj_self = args[0]
+        input_data = args[1]
+        # Make sure we have expected input types
+        acceptable_inputs = (str, list, tuple, np.ndarray, pd.DataFrame)
+        if not isinstance(input_data, acceptable_inputs):
+            err = "Input must be one or many DNA or protein strings"
+            raise ValueError(err)
+        # Convert single-sequence input to list
+        if isinstance(input_data, str):
+            input_data = [input_data]
+        # Make sure we don't have a matrix
+        if isinstance(input_data, np.ndarray) and len(input_data.shape) > 1:
+            err = "Detected Numpy matrix - input a single vector or array"
+            raise AssertionError(err)
+        # Make sure we don't have a >=2D DF
+        if isinstance(input_data, pd.DataFrame) and len(input_data.shape) > 1:
+            err = "Detected Pandas DataFrame - input a single vector or Series"
+            raise AssertionError(err)
+        input_data = pd.DataFrame(input_data, columns=["text"])
+        return f(args[0], input_data, **kwargs)
+
+    return wrapper
+
+
+class APIEndpoint:
+    batch_size = 3  # Overwrite in parent classes as needed
+
+    def __init__(self, multiprocess_threads=None):
+        # Check for instance-specific threads, otherwise read from env var
+        if multiprocess_threads is not None:
+            self.multiprocess_threads = multiprocess_threads
+        else:
+            self.multiprocess_threads = MULTIPROCESS_THREADS  # Could be False
+        # Get correct auth-like headers
+        self.auth_headers = biolmai.auth.get_user_auth_header()
+        self.action_class_strings = tuple(
+            [c.__name__.replace("Action", "").lower() for c in self.action_classes]
+        )
+
+    def post_batches(self, dat, slug, action, payload_maker, resp_key):
+        keep_batches = dat.loc[~dat.batch.isnull(), ["text", "batch"]]
+        if keep_batches.shape[0] == 0:
+            pass  # Do nothing - we made nice JSON errors to return in the DF
+            # err = "No inputs found following local validation"
+            # raise AssertionError(err)
+        if keep_batches.shape[0] > 0:
+            api_resps = async_api_call_wrapper(
+                keep_batches, slug, action, payload_maker, resp_key
+            )
+            if isinstance(api_resps, pd.DataFrame):
+                batch_res = api_resps.explode("api_resp")  # Should be lists of results
+                len_res = batch_res.shape[0]
+            else:
+                batch_res = pd.DataFrame({"api_resp": api_resps})
+                len_res = batch_res.shape[0]
+            orig_request_rows = keep_batches.shape[0]
+            if len_res != orig_request_rows:
+                err = "Response rows ({}) mismatch with input rows ({})"
+                err = err.format(len_res, orig_request_rows)
+                raise AssertionError(err)
+
+            # Stack the results horizontally w/ original rows of batches
+            keep_batches["prev_idx"] = keep_batches.index
+            keep_batches.reset_index(drop=False, inplace=True)
+            batch_res.reset_index(drop=True, inplace=True)
+            keep_batches["api_resp"] = batch_res
+            keep_batches.set_index("prev_idx", inplace=True)
+            dat = dat.join(keep_batches.reindex(["api_resp"], axis=1))
+        else:
+            dat["api_resp"] = None
+        return dat
+
+    def unpack_local_validations(self, dat):
+        dat.loc[dat.api_resp.isnull(), "api_resp"] = (
+            dat.loc[~dat.validation.isnull(), "validation"]
+            .apply(
+                predict_resp_many_in_one_to_many_singles, args=(None, None, True, None)
+            )
+            .explode()
+        )
+
+        return dat
+
+    @convert_input
+    @validate
+    def predict(self, dat):
+        dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
+        dat = self.unpack_local_validations(dat)
+        return dat.api_resp.replace(np.nan, None).tolist()
+
+    def infer(self, dat):
+        return self.predict(dat)
+
+    @convert_input
+    @validate
+    def transform(self, dat):
+        dat = self.post_batches(
+            dat, self.slug, "transform", INST_DAT_TXT, "predictions"
+        )
+        dat = self.unpack_local_validations(dat)
+        return dat.api_resp.replace(np.nan, None).tolist()
+
+    # @convert_input
+    # @validate
+    # def encode(self, dat):
+    #     # NOTE: we defined this for the specific case of ESM2
+    #     # TODO: this will be need again in v2 of API contract
+    #     dat = self.post_batches(dat, self.slug, "transform",
+    #                             INST_DAT_TXT, "embeddings")
+    #     dat = self.unpack_local_validations(dat)
+    #     return dat.api_resp.replace(np.nan, None).tolist()
+
+    @convert_input
+    @validate
+    def generate(self, dat):
+        dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
+        dat = self.unpack_local_validations(dat)
+        return dat.api_resp.replace(np.nan, None).tolist()
+
+
+def retry_minutes(sess, URL, HEADERS, dat, timeout, mins):
+    """Retry for N minutes."""
+    HEADERS.update({"Content-Type": "application/json"})
+    attempts, max_attempts = 0, 5
+    try:
+        now = datetime.datetime.now()
+        try_until = now + datetime.timedelta(minutes=mins)
+        while datetime.datetime.now() < try_until and attempts < max_attempts:
+            response = None
+            try:
+                log.info(f"Trying {datetime.datetime.now()}")
+                response = sess.post(URL, headers=HEADERS, data=dat, timeout=timeout)
+                if response.status_code not in (400, 404):
+                    response.raise_for_status()
+                if "error" in response.json():
+                    raise ValueError(response.json().dumps())
+                else:
+                    break
+            except Exception as e:
+                log.warning(e)
+                if response:
+                    log.warning(response.text)
+                time.sleep(5)  # Wait 5 seconds between tries
+                attempts += 1
+        if response is None:
+            err = "Got Nonetype response"
+            raise ValueError(err)
+        elif "Server Error" in response.text:
+            err = "Got Server Error"
+            raise ValueError(err)
+    except Exception:
+        return response
+    return response
+
+
+def requests_retry_session(
+    retries=3,
+    backoff_factor=0.3,
+    status_forcelist=None,
+    session=None,
+):
+    if status_forcelist is None:
+        status_forcelist = list(range(400, 599))
+    session = session or requests.Session()
+    retry = Retry(
+        total=retries,
+        read=retries,
+        connect=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+    )
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
+
+
+class PredictAction:
+    def __str__(self):
+        return "PredictAction"
+
+
+class GenerateAction:
+    def __str__(self):
+        return "GenerateAction"
+
+
+class TransformAction:
+    def __str__(self):
+        return "TransformAction"
+
+
+# class EncodeAction:
+#     def __str__(self):
+#         return "EncodeAction"
+
+
+class ExplainAction:
+    def __str__(self):
+        return "ExplainAction"
+
+
+class SimilarityAction:
+    def __str__(self):
+        return "SimilarityAction"
+
+
+class FinetuneAction:
+    def __str__(self):
+        return "FinetuneAction"
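
Two details of the new api.py worth noting: requests_retry_session wires a standard urllib3 Retry into both HTTP adapters, and its default status_forcelist of range(400, 599) means plain client errors (401, 403, 422) are retried alongside server errors. A short caller sketch, with a placeholder URL:

    # Sketch of using the retry-enabled session from biolmai/api.py.
    # The endpoint URL here is a placeholder, not a documented route.
    from biolmai.api import requests_retry_session

    session = requests_retry_session(retries=5, backoff_factor=0.5)
    # Retries happen inside the adapter; exhausting them raises
    # requests.exceptions.RetryError rather than returning a response.
    resp = session.get("https://example.com/health/", timeout=10)
    resp.raise_for_status()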
{biolmai-0.1.4 → biolmai-0.1.7}/biolmai/asynch.py

@@ -1,23 +1,15 @@
+import asyncio
+from asyncio import create_task, gather, run
+from itertools import zip_longest
+from typing import Dict, List
+
 import aiohttp.resolver
+from aiohttp import ClientSession
 
+from biolmai.auth import get_user_auth_header
 from biolmai.const import BASE_API_URL, MULTIPROCESS_THREADS
 
 aiohttp.resolver.DefaultResolver = aiohttp.resolver.AsyncResolver
-from aiohttp import ClientSession, TCPConnector
-from typing import List
-import json
-import asyncio
-
-from asyncio import create_task, gather, run, sleep
-
-
-
-async def get_one(session: ClientSession, slug: str, action: str,
-                  payload: dict, response_key: str):
-    pass
-
-
-from aiohttp import ClientSession
 
 
 async def get_one(session: ClientSession, url: str) -> None:
@@ -30,25 +22,31 @@ async def get_one(session: ClientSession, url: str) -> None:
     return text_resp
 
 
-async def get_one_biolm(session: ClientSession,
-                        url: str,
-                        pload: dict,
-                        headers: dict,
-                        response_key: str = None) -> None:
+async def get_one_biolm(
+    session: ClientSession,
+    url: str,
+    pload: dict,
+    headers: dict,
+    response_key: str = None,
+) -> None:
     print("Requesting", url)
-    pload_batch = pload.pop('batch')
-    pload_batch_size = pload.pop('batch_size')
+    pload_batch = pload.pop("batch")
+    pload_batch_size = pload.pop("batch_size")
     t = aiohttp.ClientTimeout(
-        total=
-        # total timeout (time consists connection establishment for
+        total=1600,  # 27 mins
+        # total timeout (time consists connection establishment for
+        # a new connection or waiting for a free connection from a
+        # pool if pool connection limits are exceeded) default value
+        # is 5 minutes, set to `None` or `0` for unlimited timeout
         sock_connect=None,
-        # Maximal number of seconds for connecting to a peer for a
+        # Maximal number of seconds for connecting to a peer for a
+        # new connection, not given from a pool. See also connect.
        sock_read=None
         # Maximal number of seconds for reading a portion of data from a peer
     )
     async with session.post(url, headers=headers, json=pload, timeout=t) as resp:
         resp_json = await resp.json()
-        resp_json['batch'] = pload_batch
+        resp_json["batch"] = pload_batch
         status_code = resp.status
         expected_root_key = response_key
         to_ret = []
@@ -61,9 +59,7 @@ async def get_one_biolm(session: ClientSession,
         else:
             raise ValueError("Unexpected response in parser")
         for idx, item in enumerate(list_of_individual_seq_results):
-            d = {
-                'batch_id': pload_batch,
-                'batch_item': idx}
+            d = {"status_code": status_code, "batch_id": pload_batch, "batch_item": idx}
             if not status_code or status_code != 200:
                 d.update(item)  # Put all resp keys at root there
             else:
@@ -77,16 +73,15 @@ async def get_one_biolm(session: ClientSession,
     # await sleep(2)  # for demo purposes
     # text_resp = text.strip().split("\n", 1)[0]
     # print("Got response from", url, text_resp)
-    return j
 
 
 async def async_range(count):
     for i in range(count):
-        yield(i)
+        yield (i)
         await asyncio.sleep(0.0)
 
 
-async def get_all(urls: List[str], num_concurrent: int) -> List:
+async def get_all(urls: List[str], num_concurrent: int) -> list:
     url_iterator = iter(urls)
     keep_going = True
     results = []
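
Both get_all above and get_all_biolm below use the same bounded-concurrency scheme: pull up to num_concurrent items off an iterator, schedule them with create_task, await the wave with gather, repeat. A generic, self-contained version of that loop (the names here are illustrative, not part of the package):

    import asyncio
    from asyncio import create_task, gather

    async def bounded_gather(items, worker, num_concurrent):
        # Drain `items` in waves of `num_concurrent`, awaiting each wave
        # fully before scheduling the next - the scheme get_all_biolm uses.
        it = iter(items)
        results, keep_going = [], True
        while keep_going:
            tasks = []
            for _ in range(num_concurrent):
                try:
                    item = next(it)
                except StopIteration:
                    keep_going = False
                    break
                tasks.append(create_task(worker(item)))
            results.extend(await gather(*tasks))
        return results

    # e.g. asyncio.run(bounded_gather(payloads, post_one, num_concurrent=3))

Note this is wave-based rather than semaphore-based: one slow request holds up the start of the next wave of num_concurrent tasks.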
@@ -106,22 +101,26 @@ async def get_all(urls: List[str], num_concurrent: int) -> List:
     return results
 
 
-async def get_all_biolm(url: str,
-                        ploads: List[dict],
-                        headers: dict,
-                        num_concurrent: int,
-                        response_key: str = None) -> List:
+async def get_all_biolm(
+    url: str,
+    ploads: List[Dict],
+    headers: dict,
+    num_concurrent: int,
+    response_key: str = None,
+) -> list:
     ploads_iterator = iter(ploads)
     keep_going = True
     results = []
-    connector = aiohttp.TCPConnector(limit=100,
-                                     limit_per_host=50,
-                                     ttl_dns_cache=60)
+    connector = aiohttp.TCPConnector(limit=100, limit_per_host=50, ttl_dns_cache=60)
     ov_tout = aiohttp.ClientTimeout(
         total=None,
-        # total timeout (time consists connection establishment for
+        # total timeout (time consists connection establishment for
+        # a new connection or waiting for a free connection from a
+        # pool if pool connection limits are exceeded) default value
+        # is 5 minutes, set to `None` or `0` for unlimited timeout
        sock_connect=None,
-        # Maximal number of seconds for connecting to a peer for a
+        # Maximal number of seconds for connecting to a peer for a
+        # new connection, not given from a pool. See also connect.
         sock_read=None
         # Maximal number of seconds for reading a portion of data from a peer
     )
@@ -134,35 +133,31 @@ async def get_all_biolm(url: str,
             except StopIteration:
                 keep_going = False
                 break
-            new_task = create_task(
-                get_one_biolm(session, url, pload, headers, response_key))
+            new_task = create_task(
+                get_one_biolm(session, url, pload, headers, response_key)
+            )
             tasks.append(new_task)
         res = await gather(*tasks)
         results.extend(res)
     return results
 
 
-async def async_main(urls, concurrency) -> List:
+async def async_main(urls, concurrency) -> list:
     return await get_all(urls, concurrency)
 
 
-async def async_api_calls(model_name,
-                          action,
-                          headers,
-                          payloads,
-                          response_key=None):
+async def async_api_calls(model_name, action, headers, payloads, response_key=None):
     """Hit an arbitrary BioLM model inference API."""
     # Normally would POST multiple sequences at once for greater efficiency,
     # but for simplicity sake will do one at at time right now
-    url = f'{BASE_API_URL}/models/{model_name}/{action}/'
+    url = f"{BASE_API_URL}/models/{model_name}/{action}/"
 
     if not isinstance(payloads, (list, dict)):
         err = "API request payload must be a list or dict, got {}"
         raise AssertionError(err.format(type(payloads)))
 
     concurrency = int(MULTIPROCESS_THREADS)
-    return await get_all_biolm(url, payloads, headers, concurrency,
-                               response_key)
+    return await get_all_biolm(url, payloads, headers, concurrency, response_key)
 
     # payload = json.dumps(payload)
     # session = requests_retry_session()
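
For concreteness, the request async_api_calls ultimately issues per batch looks roughly like the following. The model slug and the instance layout are assumptions (INST_DAT_TXT in biolmai/payloads.py builds the real payload), while the "batch"/"batch_size" bookkeeping keys are confirmed by the diff, which shows get_one_biolm popping them before the POST:

    # Illustrative request shape only - slug, base URL, and payload fields
    # are assumptions; the real base URL lives in biolmai/const.py.
    BASE_API_URL = "https://biolm.ai/api/v1"  # placeholder value
    url = f"{BASE_API_URL}/models/esmfold-singlechain/predict/"
    pload = {
        "instances": [{"data": {"text": "MSILVTRPSP"}}],  # assumed layout
        "batch": 0,        # stripped client-side, used to reorder results
        "batch_size": 1,   # stripped client-side
    }
    # get_one_biolm pops "batch" and "batch_size", then POSTs the rest as JSON.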
@@ -183,3 +178,45 @@ async def async_api_calls(model_name,
     # headers = get_user_auth_header()  # Need to re-get these now
     # response = retry_minutes(session, url, headers, payload, tout, mins=10)
     # return response
+
+
+def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key):
+    """Wrap API calls to assist with sequence validation as a pre-cursor to
+    each API call.
+    """
+    model_name = slug
+    # payload = payload_maker(grouped_df)
+    init_ploads = grouped_df.groupby("batch").apply(
+        payload_maker, include_batch_size=True
+    )
+    ploads = init_ploads.to_list()
+    init_ploads = init_ploads.to_frame(name="pload")
+    init_ploads["batch"] = init_ploads.index
+    init_ploads = init_ploads.reset_index(drop=True)
+    assert len(ploads) == init_ploads.shape[0]
+    for inst, b in zip_longest(ploads, init_ploads["batch"].to_list()):
+        if inst is None or b is None:
+            raise ValueError(
+                "ploads and init_ploads['batch'] are not of the same length"
+            )
+        inst["batch"] = b
+
+    headers = get_user_auth_header()  # Need to pull each time
+    # urls = [
+    #     "https://github.com",
+    #     "https://stackoverflow.com",
+    #     "https://python.org",
+    # ]
+    # concurrency = 3
+    api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key))
+    api_resp = [item for sublist in api_resp for item in sublist]
+    api_resp = sorted(api_resp, key=lambda x: x["batch_id"])
+    # print(api_resp)
+    # api_resp = biolmai.api_call(model_name, action, headers, payload,
+    #                             response_key)
+    # resp_json = api_resp.json()
+    # batch_id = int(grouped_df.batch.iloc[0])
+    # batch_size = grouped_df.shape[0]
+    # response = predict_resp_many_in_one_to_many_singles(
+    #     resp_json, api_resp.status_code, batch_id, None, batch_size)
+    return api_resp
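
To make the batching contract in async_api_call_wrapper concrete, here is a small standalone sketch of its groupby step, with a toy payload_maker standing in for INST_DAT_TXT (whose exact output shape is assumed, not confirmed by this diff):

    import pandas as pd

    def toy_payload_maker(group, include_batch_size=False):
        # Stand-in for INST_DAT_TXT: one payload dict per batch group.
        # The "instances"/"data"/"text" layout is an assumption.
        pload = {"instances": [{"data": {"text": t}} for t in group["text"]]}
        if include_batch_size:
            pload["batch_size"] = group.shape[0]
        return pload

    df = pd.DataFrame({"text": ["MSILV", "MKTAYIAK", "GATTACA"],
                       "batch": [0, 0, 1]})
    ploads = df.groupby("batch").apply(toy_payload_maker,
                                       include_batch_size=True).to_list()
    # Two payload dicts: batch 0 carries two sequences, batch 1 carries one.
    # async_api_call_wrapper then stamps each dict with its "batch" id so
    # responses can be re-sorted by batch_id after the concurrent POSTs.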