PyPI - biolmai - Versions diffs - 0.1.8__tar.gz → 0.2.10__tar.gz - Mend

biolmai 0.1.8.tar.gz → 0.2.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109)

biolmai-0.2.10/PKG-INFO +137 -0
biolmai-0.2.10/README.rst +95 -0
biolmai-0.2.10/biolmai/__init__.py +24 -0
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai/api.py +130 -87
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai/asynch.py +16 -8
biolmai-0.2.10/biolmai/auth.py +1043 -0
biolmai-0.2.10/biolmai/biolmai.py +122 -0
biolmai-0.2.10/biolmai/cli.py +115 -0
biolmai-0.2.10/biolmai/client.py +741 -0
biolmai-0.2.10/biolmai/cls.py +176 -0
biolmai-0.2.10/biolmai/const.py +66 -0
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai/payloads.py +13 -2
biolmai-0.2.10/biolmai/seqflow_auth.py +200 -0
biolmai-0.2.10/biolmai/validate.py +159 -0
biolmai-0.2.10/biolmai.egg-info/PKG-INFO +137 -0
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/SOURCES.txt +28 -19
biolmai-0.2.10/biolmai.egg-info/entry_points.txt +5 -0
biolmai-0.2.10/biolmai.egg-info/requires.txt +21 -0
biolmai-0.2.10/docs/_static/biolm_logomark_transparent.png +0 -0
biolmai-0.2.10/docs/_static/biolm_logomark_transparent_for_dark.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/biolmai.rst +16 -0
biolmai-0.2.10/docs/conf.py +289 -0
biolmai-0.2.10/docs/index.rst +48 -0
biolmai-0.2.10/docs/python-client/api_biolm.rst +49 -0
biolmai-0.2.10/docs/python-client/api_client.rst +74 -0
biolmai-0.2.10/docs/python-client/async_sync.rst +157 -0
biolmai-0.2.10/docs/python-client/authentication.rst +137 -0
biolmai-0.2.10/docs/python-client/batching.rst +187 -0
biolmai-0.2.10/docs/python-client/disk_output.rst +66 -0
biolmai-0.2.10/docs/python-client/error_handling.rst +173 -0
biolmai-0.2.10/docs/python-client/faq.rst +38 -0
biolmai-0.2.10/docs/python-client/features.rst +22 -0
biolmai-0.2.10/docs/python-client/index.rst +29 -0
{biolmai-0.1.8/docs/python-client/get_started → biolmai-0.2.10/docs/python-client}/installation.rst +2 -2
biolmai-0.2.10/docs/python-client/overview.rst +18 -0
biolmai-0.2.10/docs/python-client/quickstart.rst +28 -0
biolmai-0.2.10/docs/python-client/rate_limiting.rst +97 -0
biolmai-0.2.10/docs/python-client/usage.rst +84 -0
biolmai-0.2.10/pyproject.toml +99 -0
{biolmai-0.1.8 → biolmai-0.2.10}/setup.cfg +5 -1
{biolmai-0.1.8 → biolmai-0.2.10}/setup.py +27 -9
biolmai-0.2.10/tests/test_abatch_calls.py +208 -0
biolmai-0.2.10/tests/test_aclient.py +296 -0
biolmai-0.2.10/tests/test_batch_error_retry.py +80 -0
biolmai-0.2.10/tests/test_batch_errors.py +47 -0
biolmai-0.2.10/tests/test_biolmai.py +211 -0
biolmai-0.2.10/tests/test_client.py +379 -0
biolmai-0.2.10/tests/test_integration.py +132 -0
biolmai-0.2.10/tests/test_max_items.py +183 -0
biolmai-0.2.10/tests/test_oauth_auth.py +286 -0
biolmai-0.2.10/tests/test_rate_limit.py +182 -0
biolmai-0.2.10/tests/test_schemas.py +30 -0
biolmai-0.1.8/PKG-INFO +0 -70
biolmai-0.1.8/README.rst +0 -37
biolmai-0.1.8/biolmai/__init__.py +0 -7
biolmai-0.1.8/biolmai/auth.py +0 -173
biolmai-0.1.8/biolmai/biolmai.py +0 -5
biolmai-0.1.8/biolmai/cli.py +0 -75
biolmai-0.1.8/biolmai/cls.py +0 -97
biolmai-0.1.8/biolmai/const.py +0 -29
biolmai-0.1.8/biolmai/validate.py +0 -134
biolmai-0.1.8/biolmai.egg-info/PKG-INFO +0 -70
biolmai-0.1.8/biolmai.egg-info/entry_points.txt +0 -2
biolmai-0.1.8/biolmai.egg-info/requires.txt +0 -4
biolmai-0.1.8/docs/conf.py +0 -163
biolmai-0.1.8/docs/index.rst +0 -107
biolmai-0.1.8/docs/model-docs/DNABERT.rst +0 -640
biolmai-0.1.8/docs/model-docs/ESM-1v.rst +0 -362
biolmai-0.1.8/docs/model-docs/ESM2_Embeddings.rst +0 -242
biolmai-0.1.8/docs/model-docs/ESMFold.rst +0 -252
biolmai-0.1.8/docs/model-docs/ESM_InverseFold.rst +0 -278
biolmai-0.1.8/docs/model-docs/ProtGPT2.rst +0 -609
biolmai-0.1.8/docs/model-docs/ProteInfer_EC.rst +0 -249
biolmai-0.1.8/docs/model-docs/ProteInfer_GO.rst +0 -329
biolmai-0.1.8/docs/model-docs/img/book_icon.png +0 -0
biolmai-0.1.8/docs/model-docs/img/esmfold_perf.png +0 -0
biolmai-0.1.8/docs/model-docs/index.rst +0 -13
biolmai-0.1.8/docs/model-docs/progen2/ProGen2_BFD90.rst +0 -251
biolmai-0.1.8/docs/model-docs/progen2/ProGen2_Medium.rst +0 -248
biolmai-0.1.8/docs/model-docs/progen2/ProGen2_OAS.rst +0 -246
biolmai-0.1.8/docs/model-docs/progen2/index.rst +0 -10
biolmai-0.1.8/docs/python-client/get_started/authorization.rst +0 -9
biolmai-0.1.8/docs/python-client/get_started/quickstart.rst +0 -15
biolmai-0.1.8/docs/python-client/index.rst +0 -18
biolmai-0.1.8/docs/python-client/usage.rst +0 -7
biolmai-0.1.8/pyproject.toml +0 -44
biolmai-0.1.8/tests/test_biolmai.py +0 -263
{biolmai-0.1.8 → biolmai-0.2.10}/AUTHORS.rst +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/CONTRIBUTING.rst +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/HISTORY.rst +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/LICENSE +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/MANIFEST.in +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai/ltc.py +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/dependency_links.txt +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/not-zip-safe +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/top_level.txt +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/Makefile +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/api_reference_icon.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/biolm_docs_logo_dark.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/biolm_docs_logo_light.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/chat_agents_icon.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/jupyter_notebooks_icon.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/model_docs_icon.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/python_sdk_icon.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/tutorials_icon.png +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/make.bat +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/modules.rst +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/docs/tutorials_use_cases/notebooks.rst +0 -0
{biolmai-0.1.8 → biolmai-0.2.10}/tests/__init__.py +0 -0

biolmai-0.2.10/PKG-INFO ADDED Viewed

@@ -0,0 +1,137 @@
+Metadata-Version: 2.4
+Name: biolmai
+Version: 0.2.10
+Summary: BioLM Python client
+Home-page: https://github.com/BioLM/py-biolm
+Author: BioLM
+Author-email: BioLM <support@biolm.ai>
+License: Apache Software License 2.0
+Keywords: biolmai
+Classifier: Development Status :: 2 - Pre-Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Natural Language :: English
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: >=3.7
+Description-Content-Type: text/x-rst
+License-File: LICENSE
+License-File: AUTHORS.rst
+Requires-Dist: httpx>=0.23.0
+Requires-Dist: httpcore
+Requires-Dist: Click>=6.0
+Requires-Dist: requests
+Requires-Dist: aiodns
+Requires-Dist: synchronicity>=0.5.0; python_version >= "3.9"
+Requires-Dist: synchronicity<0.5.0; python_version < "3.9"
+Requires-Dist: typing_extensions; python_version < "3.9"
+Requires-Dist: aiohttp<=3.8.6; python_version < "3.12"
+Requires-Dist: aiohttp>=3.9.0; python_version >= "3.12"
+Requires-Dist: async-lru
+Requires-Dist: aiofiles
+Requires-Dist: cryptography
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
+========
+BioLM AI
+========
+.. image:: https://img.shields.io/pypi/v/biolmai.svg
+        :target: https://pypi.python.org/pypi/biolmai
+.. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
+        :target: https://travis-ci.org/github/BioLM/py-biolm
+.. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
+        :target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
+        :alt: Documentation Status
+Python client and SDK for `BioLM <https://biolm.ai>`_
+Install the package:
+.. code-block:: bash
+    pip install biolmai
+Basic usage:
+.. code-block:: python
+    from biolmai import biolm
+    # Encode a single sequence
+    result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
+    # Predict a batch of sequences
+    result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
+    # Write results to disk
+    biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
+Asynchronous usage:
+.. code-block:: python
+    from biolmai.client import BioLMApiClient
+    import asyncio
+    async def main():
+        model = BioLMApiClient("esmfold")
+        result = await model.predict(items=[{"sequence": "MDNELE"}])
+        print(result)
+    asyncio.run(main())
+Overview
+========
+The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
+Main features:
+- High-level BioLM constructor for quick requests
+- Sync and async interfaces
+- Automatic or custom rate limiting/throttling
+- Schema-based batch size detection
+- Flexible input formats (single key + list, or list of dicts)
+- Low memory usage via generators
+- Flexible error handling (raise, continue, or stop on error)
+- Universal HTTP client for both sync and async
+Features
+========
+- **High-level constructor**: Instantly run an API call with a single line.
+- **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
+- **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
+- **Schema-based batching**: Automatically queries API for max batch size.
+- **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
+- **Low memory**: Uses generators for validation and batching.
+- **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
+- **Disk output**: Write results as JSONL to disk.
+- **Universal HTTP client**: Efficient for both sync and async.
+- **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
+**Example endpoints and actions:**
+- `esm2-8m/encode`: Embedding for protein sequences.
+- `esmfold/predict`: Structure prediction for protein sequences.
+- `progen2-oas/generate`: Sequence generation from a context string.
+- `dnabert2/predict`: Masked prediction for protein sequences.
+- `ablang2/encode`: Embeddings for paired-chain antibodies.
+* Free software: Apache Software License 2.0
+* Documentation: https://docs.biolm.ai

biolmai-0.2.10/README.rst ADDED Viewed

@@ -0,0 +1,95 @@
+========
+BioLM AI
+========
+.. image:: https://img.shields.io/pypi/v/biolmai.svg
+        :target: https://pypi.python.org/pypi/biolmai
+.. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
+        :target: https://travis-ci.org/github/BioLM/py-biolm
+.. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
+        :target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
+        :alt: Documentation Status
+Python client and SDK for `BioLM <https://biolm.ai>`_
+Install the package:
+.. code-block:: bash
+    pip install biolmai
+Basic usage:
+.. code-block:: python
+    from biolmai import biolm
+    # Encode a single sequence
+    result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
+    # Predict a batch of sequences
+    result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
+    # Write results to disk
+    biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
+Asynchronous usage:
+.. code-block:: python
+    from biolmai.client import BioLMApiClient
+    import asyncio
+    async def main():
+        model = BioLMApiClient("esmfold")
+        result = await model.predict(items=[{"sequence": "MDNELE"}])
+        print(result)
+    asyncio.run(main())
+Overview
+========
+The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
+Main features:
+- High-level BioLM constructor for quick requests
+- Sync and async interfaces
+- Automatic or custom rate limiting/throttling
+- Schema-based batch size detection
+- Flexible input formats (single key + list, or list of dicts)
+- Low memory usage via generators
+- Flexible error handling (raise, continue, or stop on error)
+- Universal HTTP client for both sync and async
+Features
+========
+- **High-level constructor**: Instantly run an API call with a single line.
+- **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
+- **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
+- **Schema-based batching**: Automatically queries API for max batch size.
+- **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
+- **Low memory**: Uses generators for validation and batching.
+- **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
+- **Disk output**: Write results as JSONL to disk.
+- **Universal HTTP client**: Efficient for both sync and async.
+- **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
+**Example endpoints and actions:**
+- `esm2-8m/encode`: Embedding for protein sequences.
+- `esmfold/predict`: Structure prediction for protein sequences.
+- `progen2-oas/generate`: Sequence generation from a context string.
+- `dnabert2/predict`: Masked prediction for protein sequences.
+- `ablang2/encode`: Embeddings for paired-chain antibodies.
+* Free software: Apache Software License 2.0
+* Documentation: https://docs.biolm.ai

biolmai-0.2.10/biolmai/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""Top-level package for BioLM AI."""
+__author__ = """Nikhil Haas"""
+__email__ = "nikhil@biolm.ai"
+__version__ = '0.2.10'
+from biolmai.client import BioLMApi, BioLMApiClient
+from biolmai.biolmai import BioLM
+from typing import Optional, Union, List, Any
+__all__ = ['biolm']
+def biolm(
+    *,
+    entity: str,
+    action: str,
+    type: Optional[str] = None,
+    items: Union[Any, List[Any]],
+    params: Optional[dict] = None,
+    api_key: Optional[str] = None,
+    **kwargs
+) -> Any:
+    """Top-level convenience function that wraps the BioLM class and returns the result."""
+    return BioLM(entity=entity, action=action, type=type, items=items, params=params, api_key=api_key, **kwargs)

{biolmai-0.1.8 → biolmai-0.2.10}/biolmai/api.py RENAMED Viewed

@@ -4,8 +4,12 @@ import inspect
 import time
 from functools import lru_cache
-import numpy as np
-import pandas as pd
+try:
+    import numpy as np
+    import pandas as pd
+except ImportError:
+    pass
 import requests
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
@@ -15,7 +19,7 @@ import biolmai.auth
 from biolmai.asynch import async_api_call_wrapper
 from biolmai.biolmai import log
 from biolmai.const import MULTIPROCESS_THREADS
-from biolmai.payloads import INST_DAT_TXT, predict_resp_many_in_one_to_many_singles
+from biolmai.payloads import INST_DAT_TXT, PARAMS_ITEMS, predict_resp_many_in_one_to_many_singles
 @lru_cache(maxsize=64)
@@ -35,65 +39,82 @@ def text_validator(text, c):
     except Exception as e:
         return str(e)
+def combine_validation(x, y):
+    if x is None and y is None:
+        return None
+    elif isinstance(x, str) and y is None:
+        return x
+    elif x is None and isinstance(y, str):
+        return y
+    elif isinstance(x, str) and isinstance(y, str):
+        return f"{x}\n{y}"
+def validate_action(action):
+    def validate(f):
+        def wrapper(*args, **kwargs):
+            # Get class instance at runtime, so you can access not just
+            # APIEndpoints, but any *parent* classes of that,
+            # like ESMFoldSinglechain.
+            class_obj_self = args[0]
+            try:
+                is_method = inspect.getfullargspec(f)[0][0] == "self"
+            except Exception:
+                is_method = False
-def validate(f):
-    def wrapper(*args, **kwargs):
-        # Get class instance at runtime, so you can access not just
-        # APIEndpoints, but any *parent* classes of that,
-        # like ESMFoldSinglechain.
-        class_obj_self = args[0]
-        try:
-            is_method = inspect.getfullargspec(f)[0][0] == "self"
-        except Exception:
-            is_method = False
-        # Is the function we decorated a class method?
-        if is_method:
-            name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
-        else:
-            name = f"{f.__module__}.{f.__name__}"
-        if is_method:
-            # Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
-            action_method_name = name.split(".")[-1]
-            validate_endpoint_action(
-                class_obj_self.action_class_strings,
-                action_method_name,
-                class_obj_self.__class__.__name__,
-            )
-        input_data = args[1]
-        # Validate each row's text/input based on class attribute `seq_classes`
-        for c in class_obj_self.seq_classes:
-            # Validate input data against regex
-            if class_obj_self.multiprocess_threads:
-                validation = input_data.text.apply(text_validator, args=(c,))
+            # Is the function we decorated a class method?
+            if is_method:
+                name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
             else:
-                validation = input_data.text.apply(text_validator, args=(c,))
-            if "validation" not in input_data.columns:
-                input_data["validation"] = validation
-            else:
-                input_data["validation"] = input_data["validation"].str.cat(
-                    validation, sep="\n", na_rep=""
+                name = f"{f.__module__}.{f.__name__}"
+            if is_method:
+                # Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
+                action_method_name = name.split(".")[-1]
+                validate_endpoint_action(
+                    class_obj_self.action_class_strings,
+                    action_method_name,
+                    class_obj_self.__class__.__name__,
                 )
-        # Mark your batches, excluding invalid rows
-        valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
-        N = class_obj_self.batch_size  # N rows will go per API request
-        # JOIN back, which is by index
-        if valid_dat.shape[0] != input_data.shape[0]:
-            valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
-            input_data = input_data.merge(
-                valid_dat.batch, left_index=True, right_index=True, how="left"
-            )
-        else:
-            input_data["batch"] = np.arange(input_data.shape[0]) // N
-        res = f(class_obj_self, input_data, **kwargs)
-        return res
-    return wrapper
+            input_data = args[1]
+            # Validate each row's text/input based on class attribute `seq_classes`
+            if action == "predict":
+                input_classes = class_obj_self.predict_input_classes
+            elif action == "encode":
+                input_classes = class_obj_self.encode_input_classes
+            elif action == "generate":
+                input_classes = class_obj_self.generate_input_classes
+            elif action == "transform":
+                input_classes = class_obj_self.transform_input_classes
+            for c in input_classes:
+                # Validate input data against regex
+                if class_obj_self.multiprocess_threads:
+                    validation = input_data.text.apply(text_validator, args=(c,))
+                else:
+                    validation = input_data.text.apply(text_validator, args=(c,))
+                if "validation" not in input_data.columns:
+                    input_data["validation"] = validation
+                else:
+                    # masking and loc may be more performant option
+                    input_data["validation"] = input_data["validation"].combine(validation, combine_validation)
+            # Mark your batches, excluding invalid rows
+            valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
+            N = class_obj_self.batch_size  # N rows will go per API request
+            # JOIN back, which is by index
+            if valid_dat.shape[0] != input_data.shape[0]:
+                valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
+                input_data = input_data.merge(
+                    valid_dat.batch, left_index=True, right_index=True, how="left"
+                )
+            else:
+                input_data["batch"] = np.arange(input_data.shape[0]) // N
+            res = f(class_obj_self, input_data, **kwargs)
+            return res
+        return wrapper
+    return validate
 def convert_input(f):
     def wrapper(*args, **kwargs):
@@ -123,7 +144,20 @@ def convert_input(f):
 class APIEndpoint:
-    batch_size = 3  # Overwrite in parent classes as needed
+     # Overwrite in parent classes as needed
+    batch_size = 3
+    params = None
+    action_classes = ()
+    api_version = 2
+    predict_input_key = "sequence"
+    encode_input_key = "sequence"
+    generate_input_key = "context"
+    predict_input_classes = ()
+    encode_input_classes = ()
+    generate_input_classes = ()
+    transform_input_classes = ()
     def __init__(self, multiprocess_threads=None):
         # Check for instance-specific threads, otherwise read from env var
@@ -137,7 +171,7 @@ class APIEndpoint:
             [c.__name__.replace("Action", "").lower() for c in self.action_classes]
         )
-    def post_batches(self, dat, slug, action, payload_maker, resp_key):
+    def post_batches(self, dat, slug, action, payload_maker, resp_key, key="sequence", params=None):
         keep_batches = dat.loc[~dat.batch.isnull(), ["text", "batch"]]
         if keep_batches.shape[0] == 0:
             pass  # Do nothing - we made nice JSON errors to return in the DF
@@ -145,7 +179,7 @@ class APIEndpoint:
             # raise AssertionError(err)
         if keep_batches.shape[0] > 0:
             api_resps = async_api_call_wrapper(
-                keep_batches, slug, action, payload_maker, resp_key
+                keep_batches, slug, action, payload_maker, resp_key, api_version=self.api_version, key=key,  params=params,
             )
             if isinstance(api_resps, pd.DataFrame):
                 batch_res = api_resps.explode("api_resp")  # Should be lists of results
@@ -154,7 +188,9 @@ class APIEndpoint:
                 batch_res = pd.DataFrame({"api_resp": api_resps})
                 len_res = batch_res.shape[0]
             orig_request_rows = keep_batches.shape[0]
-            if len_res != orig_request_rows:
+            # For 'generate' actions, models may return multiple results per item
+            # (e.g., hyper-mpnn with batch_size > 1), so skip the 1:1 check
+            if action != "generate" and len_res != orig_request_rows:
                 err = "Response rows ({}) mismatch with input rows ({})"
                 err = err.format(len_res, orig_request_rows)
                 raise AssertionError(err)
@@ -170,11 +206,11 @@ class APIEndpoint:
             dat["api_resp"] = None
         return dat
-    def unpack_local_validations(self, dat):
+    def unpack_local_validations(self, dat, response_key):
         dat.loc[dat.api_resp.isnull(), "api_resp"] = (
             dat.loc[~dat.validation.isnull(), "validation"]
             .apply(
-                predict_resp_many_in_one_to_many_singles, args=(None, None, True, None)
+                predict_resp_many_in_one_to_many_singles, args=(None, None, True, None), response_key=response_key
             )
             .explode()
         )
@@ -182,39 +218,46 @@ class APIEndpoint:
         return dat
     @convert_input
-    @validate
-    def predict(self, dat):
-        dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
-        dat = self.unpack_local_validations(dat)
+    @validate_action("predict")
+    def predict(self, dat, params=None):
+        if self.api_version == 1:
+            dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
+            dat = self.unpack_local_validations(dat, "predictions")
+        else:
+            dat = self.post_batches(dat, self.slug, "predict", PARAMS_ITEMS, "results", key=self.predict_input_key, params=params)
+            dat = self.unpack_local_validations(dat,"results")
         return dat.api_resp.replace(np.nan, None).tolist()
-    def infer(self, dat):
-        return self.predict(dat)
+    def infer(self, dat, params=None):
+        return self.predict(dat, params)
     @convert_input
-    @validate
+    @validate_action("transform")  # api v1 legacy action
     def transform(self, dat):
         dat = self.post_batches(
             dat, self.slug, "transform", INST_DAT_TXT, "predictions"
         )
-        dat = self.unpack_local_validations(dat)
+        dat = self.unpack_local_validations(dat,"predictions")
         return dat.api_resp.replace(np.nan, None).tolist()
-    # @convert_input
-    # @validate
-    # def encode(self, dat):
-    #     # NOTE: we defined this for the specific case of ESM2
-    #     # TODO: this will be need again in v2 of API contract
-    #     dat = self.post_batches(dat, self.slug, "transform",
-    #                             INST_DAT_TXT, "embeddings")
-    #     dat = self.unpack_local_validations(dat)
-    #     return dat.api_resp.replace(np.nan, None).tolist()
+    @convert_input
+    @validate_action("encode")
+    def encode(self, dat, params=None):
+        dat = self.post_batches(dat, self.slug, "encode", PARAMS_ITEMS, "results", key=self.encode_input_key, params=params)
+        dat = self.unpack_local_validations(dat, "results")
+        return dat.api_resp.replace(np.nan, None).tolist()
     @convert_input
-    @validate
-    def generate(self, dat):
-        dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
-        dat = self.unpack_local_validations(dat)
+    @validate_action("generate")
+    def generate(self, dat, params=None):
+        if self.api_version == 1:
+            dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
+            dat = self.unpack_local_validations(dat, "predictions")
+        else:
+            dat = self.post_batches(dat, self.slug, "generate", PARAMS_ITEMS, "results", key=self.generate_input_key, params=params)
+            dat = self.unpack_local_validations(dat, "results")
         return dat.api_resp.replace(np.nan, None).tolist()
@@ -290,9 +333,9 @@ class TransformAction:
         return "TransformAction"
-# class EncodeAction:
-#     def __str__(self):
-#         return "EncodeAction"
+class EncodeAction:
+     def __str__(self):
+         return "EncodeAction"
 class ExplainAction:

{biolmai-0.1.8 → biolmai-0.2.10}/biolmai/asynch.py RENAMED Viewed

@@ -7,7 +7,7 @@ import aiohttp.resolver
 from aiohttp import ClientSession
 from biolmai.auth import get_user_auth_header
-from biolmai.const import BASE_API_URL, MULTIPROCESS_THREADS
+from biolmai.const import BASE_API_URL, BASE_API_URL_V1, MULTIPROCESS_THREADS
 aiohttp.resolver.DefaultResolver = aiohttp.resolver.AsyncResolver
@@ -146,11 +146,14 @@ async def async_main(urls, concurrency) -> list:
     return await get_all(urls, concurrency)
-async def async_api_calls(model_name, action, headers, payloads, response_key=None):
+async def async_api_calls(model_name, action, headers, payloads, response_key=None, api_version=2):
     """Hit an arbitrary BioLM model inference API."""
     # Normally would POST multiple sequences at once for greater efficiency,
     # but for simplicity sake will do one at at time right now
-    url = f"{BASE_API_URL}/models/{model_name}/{action}/"
+    if api_version == 1:
+        url = f"{BASE_API_URL_V1}/models/{model_name}/{action}/"
+    else:
+        url = f"{BASE_API_URL}/{model_name}/{action}/"
     if not isinstance(payloads, (list, dict)):
         err = "API request payload must be a list or dict, got {}"
@@ -180,15 +183,20 @@ async def async_api_calls(model_name, action, headers, payloads, response_key=No
     # return response
-def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key):
+def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key, api_version=2, key="sequence", params=None):
     """Wrap API calls to assist with sequence validation as a pre-cursor to
     each API call.
     """
     model_name = slug
     # payload = payload_maker(grouped_df)
-    init_ploads = grouped_df.groupby("batch").apply(
-        payload_maker, include_batch_size=True
-    )
+    if api_version == 1:
+        init_ploads = grouped_df.groupby("batch").apply(
+            payload_maker, include_batch_size=True
+        )
+    else:
+        init_ploads = grouped_df.groupby("batch").apply(
+            payload_maker, key=key, params=params, include_batch_size=True
+        )
     ploads = init_ploads.to_list()
     init_ploads = init_ploads.to_frame(name="pload")
     init_ploads["batch"] = init_ploads.index
@@ -208,7 +216,7 @@ def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key
     #     "https://python.org",
     # ]
     # concurrency = 3
-    api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key))
+    api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key, api_version))
     api_resp = [item for sublist in api_resp for item in sublist]
     api_resp = sorted(api_resp, key=lambda x: x["batch_id"])
     # print(api_resp)

biolmai 0.1.8__tar.gz → 0.2.10__tar.gz

biolmai 0.1.8.tar.gz → 0.2.10.tar.gz