biolmai 0.1.2__tar.gz → 0.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolmai-0.2.10/PKG-INFO +137 -0
- biolmai-0.2.10/README.rst +95 -0
- biolmai-0.2.10/biolmai/__init__.py +24 -0
- biolmai-0.2.10/biolmai/api.py +353 -0
- biolmai-0.2.10/biolmai/asynch.py +230 -0
- biolmai-0.2.10/biolmai/auth.py +1043 -0
- biolmai-0.2.10/biolmai/biolmai.py +122 -0
- biolmai-0.2.10/biolmai/cli.py +115 -0
- biolmai-0.2.10/biolmai/client.py +741 -0
- biolmai-0.2.10/biolmai/cls.py +176 -0
- biolmai-0.2.10/biolmai/const.py +66 -0
- biolmai-0.2.10/biolmai/payloads.py +44 -0
- biolmai-0.2.10/biolmai/seqflow_auth.py +200 -0
- biolmai-0.2.10/biolmai/validate.py +159 -0
- biolmai-0.2.10/biolmai.egg-info/PKG-INFO +137 -0
- biolmai-0.2.10/biolmai.egg-info/SOURCES.txt +73 -0
- biolmai-0.2.10/biolmai.egg-info/entry_points.txt +5 -0
- biolmai-0.2.10/biolmai.egg-info/requires.txt +21 -0
- biolmai-0.2.10/docs/_static/api_reference_icon.png +0 -0
- biolmai-0.2.10/docs/_static/biolm_docs_logo_dark.png +0 -0
- biolmai-0.2.10/docs/_static/biolm_docs_logo_light.png +0 -0
- biolmai-0.2.10/docs/_static/biolm_logomark_transparent.png +0 -0
- biolmai-0.2.10/docs/_static/biolm_logomark_transparent_for_dark.png +0 -0
- biolmai-0.2.10/docs/_static/chat_agents_icon.png +0 -0
- biolmai-0.2.10/docs/_static/jupyter_notebooks_icon.png +0 -0
- biolmai-0.2.10/docs/_static/model_docs_icon.png +0 -0
- biolmai-0.2.10/docs/_static/python_sdk_icon.png +0 -0
- biolmai-0.2.10/docs/_static/tutorials_icon.png +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/docs/biolmai.rst +32 -0
- biolmai-0.2.10/docs/conf.py +289 -0
- biolmai-0.2.10/docs/index.rst +48 -0
- biolmai-0.2.10/docs/python-client/api_biolm.rst +49 -0
- biolmai-0.2.10/docs/python-client/api_client.rst +74 -0
- biolmai-0.2.10/docs/python-client/async_sync.rst +157 -0
- biolmai-0.2.10/docs/python-client/authentication.rst +137 -0
- biolmai-0.2.10/docs/python-client/batching.rst +187 -0
- biolmai-0.2.10/docs/python-client/disk_output.rst +66 -0
- biolmai-0.2.10/docs/python-client/error_handling.rst +173 -0
- biolmai-0.2.10/docs/python-client/faq.rst +38 -0
- biolmai-0.2.10/docs/python-client/features.rst +22 -0
- biolmai-0.2.10/docs/python-client/index.rst +29 -0
- {biolmai-0.1.2/docs → biolmai-0.2.10/docs/python-client}/installation.rst +2 -2
- biolmai-0.2.10/docs/python-client/overview.rst +18 -0
- biolmai-0.2.10/docs/python-client/quickstart.rst +28 -0
- biolmai-0.2.10/docs/python-client/rate_limiting.rst +97 -0
- biolmai-0.2.10/docs/python-client/usage.rst +84 -0
- biolmai-0.2.10/docs/tutorials_use_cases/notebooks.rst +9 -0
- biolmai-0.2.10/pyproject.toml +99 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/setup.cfg +11 -2
- biolmai-0.2.10/setup.py +71 -0
- biolmai-0.2.10/tests/test_abatch_calls.py +208 -0
- biolmai-0.2.10/tests/test_aclient.py +296 -0
- biolmai-0.2.10/tests/test_batch_error_retry.py +80 -0
- biolmai-0.2.10/tests/test_batch_errors.py +47 -0
- biolmai-0.2.10/tests/test_biolmai.py +211 -0
- biolmai-0.2.10/tests/test_client.py +379 -0
- biolmai-0.2.10/tests/test_integration.py +132 -0
- biolmai-0.2.10/tests/test_max_items.py +183 -0
- biolmai-0.2.10/tests/test_oauth_auth.py +286 -0
- biolmai-0.2.10/tests/test_rate_limit.py +182 -0
- biolmai-0.2.10/tests/test_schemas.py +30 -0
- biolmai-0.1.2/PKG-INFO +0 -70
- biolmai-0.1.2/README.rst +0 -37
- biolmai-0.1.2/biolmai/__init__.py +0 -15
- biolmai-0.1.2/biolmai/api.py +0 -291
- biolmai-0.1.2/biolmai/async.py +0 -6
- biolmai-0.1.2/biolmai/auth.py +0 -127
- biolmai-0.1.2/biolmai/biolmai.py +0 -153
- biolmai-0.1.2/biolmai/cli.py +0 -67
- biolmai-0.1.2/biolmai/cls.py +0 -1
- biolmai-0.1.2/biolmai/const.py +0 -13
- biolmai-0.1.2/biolmai/payloads.py +0 -6
- biolmai-0.1.2/biolmai/validate.py +0 -107
- biolmai-0.1.2/biolmai.egg-info/PKG-INFO +0 -70
- biolmai-0.1.2/biolmai.egg-info/SOURCES.txt +0 -40
- biolmai-0.1.2/biolmai.egg-info/entry_points.txt +0 -2
- biolmai-0.1.2/biolmai.egg-info/requires.txt +0 -6
- biolmai-0.1.2/docs/authors.rst +0 -1
- biolmai-0.1.2/docs/conf.py +0 -168
- biolmai-0.1.2/docs/contributing.rst +0 -1
- biolmai-0.1.2/docs/history.rst +0 -1
- biolmai-0.1.2/docs/index.rst +0 -20
- biolmai-0.1.2/docs/readme.rst +0 -1
- biolmai-0.1.2/docs/usage.rst +0 -7
- biolmai-0.1.2/setup.py +0 -52
- biolmai-0.1.2/tests/test_biolmai.py +0 -217
- {biolmai-0.1.2 → biolmai-0.2.10}/AUTHORS.rst +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/CONTRIBUTING.rst +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/HISTORY.rst +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/LICENSE +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/MANIFEST.in +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/biolmai/ltc.py +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/biolmai.egg-info/dependency_links.txt +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/biolmai.egg-info/not-zip-safe +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/biolmai.egg-info/top_level.txt +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/docs/Makefile +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/docs/make.bat +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/docs/modules.rst +0 -0
- {biolmai-0.1.2 → biolmai-0.2.10}/tests/__init__.py +0 -0
biolmai-0.2.10/PKG-INFO
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: biolmai
|
|
3
|
+
Version: 0.2.10
|
|
4
|
+
Summary: BioLM Python client
|
|
5
|
+
Home-page: https://github.com/BioLM/py-biolm
|
|
6
|
+
Author: BioLM
|
|
7
|
+
Author-email: BioLM <support@biolm.ai>
|
|
8
|
+
License: Apache Software License 2.0
|
|
9
|
+
Keywords: biolmai
|
|
10
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Natural Language :: English
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Requires-Python: >=3.7
|
|
22
|
+
Description-Content-Type: text/x-rst
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
License-File: AUTHORS.rst
|
|
25
|
+
Requires-Dist: httpx>=0.23.0
|
|
26
|
+
Requires-Dist: httpcore
|
|
27
|
+
Requires-Dist: Click>=6.0
|
|
28
|
+
Requires-Dist: requests
|
|
29
|
+
Requires-Dist: aiodns
|
|
30
|
+
Requires-Dist: synchronicity>=0.5.0; python_version >= "3.9"
|
|
31
|
+
Requires-Dist: synchronicity<0.5.0; python_version < "3.9"
|
|
32
|
+
Requires-Dist: typing_extensions; python_version < "3.9"
|
|
33
|
+
Requires-Dist: aiohttp<=3.8.6; python_version < "3.12"
|
|
34
|
+
Requires-Dist: aiohttp>=3.9.0; python_version >= "3.12"
|
|
35
|
+
Requires-Dist: async-lru
|
|
36
|
+
Requires-Dist: aiofiles
|
|
37
|
+
Requires-Dist: cryptography
|
|
38
|
+
Dynamic: author
|
|
39
|
+
Dynamic: home-page
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
Dynamic: requires-python
|
|
42
|
+
|
|
43
|
+
========
|
|
44
|
+
BioLM AI
|
|
45
|
+
========
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
.. image:: https://img.shields.io/pypi/v/biolmai.svg
|
|
49
|
+
:target: https://pypi.python.org/pypi/biolmai
|
|
50
|
+
|
|
51
|
+
.. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
|
|
52
|
+
:target: https://travis-ci.org/github/BioLM/py-biolm
|
|
53
|
+
|
|
54
|
+
.. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
|
|
55
|
+
:target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
|
|
56
|
+
:alt: Documentation Status
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
Python client and SDK for `BioLM <https://biolm.ai>`_
|
|
62
|
+
|
|
63
|
+
Install the package:
|
|
64
|
+
|
|
65
|
+
.. code-block:: bash
|
|
66
|
+
|
|
67
|
+
pip install biolmai
|
|
68
|
+
|
|
69
|
+
Basic usage:
|
|
70
|
+
|
|
71
|
+
.. code-block:: python
|
|
72
|
+
|
|
73
|
+
from biolmai import biolm
|
|
74
|
+
|
|
75
|
+
# Encode a single sequence
|
|
76
|
+
result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
|
|
77
|
+
|
|
78
|
+
# Predict a batch of sequences
|
|
79
|
+
result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
|
|
80
|
+
|
|
81
|
+
# Write results to disk
|
|
82
|
+
biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
|
|
83
|
+
|
|
84
|
+
Asynchronous usage:
|
|
85
|
+
|
|
86
|
+
.. code-block:: python
|
|
87
|
+
|
|
88
|
+
from biolmai.client import BioLMApiClient
|
|
89
|
+
import asyncio
|
|
90
|
+
|
|
91
|
+
async def main():
|
|
92
|
+
model = BioLMApiClient("esmfold")
|
|
93
|
+
result = await model.predict(items=[{"sequence": "MDNELE"}])
|
|
94
|
+
print(result)
|
|
95
|
+
|
|
96
|
+
asyncio.run(main())
|
|
97
|
+
|
|
98
|
+
Overview
|
|
99
|
+
========
|
|
100
|
+
|
|
101
|
+
The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
|
|
102
|
+
|
|
103
|
+
Main features:
|
|
104
|
+
|
|
105
|
+
- High-level BioLM constructor for quick requests
|
|
106
|
+
- Sync and async interfaces
|
|
107
|
+
- Automatic or custom rate limiting/throttling
|
|
108
|
+
- Schema-based batch size detection
|
|
109
|
+
- Flexible input formats (single key + list, or list of dicts)
|
|
110
|
+
- Low memory usage via generators
|
|
111
|
+
- Flexible error handling (raise, continue, or stop on error)
|
|
112
|
+
- Universal HTTP client for both sync and async
|
|
113
|
+
|
|
114
|
+
Features
|
|
115
|
+
========
|
|
116
|
+
|
|
117
|
+
- **High-level constructor**: Instantly run an API call with a single line.
|
|
118
|
+
- **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
|
|
119
|
+
- **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
|
|
120
|
+
- **Schema-based batching**: Automatically queries API for max batch size.
|
|
121
|
+
- **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
|
|
122
|
+
- **Low memory**: Uses generators for validation and batching.
|
|
123
|
+
- **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
|
|
124
|
+
- **Disk output**: Write results as JSONL to disk.
|
|
125
|
+
- **Universal HTTP client**: Efficient for both sync and async.
|
|
126
|
+
- **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
|
|
127
|
+
|
|
128
|
+
**Example endpoints and actions:**
|
|
129
|
+
|
|
130
|
+
- `esm2-8m/encode`: Embedding for protein sequences.
|
|
131
|
+
- `esmfold/predict`: Structure prediction for protein sequences.
|
|
132
|
+
- `progen2-oas/generate`: Sequence generation from a context string.
|
|
133
|
+
- `dnabert2/predict`: Masked prediction for DNA sequences.
|
|
134
|
+
- `ablang2/encode`: Embeddings for paired-chain antibodies.
|
|
135
|
+
|
|
136
|
+
* Free software: Apache Software License 2.0
|
|
137
|
+
* Documentation: https://docs.biolm.ai
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
========
|
|
2
|
+
BioLM AI
|
|
3
|
+
========
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
.. image:: https://img.shields.io/pypi/v/biolmai.svg
|
|
7
|
+
:target: https://pypi.python.org/pypi/biolmai
|
|
8
|
+
|
|
9
|
+
.. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
|
|
10
|
+
:target: https://travis-ci.org/github/BioLM/py-biolm
|
|
11
|
+
|
|
12
|
+
.. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
|
|
13
|
+
:target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
|
|
14
|
+
:alt: Documentation Status
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
Python client and SDK for `BioLM <https://biolm.ai>`_
|
|
20
|
+
|
|
21
|
+
Install the package:
|
|
22
|
+
|
|
23
|
+
.. code-block:: bash
|
|
24
|
+
|
|
25
|
+
pip install biolmai
|
|
26
|
+
|
|
27
|
+
Basic usage:
|
|
28
|
+
|
|
29
|
+
.. code-block:: python
|
|
30
|
+
|
|
31
|
+
from biolmai import biolm
|
|
32
|
+
|
|
33
|
+
# Encode a single sequence
|
|
34
|
+
result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
|
|
35
|
+
|
|
36
|
+
# Predict a batch of sequences
|
|
37
|
+
result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
|
|
38
|
+
|
|
39
|
+
# Write results to disk
|
|
40
|
+
biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
|
|
41
|
+
|
|
42
|
+
Asynchronous usage:
|
|
43
|
+
|
|
44
|
+
.. code-block:: python
|
|
45
|
+
|
|
46
|
+
from biolmai.client import BioLMApiClient
|
|
47
|
+
import asyncio
|
|
48
|
+
|
|
49
|
+
async def main():
|
|
50
|
+
model = BioLMApiClient("esmfold")
|
|
51
|
+
result = await model.predict(items=[{"sequence": "MDNELE"}])
|
|
52
|
+
print(result)
|
|
53
|
+
|
|
54
|
+
asyncio.run(main())
|
|
55
|
+
|
|
56
|
+
Overview
|
|
57
|
+
========
|
|
58
|
+
|
|
59
|
+
The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
|
|
60
|
+
|
|
61
|
+
Main features:
|
|
62
|
+
|
|
63
|
+
- High-level BioLM constructor for quick requests
|
|
64
|
+
- Sync and async interfaces
|
|
65
|
+
- Automatic or custom rate limiting/throttling
|
|
66
|
+
- Schema-based batch size detection
|
|
67
|
+
- Flexible input formats (single key + list, or list of dicts)
|
|
68
|
+
- Low memory usage via generators
|
|
69
|
+
- Flexible error handling (raise, continue, or stop on error)
|
|
70
|
+
- Universal HTTP client for both sync and async
|
|
71
|
+
|
|
72
|
+
Features
|
|
73
|
+
========
|
|
74
|
+
|
|
75
|
+
- **High-level constructor**: Instantly run an API call with a single line.
|
|
76
|
+
- **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
|
|
77
|
+
- **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
|
|
78
|
+
- **Schema-based batching**: Automatically queries API for max batch size.
|
|
79
|
+
- **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
|
|
80
|
+
- **Low memory**: Uses generators for validation and batching.
|
|
81
|
+
- **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
|
|
82
|
+
- **Disk output**: Write results as JSONL to disk.
|
|
83
|
+
- **Universal HTTP client**: Efficient for both sync and async.
|
|
84
|
+
- **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
|
|
85
|
+
|
|
86
|
+
**Example endpoints and actions:**
|
|
87
|
+
|
|
88
|
+
- `esm2-8m/encode`: Embedding for protein sequences.
|
|
89
|
+
- `esmfold/predict`: Structure prediction for protein sequences.
|
|
90
|
+
- `progen2-oas/generate`: Sequence generation from a context string.
|
|
91
|
+
- `dnabert2/predict`: Masked prediction for DNA sequences.
|
|
92
|
+
- `ablang2/encode`: Embeddings for paired-chain antibodies.
|
|
93
|
+
|
|
94
|
+
* Free software: Apache Software License 2.0
|
|
95
|
+
* Documentation: https://docs.biolm.ai
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Top-level package for BioLM AI."""
|
|
2
|
+
__author__ = """Nikhil Haas"""
|
|
3
|
+
__email__ = "nikhil@biolm.ai"
|
|
4
|
+
__version__ = '0.2.10'
|
|
5
|
+
|
|
6
|
+
from biolmai.client import BioLMApi, BioLMApiClient
|
|
7
|
+
from biolmai.biolmai import BioLM
|
|
8
|
+
from typing import Optional, Union, List, Any
|
|
9
|
+
|
|
10
|
+
__all__ = ['biolm']
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def biolm(
    *,
    entity: str,
    action: str,
    type: Optional[str] = None,
    items: Union[Any, List[Any]],
    params: Optional[dict] = None,
    api_key: Optional[str] = None,
    **kwargs
) -> Any:
    """Keyword-only shortcut that forwards every argument to ``BioLM``.

    All named arguments, plus any extra ``**kwargs``, are passed through
    unchanged; whatever calling ``BioLM(...)`` yields is returned as-is.
    """
    call_args = dict(
        entity=entity,
        action=action,
        type=type,
        items=items,
        params=params,
        api_key=api_key,
    )
    return BioLM(**call_args, **kwargs)
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"""References to API endpoints."""
|
|
2
|
+
import datetime
|
|
3
|
+
import inspect
|
|
4
|
+
import time
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
except ImportError:
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
import requests
|
|
14
|
+
from requests.adapters import HTTPAdapter
|
|
15
|
+
from requests.packages.urllib3.util.retry import Retry
|
|
16
|
+
|
|
17
|
+
import biolmai
|
|
18
|
+
import biolmai.auth
|
|
19
|
+
from biolmai.asynch import async_api_call_wrapper
|
|
20
|
+
from biolmai.biolmai import log
|
|
21
|
+
from biolmai.const import MULTIPROCESS_THREADS
|
|
22
|
+
from biolmai.payloads import INST_DAT_TXT, PARAMS_ITEMS, predict_resp_many_in_one_to_many_singles
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@lru_cache(maxsize=64)
def validate_endpoint_action(allowed_classes, method_name, api_class_name):
    """Check that an action is supported by an endpoint class.

    Only the last dot-separated component of ``method_name`` is compared
    against ``allowed_classes``. Because the function is wrapped in
    ``lru_cache``, every argument must be hashable (e.g. a tuple, not a list).

    Returns:
        None when the action is allowed.

    Raises:
        AssertionError: when the action is not in ``allowed_classes``.
    """
    *_, requested_action = method_name.split(".")
    if requested_action in allowed_classes:
        return None
    supported = list(allowed_classes)
    raise AssertionError("Only {} supported on {}".format(supported, api_class_name))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def text_validator(text, c):
    """Run validator callable ``c`` on ``text`` and report any failure.

    Returns:
        ``str(exception)`` if calling ``c(text)`` raised, otherwise None.
    """
    failure = None
    try:
        c(text)
    except Exception as exc:
        failure = str(exc)
    return failure
|
|
41
|
+
|
|
42
|
+
def combine_validation(x, y):
    """Merge two validation results, each either None or an error string.

    Returns:
        Both messages joined by a newline when both are strings; the single
        string when the other value is None; None in every other case
        (including when either value is neither a string nor None).
    """
    x_is_msg = isinstance(x, str)
    y_is_msg = isinstance(y, str)
    if x_is_msg and y_is_msg:
        return f"{x}\n{y}"
    if x_is_msg and y is None:
        return x
    if y_is_msg and x is None:
        return y
    return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def validate_action(action):
    """Build a decorator that validates inputs and assigns request batches.

    The decorated callable is invoked as ``f(obj, input_data, ...)`` where
    ``input_data`` is a DataFrame with a ``text`` column. Before the call the
    wrapper:

    1. checks (for functions whose first parameter is named ``self``) that the
       method name is one of ``obj.action_class_strings``;
    2. runs every validator in ``obj.<action>_input_classes`` over each row
       and stores the combined messages in a ``validation`` column
       (None means the row is valid);
    3. adds a ``batch`` column numbering the valid rows in groups of
       ``obj.batch_size``; invalid rows get a missing batch.

    Args:
        action: One of "predict", "encode", "generate" or "transform".

    Raises:
        ValueError: at call time, if ``action`` is not one of the above.
        AssertionError: from ``validate_endpoint_action`` when the method is
            not supported by the endpoint.
    """
    # Local import: the module itself only imports ``lru_cache`` from functools.
    from functools import wraps

    input_class_attrs = {
        "predict": "predict_input_classes",
        "encode": "encode_input_classes",
        "generate": "generate_input_classes",
        "transform": "transform_input_classes",
    }

    def validate(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            # Resolve the instance at call time so attributes overridden by
            # subclasses (validators, batch size, allowed actions) are seen.
            class_obj_self = args[0]
            try:
                is_method = inspect.getfullargspec(f)[0][0] == "self"
            except Exception:
                is_method = False

            if is_method:
                validate_endpoint_action(
                    class_obj_self.action_class_strings,
                    f.__name__,
                    class_obj_self.__class__.__name__,
                )

            if action not in input_class_attrs:
                err = "Unsupported action {!r}; expected one of {}"
                raise ValueError(err.format(action, sorted(input_class_attrs)))
            input_classes = getattr(class_obj_self, input_class_attrs[action])

            input_data = args[1]
            # Validate each row's text against every configured validator.
            for c in input_classes:
                validation = input_data.text.apply(text_validator, args=(c,))
                if "validation" not in input_data.columns:
                    input_data["validation"] = validation
                else:
                    # masking and loc may be more performant option
                    input_data["validation"] = input_data["validation"].combine(
                        validation, combine_validation
                    )
            # With no validators configured the column would not exist yet;
            # treat every row as valid instead of failing below.
            if "validation" not in input_data.columns:
                input_data["validation"] = None

            # Mark your batches, excluding invalid rows
            valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
            N = class_obj_self.batch_size  # N rows will go per API request
            if valid_dat.shape[0] != input_data.shape[0]:
                # JOIN back by index so invalid rows end up with a missing batch
                valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
                input_data = input_data.merge(
                    valid_dat.batch, left_index=True, right_index=True, how="left"
                )
            else:
                input_data["batch"] = np.arange(input_data.shape[0]) // N

            # Forward any extra positional arguments (e.g. ``params``) too.
            return f(class_obj_self, input_data, *args[2:], **kwargs)

        return wrapper

    return validate
|
|
118
|
+
|
|
119
|
+
def convert_input(f):
    """Decorator that normalizes user input into a one-column DataFrame.

    The wrapped callable is invoked as ``f(obj, input_data, ...)``. The second
    positional argument may be a single string, a list, a tuple, a 1-D numpy
    array, a pandas Series, or a single-column pandas DataFrame. It is
    replaced by a DataFrame with one column named ``text``; any further
    positional and keyword arguments are forwarded unchanged.

    Raises:
        ValueError: if the input is not one of the accepted types.
        AssertionError: if the input is a numpy array with more than one
            dimension or a DataFrame without exactly one column.
    """
    # Local import: the module itself only imports ``lru_cache`` from functools.
    from functools import wraps

    @wraps(f)
    def wrapper(*args, **kwargs):
        # Get the user-input data argument to the decorated function
        input_data = args[1]
        # Make sure we have expected input types
        acceptable_inputs = (str, list, tuple, np.ndarray, pd.Series, pd.DataFrame)
        if not isinstance(input_data, acceptable_inputs):
            err = "Input must be one or many DNA or protein strings"
            raise ValueError(err)
        # Convert single-sequence input to list
        if isinstance(input_data, str):
            input_data = [input_data]
        # Make sure we don't have a matrix
        if isinstance(input_data, np.ndarray) and len(input_data.shape) > 1:
            err = "Detected Numpy matrix - input a single vector or array"
            raise AssertionError(err)
        # A DataFrame's shape is always 2-D, so check the column count instead.
        if isinstance(input_data, pd.DataFrame):
            if input_data.shape[1] != 1:
                err = "Detected Pandas DataFrame - input a single vector or Series"
                raise AssertionError(err)
            input_data = input_data.iloc[:, 0]
        # Use plain values so the Series name/index cannot affect the result.
        if isinstance(input_data, pd.Series):
            input_data = input_data.tolist()
        input_data = pd.DataFrame(input_data, columns=["text"])
        return f(args[0], input_data, *args[2:], **kwargs)

    return wrapper
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class APIEndpoint:
    """Base class for an API endpoint that posts batched text inputs.

    The public action methods (``predict``, ``encode``, ``generate``,
    ``transform``) are wrapped by ``convert_input`` and ``validate_action``,
    so they receive a DataFrame with ``text``, ``validation`` and ``batch``
    columns. Each returns a list with one entry per input row.

    NOTE(review): methods read ``self.slug``, which is not defined here;
    presumably subclasses set it - confirm.
    """

    # Overwrite in subclasses as needed
    batch_size = 3  # rows per API request (read by validate_action)
    params = None
    action_classes = ()  # *Action marker classes; names become action strings
    api_version = 2

    # Passed as ``key`` to post_batches for the corresponding action
    predict_input_key = "sequence"
    encode_input_key = "sequence"
    generate_input_key = "context"

    # Validator callables applied to every input row, per action
    predict_input_classes = ()
    encode_input_classes = ()
    generate_input_classes = ()
    transform_input_classes = ()

    def __init__(self, multiprocess_threads=None):
        """Store threading preference, auth headers and allowed action names.

        Args:
            multiprocess_threads: Instance-specific setting; when None the
                module-level ``MULTIPROCESS_THREADS`` value is used.
        """
        # Check for instance-specific threads, otherwise read from env var
        if multiprocess_threads is not None:
            self.multiprocess_threads = multiprocess_threads
        else:
            self.multiprocess_threads = MULTIPROCESS_THREADS  # Could be False
        # Get correct auth-like headers
        self.auth_headers = biolmai.auth.get_user_auth_header()
        # e.g. a class named ``PredictAction`` yields the string "predict"
        self.action_class_strings = tuple(
            c.__name__.replace("Action", "").lower() for c in self.action_classes
        )

    def post_batches(self, dat, slug, action, payload_maker, resp_key, key="sequence", params=None):
        """Send the valid rows of ``dat`` to the API and attach the responses.

        Rows whose ``batch`` is missing (they failed local validation) are not
        sent. The responses are joined back onto ``dat`` by index in a new
        ``api_resp`` column; unsent rows get a missing value there.

        Returns:
            ``dat`` with an ``api_resp`` column.

        Raises:
            AssertionError: for actions other than "generate", when the number
                of response rows differs from the number of rows sent.
        """
        # Copy so the bookkeeping columns added below never touch a slice of dat.
        keep_batches = dat.loc[~dat.batch.isnull(), ["text", "batch"]].copy()
        if keep_batches.shape[0] == 0:
            # Nothing to send - validation errors are filled in by the caller.
            dat["api_resp"] = None
            return dat

        api_resps = async_api_call_wrapper(
            keep_batches, slug, action, payload_maker, resp_key,
            api_version=self.api_version, key=key, params=params,
        )
        if isinstance(api_resps, pd.DataFrame):
            batch_res = api_resps.explode("api_resp")  # Should be lists of results
        else:
            batch_res = pd.DataFrame({"api_resp": api_resps})
        len_res = batch_res.shape[0]
        orig_request_rows = keep_batches.shape[0]
        # For 'generate' actions, models may return multiple results per item
        # (e.g., hyper-mpnn with batch_size > 1), so skip the 1:1 check
        if action != "generate" and len_res != orig_request_rows:
            err = "Response rows ({}) mismatch with input rows ({})"
            err = err.format(len_res, orig_request_rows)
            raise AssertionError(err)

        # Stack the results horizontally w/ original rows of batches:
        # align both frames positionally, then restore the original index.
        keep_batches["prev_idx"] = keep_batches.index
        keep_batches.reset_index(drop=False, inplace=True)
        batch_res.reset_index(drop=True, inplace=True)
        keep_batches["api_resp"] = batch_res
        keep_batches.set_index("prev_idx", inplace=True)
        dat = dat.join(keep_batches.reindex(["api_resp"], axis=1))
        return dat

    def unpack_local_validations(self, dat, response_key):
        """Fill missing ``api_resp`` entries from the ``validation`` messages.

        Each non-null validation message is passed through
        ``predict_resp_many_in_one_to_many_singles`` with ``response_key`` and
        the exploded result is written where ``api_resp`` is null.
        """
        dat.loc[dat.api_resp.isnull(), "api_resp"] = (
            dat.loc[~dat.validation.isnull(), "validation"]
            .apply(
                predict_resp_many_in_one_to_many_singles, args=(None, None, True, None), response_key=response_key
            )
            .explode()
        )

        return dat

    @convert_input
    @validate_action("predict")
    def predict(self, dat, params=None):
        """Run the "predict" action and return one result per input row."""
        if self.api_version == 1:
            dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
            dat = self.unpack_local_validations(dat, "predictions")
        else:
            dat = self.post_batches(dat, self.slug, "predict", PARAMS_ITEMS, "results", key=self.predict_input_key, params=params)
            dat = self.unpack_local_validations(dat, "results")
        return dat.api_resp.replace(np.nan, None).tolist()

    def infer(self, dat, params=None):
        """Alias for ``predict``."""
        # Pass params by keyword so it survives the predict decorators.
        return self.predict(dat, params=params)

    @convert_input
    @validate_action("transform")  # api v1 legacy action
    def transform(self, dat):
        """Run the legacy "transform" action and return one result per row."""
        dat = self.post_batches(
            dat, self.slug, "transform", INST_DAT_TXT, "predictions"
        )
        dat = self.unpack_local_validations(dat, "predictions")
        return dat.api_resp.replace(np.nan, None).tolist()

    @convert_input
    @validate_action("encode")
    def encode(self, dat, params=None):
        """Run the "encode" action and return one result per input row."""
        dat = self.post_batches(dat, self.slug, "encode", PARAMS_ITEMS, "results", key=self.encode_input_key, params=params)
        dat = self.unpack_local_validations(dat, "results")
        return dat.api_resp.replace(np.nan, None).tolist()

    @convert_input
    @validate_action("generate")
    def generate(self, dat, params=None):
        """Run the "generate" action and return the collected results."""
        if self.api_version == 1:
            dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
            # NOTE(review): responses are requested under "generated" but local
            # validation errors are unpacked under "predictions" - confirm this
            # asymmetry is intended.
            dat = self.unpack_local_validations(dat, "predictions")
        else:
            dat = self.post_batches(dat, self.slug, "generate", PARAMS_ITEMS, "results", key=self.generate_input_key, params=params)
            dat = self.unpack_local_validations(dat, "results")

        return dat.api_resp.replace(np.nan, None).tolist()
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def retry_minutes(sess, URL, HEADERS, dat, timeout, mins):
    """POST ``dat`` to ``URL``, retrying on failure for a limited time.

    A request counts as failed when it raises, when its status is an error
    other than 400/404, or when the JSON body contains an "error" entry.
    After each failure the function waits 5 seconds. It stops after the
    first success, after 5 failed attempts, or once ``mins`` minutes have
    elapsed, whichever comes first.

    Args:
        sess: Session-like object exposing ``post``.
        URL: Target URL.
        HEADERS: Request headers; copied, so the caller's dict is not modified.
        dat: Request body passed as ``data``.
        timeout: Per-request timeout passed to ``sess.post``.
        mins: Maximum number of minutes to keep retrying.

    Returns:
        The last response received, or None if no request produced one.
    """
    # Local import: json is not among the module-level imports.
    import json

    headers = dict(HEADERS)
    headers["Content-Type"] = "application/json"
    attempts, max_attempts = 0, 5
    # Bound up front so a zero/negative time budget returns None cleanly.
    response = None
    try_until = datetime.datetime.now() + datetime.timedelta(minutes=mins)
    while datetime.datetime.now() < try_until and attempts < max_attempts:
        response = None
        try:
            log.info(f"Trying {datetime.datetime.now()}")
            response = sess.post(URL, headers=headers, data=dat, timeout=timeout)
            # 400/404 bodies are inspected below instead of raising here.
            if response.status_code not in (400, 404):
                response.raise_for_status()
            body = response.json()
            if "error" in body:
                raise ValueError(json.dumps(body))
            break
        except Exception as e:
            log.warning(e)
            # Compare against None: an error response is falsy in requests,
            # and those are exactly the bodies worth logging.
            if response is not None:
                log.warning(response.text)
            time.sleep(5)  # Wait 5 seconds between tries
            attempts += 1
    if response is None:
        log.warning("Got Nonetype response")
    elif "Server Error" in response.text:
        log.warning("Got Server Error")
    return response
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=None,
    session=None,
):
    """Return a session whose HTTP and HTTPS adapters use a retry policy.

    Args:
        retries: Value used for the total, read and connect retry limits.
        backoff_factor: Backoff factor handed to ``Retry``.
        status_forcelist: Status codes handed to ``Retry``; defaults to
            ``list(range(400, 599))``.
        session: Existing session to configure; a new ``requests.Session``
            is created when omitted.

    Returns:
        The configured session.
    """
    forcelist = list(range(400, 599)) if status_forcelist is None else status_forcelist
    http = session or requests.Session()
    policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=policy)
    for scheme in ("http://", "https://"):
        http.mount(scheme, retrying_adapter)
    return http
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
class PredictAction:
    """Marker class for the "predict" action."""

    def __str__(self):
        """Return the fixed label for this marker."""
        label = "PredictAction"
        return label
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
class GenerateAction:
    """Marker class for the "generate" action."""

    def __str__(self):
        """Return the fixed label for this marker."""
        label = "GenerateAction"
        return label
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class TransformAction:
    """Marker class for the "transform" action."""

    def __str__(self):
        """Return the fixed label for this marker."""
        label = "TransformAction"
        return label
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class EncodeAction:
    """Marker class for the "encode" action."""

    def __str__(self):
        """Return the fixed label for this marker."""
        label = "EncodeAction"
        return label
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
class ExplainAction:
    """Marker class for an "explain" action."""

    def __str__(self):
        """Return the fixed label for this marker."""
        label = "ExplainAction"
        return label
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class SimilarityAction:
    """Marker class for a "similarity" action."""

    def __str__(self):
        """Return the fixed label for this marker."""
        label = "SimilarityAction"
        return label
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class FinetuneAction:
    """Marker class for a "finetune" action."""

    def __str__(self):
        """Return the fixed label for this marker."""
        label = "FinetuneAction"
        return label
|