biolmai 0.1.8__tar.gz → 0.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolmai-0.2.10/PKG-INFO +137 -0
- biolmai-0.2.10/README.rst +95 -0
- biolmai-0.2.10/biolmai/__init__.py +24 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/api.py +130 -87
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/asynch.py +16 -8
- biolmai-0.2.10/biolmai/auth.py +1043 -0
- biolmai-0.2.10/biolmai/biolmai.py +122 -0
- biolmai-0.2.10/biolmai/cli.py +115 -0
- biolmai-0.2.10/biolmai/client.py +741 -0
- biolmai-0.2.10/biolmai/cls.py +176 -0
- biolmai-0.2.10/biolmai/const.py +66 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/payloads.py +13 -2
- biolmai-0.2.10/biolmai/seqflow_auth.py +200 -0
- biolmai-0.2.10/biolmai/validate.py +159 -0
- biolmai-0.2.10/biolmai.egg-info/PKG-INFO +137 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/SOURCES.txt +28 -19
- biolmai-0.2.10/biolmai.egg-info/entry_points.txt +5 -0
- biolmai-0.2.10/biolmai.egg-info/requires.txt +21 -0
- biolmai-0.2.10/docs/_static/biolm_logomark_transparent.png +0 -0
- biolmai-0.2.10/docs/_static/biolm_logomark_transparent_for_dark.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/biolmai.rst +16 -0
- biolmai-0.2.10/docs/conf.py +289 -0
- biolmai-0.2.10/docs/index.rst +48 -0
- biolmai-0.2.10/docs/python-client/api_biolm.rst +49 -0
- biolmai-0.2.10/docs/python-client/api_client.rst +74 -0
- biolmai-0.2.10/docs/python-client/async_sync.rst +157 -0
- biolmai-0.2.10/docs/python-client/authentication.rst +137 -0
- biolmai-0.2.10/docs/python-client/batching.rst +187 -0
- biolmai-0.2.10/docs/python-client/disk_output.rst +66 -0
- biolmai-0.2.10/docs/python-client/error_handling.rst +173 -0
- biolmai-0.2.10/docs/python-client/faq.rst +38 -0
- biolmai-0.2.10/docs/python-client/features.rst +22 -0
- biolmai-0.2.10/docs/python-client/index.rst +29 -0
- {biolmai-0.1.8/docs/python-client/get_started → biolmai-0.2.10/docs/python-client}/installation.rst +2 -2
- biolmai-0.2.10/docs/python-client/overview.rst +18 -0
- biolmai-0.2.10/docs/python-client/quickstart.rst +28 -0
- biolmai-0.2.10/docs/python-client/rate_limiting.rst +97 -0
- biolmai-0.2.10/docs/python-client/usage.rst +84 -0
- biolmai-0.2.10/pyproject.toml +99 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/setup.cfg +5 -1
- {biolmai-0.1.8 → biolmai-0.2.10}/setup.py +27 -9
- biolmai-0.2.10/tests/test_abatch_calls.py +208 -0
- biolmai-0.2.10/tests/test_aclient.py +296 -0
- biolmai-0.2.10/tests/test_batch_error_retry.py +80 -0
- biolmai-0.2.10/tests/test_batch_errors.py +47 -0
- biolmai-0.2.10/tests/test_biolmai.py +211 -0
- biolmai-0.2.10/tests/test_client.py +379 -0
- biolmai-0.2.10/tests/test_integration.py +132 -0
- biolmai-0.2.10/tests/test_max_items.py +183 -0
- biolmai-0.2.10/tests/test_oauth_auth.py +286 -0
- biolmai-0.2.10/tests/test_rate_limit.py +182 -0
- biolmai-0.2.10/tests/test_schemas.py +30 -0
- biolmai-0.1.8/PKG-INFO +0 -70
- biolmai-0.1.8/README.rst +0 -37
- biolmai-0.1.8/biolmai/__init__.py +0 -7
- biolmai-0.1.8/biolmai/auth.py +0 -173
- biolmai-0.1.8/biolmai/biolmai.py +0 -5
- biolmai-0.1.8/biolmai/cli.py +0 -75
- biolmai-0.1.8/biolmai/cls.py +0 -97
- biolmai-0.1.8/biolmai/const.py +0 -29
- biolmai-0.1.8/biolmai/validate.py +0 -134
- biolmai-0.1.8/biolmai.egg-info/PKG-INFO +0 -70
- biolmai-0.1.8/biolmai.egg-info/entry_points.txt +0 -2
- biolmai-0.1.8/biolmai.egg-info/requires.txt +0 -4
- biolmai-0.1.8/docs/conf.py +0 -163
- biolmai-0.1.8/docs/index.rst +0 -107
- biolmai-0.1.8/docs/model-docs/DNABERT.rst +0 -640
- biolmai-0.1.8/docs/model-docs/ESM-1v.rst +0 -362
- biolmai-0.1.8/docs/model-docs/ESM2_Embeddings.rst +0 -242
- biolmai-0.1.8/docs/model-docs/ESMFold.rst +0 -252
- biolmai-0.1.8/docs/model-docs/ESM_InverseFold.rst +0 -278
- biolmai-0.1.8/docs/model-docs/ProtGPT2.rst +0 -609
- biolmai-0.1.8/docs/model-docs/ProteInfer_EC.rst +0 -249
- biolmai-0.1.8/docs/model-docs/ProteInfer_GO.rst +0 -329
- biolmai-0.1.8/docs/model-docs/img/book_icon.png +0 -0
- biolmai-0.1.8/docs/model-docs/img/esmfold_perf.png +0 -0
- biolmai-0.1.8/docs/model-docs/index.rst +0 -13
- biolmai-0.1.8/docs/model-docs/progen2/ProGen2_BFD90.rst +0 -251
- biolmai-0.1.8/docs/model-docs/progen2/ProGen2_Medium.rst +0 -248
- biolmai-0.1.8/docs/model-docs/progen2/ProGen2_OAS.rst +0 -246
- biolmai-0.1.8/docs/model-docs/progen2/index.rst +0 -10
- biolmai-0.1.8/docs/python-client/get_started/authorization.rst +0 -9
- biolmai-0.1.8/docs/python-client/get_started/quickstart.rst +0 -15
- biolmai-0.1.8/docs/python-client/index.rst +0 -18
- biolmai-0.1.8/docs/python-client/usage.rst +0 -7
- biolmai-0.1.8/pyproject.toml +0 -44
- biolmai-0.1.8/tests/test_biolmai.py +0 -263
- {biolmai-0.1.8 → biolmai-0.2.10}/AUTHORS.rst +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/CONTRIBUTING.rst +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/HISTORY.rst +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/LICENSE +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/MANIFEST.in +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/ltc.py +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/dependency_links.txt +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/not-zip-safe +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/top_level.txt +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/Makefile +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/api_reference_icon.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/biolm_docs_logo_dark.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/biolm_docs_logo_light.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/chat_agents_icon.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/jupyter_notebooks_icon.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/model_docs_icon.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/python_sdk_icon.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/tutorials_icon.png +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/make.bat +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/modules.rst +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/docs/tutorials_use_cases/notebooks.rst +0 -0
- {biolmai-0.1.8 → biolmai-0.2.10}/tests/__init__.py +0 -0
biolmai-0.2.10/PKG-INFO
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: biolmai
|
|
3
|
+
Version: 0.2.10
|
|
4
|
+
Summary: BioLM Python client
|
|
5
|
+
Home-page: https://github.com/BioLM/py-biolm
|
|
6
|
+
Author: BioLM
|
|
7
|
+
Author-email: BioLM <support@biolm.ai>
|
|
8
|
+
License: Apache Software License 2.0
|
|
9
|
+
Keywords: biolmai
|
|
10
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Natural Language :: English
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Requires-Python: >=3.7
|
|
22
|
+
Description-Content-Type: text/x-rst
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
License-File: AUTHORS.rst
|
|
25
|
+
Requires-Dist: httpx>=0.23.0
|
|
26
|
+
Requires-Dist: httpcore
|
|
27
|
+
Requires-Dist: Click>=6.0
|
|
28
|
+
Requires-Dist: requests
|
|
29
|
+
Requires-Dist: aiodns
|
|
30
|
+
Requires-Dist: synchronicity>=0.5.0; python_version >= "3.9"
|
|
31
|
+
Requires-Dist: synchronicity<0.5.0; python_version < "3.9"
|
|
32
|
+
Requires-Dist: typing_extensions; python_version < "3.9"
|
|
33
|
+
Requires-Dist: aiohttp<=3.8.6; python_version < "3.12"
|
|
34
|
+
Requires-Dist: aiohttp>=3.9.0; python_version >= "3.12"
|
|
35
|
+
Requires-Dist: async-lru
|
|
36
|
+
Requires-Dist: aiofiles
|
|
37
|
+
Requires-Dist: cryptography
|
|
38
|
+
Dynamic: author
|
|
39
|
+
Dynamic: home-page
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
Dynamic: requires-python
|
|
42
|
+
|
|
43
|
+
========
|
|
44
|
+
BioLM AI
|
|
45
|
+
========
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
.. image:: https://img.shields.io/pypi/v/biolmai.svg
|
|
49
|
+
:target: https://pypi.python.org/pypi/biolmai
|
|
50
|
+
|
|
51
|
+
.. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
|
|
52
|
+
:target: https://travis-ci.org/github/BioLM/py-biolm
|
|
53
|
+
|
|
54
|
+
.. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
|
|
55
|
+
:target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
|
|
56
|
+
:alt: Documentation Status
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
Python client and SDK for `BioLM <https://biolm.ai>`_
|
|
62
|
+
|
|
63
|
+
Install the package:
|
|
64
|
+
|
|
65
|
+
.. code-block:: bash
|
|
66
|
+
|
|
67
|
+
pip install biolmai
|
|
68
|
+
|
|
69
|
+
Basic usage:
|
|
70
|
+
|
|
71
|
+
.. code-block:: python
|
|
72
|
+
|
|
73
|
+
from biolmai import biolm
|
|
74
|
+
|
|
75
|
+
# Encode a single sequence
|
|
76
|
+
result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
|
|
77
|
+
|
|
78
|
+
# Predict a batch of sequences
|
|
79
|
+
result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
|
|
80
|
+
|
|
81
|
+
# Write results to disk
|
|
82
|
+
biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
|
|
83
|
+
|
|
84
|
+
Asynchronous usage:
|
|
85
|
+
|
|
86
|
+
.. code-block:: python
|
|
87
|
+
|
|
88
|
+
from biolmai.client import BioLMApiClient
|
|
89
|
+
import asyncio
|
|
90
|
+
|
|
91
|
+
async def main():
|
|
92
|
+
model = BioLMApiClient("esmfold")
|
|
93
|
+
result = await model.predict(items=[{"sequence": "MDNELE"}])
|
|
94
|
+
print(result)
|
|
95
|
+
|
|
96
|
+
asyncio.run(main())
|
|
97
|
+
|
|
98
|
+
Overview
|
|
99
|
+
========
|
|
100
|
+
|
|
101
|
+
The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
|
|
102
|
+
|
|
103
|
+
Main features:
|
|
104
|
+
|
|
105
|
+
- High-level BioLM constructor for quick requests
|
|
106
|
+
- Sync and async interfaces
|
|
107
|
+
- Automatic or custom rate limiting/throttling
|
|
108
|
+
- Schema-based batch size detection
|
|
109
|
+
- Flexible input formats (single key + list, or list of dicts)
|
|
110
|
+
- Low memory usage via generators
|
|
111
|
+
- Flexible error handling (raise, continue, or stop on error)
|
|
112
|
+
- Universal HTTP client for both sync and async
|
|
113
|
+
|
|
114
|
+
Features
|
|
115
|
+
========
|
|
116
|
+
|
|
117
|
+
- **High-level constructor**: Instantly run an API call with a single line.
|
|
118
|
+
- **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
|
|
119
|
+
- **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
|
|
120
|
+
- **Schema-based batching**: Automatically queries API for max batch size.
|
|
121
|
+
- **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
|
|
122
|
+
- **Low memory**: Uses generators for validation and batching.
|
|
123
|
+
- **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
|
|
124
|
+
- **Disk output**: Write results as JSONL to disk.
|
|
125
|
+
- **Universal HTTP client**: Efficient for both sync and async.
|
|
126
|
+
- **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
|
|
127
|
+
|
|
128
|
+
**Example endpoints and actions:**
|
|
129
|
+
|
|
130
|
+
- `esm2-8m/encode`: Embedding for protein sequences.
|
|
131
|
+
- `esmfold/predict`: Structure prediction for protein sequences.
|
|
132
|
+
- `progen2-oas/generate`: Sequence generation from a context string.
|
|
133
|
+
- `dnabert2/predict`: Masked prediction for DNA sequences.
|
|
134
|
+
- `ablang2/encode`: Embeddings for paired-chain antibodies.
|
|
135
|
+
|
|
136
|
+
* Free software: Apache Software License 2.0
|
|
137
|
+
* Documentation: https://docs.biolm.ai
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
========
|
|
2
|
+
BioLM AI
|
|
3
|
+
========
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
.. image:: https://img.shields.io/pypi/v/biolmai.svg
|
|
7
|
+
:target: https://pypi.python.org/pypi/biolmai
|
|
8
|
+
|
|
9
|
+
.. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
|
|
10
|
+
:target: https://travis-ci.org/github/BioLM/py-biolm
|
|
11
|
+
|
|
12
|
+
.. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
|
|
13
|
+
:target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
|
|
14
|
+
:alt: Documentation Status
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
Python client and SDK for `BioLM <https://biolm.ai>`_
|
|
20
|
+
|
|
21
|
+
Install the package:
|
|
22
|
+
|
|
23
|
+
.. code-block:: bash
|
|
24
|
+
|
|
25
|
+
pip install biolmai
|
|
26
|
+
|
|
27
|
+
Basic usage:
|
|
28
|
+
|
|
29
|
+
.. code-block:: python
|
|
30
|
+
|
|
31
|
+
from biolmai import biolm
|
|
32
|
+
|
|
33
|
+
# Encode a single sequence
|
|
34
|
+
result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
|
|
35
|
+
|
|
36
|
+
# Predict a batch of sequences
|
|
37
|
+
result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
|
|
38
|
+
|
|
39
|
+
# Write results to disk
|
|
40
|
+
biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
|
|
41
|
+
|
|
42
|
+
Asynchronous usage:
|
|
43
|
+
|
|
44
|
+
.. code-block:: python
|
|
45
|
+
|
|
46
|
+
from biolmai.client import BioLMApiClient
|
|
47
|
+
import asyncio
|
|
48
|
+
|
|
49
|
+
async def main():
|
|
50
|
+
model = BioLMApiClient("esmfold")
|
|
51
|
+
result = await model.predict(items=[{"sequence": "MDNELE"}])
|
|
52
|
+
print(result)
|
|
53
|
+
|
|
54
|
+
asyncio.run(main())
|
|
55
|
+
|
|
56
|
+
Overview
|
|
57
|
+
========
|
|
58
|
+
|
|
59
|
+
The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
|
|
60
|
+
|
|
61
|
+
Main features:
|
|
62
|
+
|
|
63
|
+
- High-level BioLM constructor for quick requests
|
|
64
|
+
- Sync and async interfaces
|
|
65
|
+
- Automatic or custom rate limiting/throttling
|
|
66
|
+
- Schema-based batch size detection
|
|
67
|
+
- Flexible input formats (single key + list, or list of dicts)
|
|
68
|
+
- Low memory usage via generators
|
|
69
|
+
- Flexible error handling (raise, continue, or stop on error)
|
|
70
|
+
- Universal HTTP client for both sync and async
|
|
71
|
+
|
|
72
|
+
Features
|
|
73
|
+
========
|
|
74
|
+
|
|
75
|
+
- **High-level constructor**: Instantly run an API call with a single line.
|
|
76
|
+
- **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
|
|
77
|
+
- **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
|
|
78
|
+
- **Schema-based batching**: Automatically queries API for max batch size.
|
|
79
|
+
- **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
|
|
80
|
+
- **Low memory**: Uses generators for validation and batching.
|
|
81
|
+
- **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
|
|
82
|
+
- **Disk output**: Write results as JSONL to disk.
|
|
83
|
+
- **Universal HTTP client**: Efficient for both sync and async.
|
|
84
|
+
- **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
|
|
85
|
+
|
|
86
|
+
**Example endpoints and actions:**
|
|
87
|
+
|
|
88
|
+
- `esm2-8m/encode`: Embedding for protein sequences.
|
|
89
|
+
- `esmfold/predict`: Structure prediction for protein sequences.
|
|
90
|
+
- `progen2-oas/generate`: Sequence generation from a context string.
|
|
91
|
+
- `dnabert2/predict`: Masked prediction for DNA sequences.
|
|
92
|
+
- `ablang2/encode`: Embeddings for paired-chain antibodies.
|
|
93
|
+
|
|
94
|
+
* Free software: Apache Software License 2.0
|
|
95
|
+
* Documentation: https://docs.biolm.ai
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Top-level package for BioLM AI."""
|
|
2
|
+
__author__ = """Nikhil Haas"""
|
|
3
|
+
__email__ = "nikhil@biolm.ai"
|
|
4
|
+
__version__ = '0.2.10'
|
|
5
|
+
|
|
6
|
+
from biolmai.client import BioLMApi, BioLMApiClient
|
|
7
|
+
from biolmai.biolmai import BioLM
|
|
8
|
+
from typing import Optional, Union, List, Any
|
|
9
|
+
|
|
10
|
+
__all__ = ['biolm']
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def biolm(
|
|
14
|
+
*,
|
|
15
|
+
entity: str,
|
|
16
|
+
action: str,
|
|
17
|
+
type: Optional[str] = None,
|
|
18
|
+
items: Union[Any, List[Any]],
|
|
19
|
+
params: Optional[dict] = None,
|
|
20
|
+
api_key: Optional[str] = None,
|
|
21
|
+
**kwargs
|
|
22
|
+
) -> Any:
|
|
23
|
+
"""Top-level convenience function that wraps the BioLM class and returns the result."""
|
|
24
|
+
return BioLM(entity=entity, action=action, type=type, items=items, params=params, api_key=api_key, **kwargs)
|
|
@@ -4,8 +4,12 @@ import inspect
|
|
|
4
4
|
import time
|
|
5
5
|
from functools import lru_cache
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
import
|
|
7
|
+
try:
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
except ImportError:
|
|
11
|
+
pass
|
|
12
|
+
|
|
9
13
|
import requests
|
|
10
14
|
from requests.adapters import HTTPAdapter
|
|
11
15
|
from requests.packages.urllib3.util.retry import Retry
|
|
@@ -15,7 +19,7 @@ import biolmai.auth
|
|
|
15
19
|
from biolmai.asynch import async_api_call_wrapper
|
|
16
20
|
from biolmai.biolmai import log
|
|
17
21
|
from biolmai.const import MULTIPROCESS_THREADS
|
|
18
|
-
from biolmai.payloads import INST_DAT_TXT, predict_resp_many_in_one_to_many_singles
|
|
22
|
+
from biolmai.payloads import INST_DAT_TXT, PARAMS_ITEMS, predict_resp_many_in_one_to_many_singles
|
|
19
23
|
|
|
20
24
|
|
|
21
25
|
@lru_cache(maxsize=64)
|
|
@@ -35,65 +39,82 @@ def text_validator(text, c):
|
|
|
35
39
|
except Exception as e:
|
|
36
40
|
return str(e)
|
|
37
41
|
|
|
42
|
+
def combine_validation(x, y):
|
|
43
|
+
if x is None and y is None:
|
|
44
|
+
return None
|
|
45
|
+
elif isinstance(x, str) and y is None:
|
|
46
|
+
return x
|
|
47
|
+
elif x is None and isinstance(y, str):
|
|
48
|
+
return y
|
|
49
|
+
elif isinstance(x, str) and isinstance(y, str):
|
|
50
|
+
return f"{x}\n{y}"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def validate_action(action):
|
|
54
|
+
def validate(f):
|
|
55
|
+
def wrapper(*args, **kwargs):
|
|
56
|
+
# Get class instance at runtime, so you can access not just
|
|
57
|
+
# APIEndpoints, but any *parent* classes of that,
|
|
58
|
+
# like ESMFoldSinglechain.
|
|
59
|
+
class_obj_self = args[0]
|
|
60
|
+
try:
|
|
61
|
+
is_method = inspect.getfullargspec(f)[0][0] == "self"
|
|
62
|
+
except Exception:
|
|
63
|
+
is_method = False
|
|
38
64
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# APIEndpoints, but any *parent* classes of that,
|
|
43
|
-
# like ESMFoldSinglechain.
|
|
44
|
-
class_obj_self = args[0]
|
|
45
|
-
try:
|
|
46
|
-
is_method = inspect.getfullargspec(f)[0][0] == "self"
|
|
47
|
-
except Exception:
|
|
48
|
-
is_method = False
|
|
49
|
-
|
|
50
|
-
# Is the function we decorated a class method?
|
|
51
|
-
if is_method:
|
|
52
|
-
name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
|
|
53
|
-
else:
|
|
54
|
-
name = f"{f.__module__}.{f.__name__}"
|
|
55
|
-
|
|
56
|
-
if is_method:
|
|
57
|
-
# Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
|
|
58
|
-
action_method_name = name.split(".")[-1]
|
|
59
|
-
validate_endpoint_action(
|
|
60
|
-
class_obj_self.action_class_strings,
|
|
61
|
-
action_method_name,
|
|
62
|
-
class_obj_self.__class__.__name__,
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
input_data = args[1]
|
|
66
|
-
# Validate each row's text/input based on class attribute `seq_classes`
|
|
67
|
-
for c in class_obj_self.seq_classes:
|
|
68
|
-
# Validate input data against regex
|
|
69
|
-
if class_obj_self.multiprocess_threads:
|
|
70
|
-
validation = input_data.text.apply(text_validator, args=(c,))
|
|
65
|
+
# Is the function we decorated a class method?
|
|
66
|
+
if is_method:
|
|
67
|
+
name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
|
|
71
68
|
else:
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
69
|
+
name = f"{f.__module__}.{f.__name__}"
|
|
70
|
+
|
|
71
|
+
if is_method:
|
|
72
|
+
# Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
|
|
73
|
+
action_method_name = name.split(".")[-1]
|
|
74
|
+
validate_endpoint_action(
|
|
75
|
+
class_obj_self.action_class_strings,
|
|
76
|
+
action_method_name,
|
|
77
|
+
class_obj_self.__class__.__name__,
|
|
78
78
|
)
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
80
|
+
input_data = args[1]
|
|
81
|
+
# Validate each row's text/input based on class attribute `seq_classes`
|
|
82
|
+
if action == "predict":
|
|
83
|
+
input_classes = class_obj_self.predict_input_classes
|
|
84
|
+
elif action == "encode":
|
|
85
|
+
input_classes = class_obj_self.encode_input_classes
|
|
86
|
+
elif action == "generate":
|
|
87
|
+
input_classes = class_obj_self.generate_input_classes
|
|
88
|
+
elif action == "transform":
|
|
89
|
+
input_classes = class_obj_self.transform_input_classes
|
|
90
|
+
for c in input_classes:
|
|
91
|
+
# Validate input data against regex
|
|
92
|
+
if class_obj_self.multiprocess_threads:
|
|
93
|
+
validation = input_data.text.apply(text_validator, args=(c,))
|
|
94
|
+
else:
|
|
95
|
+
validation = input_data.text.apply(text_validator, args=(c,))
|
|
96
|
+
if "validation" not in input_data.columns:
|
|
97
|
+
input_data["validation"] = validation
|
|
98
|
+
else:
|
|
99
|
+
# masking and loc may be more performant option
|
|
100
|
+
input_data["validation"] = input_data["validation"].combine(validation, combine_validation)
|
|
101
|
+
|
|
102
|
+
# Mark your batches, excluding invalid rows
|
|
103
|
+
valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
|
|
104
|
+
N = class_obj_self.batch_size # N rows will go per API request
|
|
105
|
+
# JOIN back, which is by index
|
|
106
|
+
if valid_dat.shape[0] != input_data.shape[0]:
|
|
107
|
+
valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
|
|
108
|
+
input_data = input_data.merge(
|
|
109
|
+
valid_dat.batch, left_index=True, right_index=True, how="left"
|
|
110
|
+
)
|
|
111
|
+
else:
|
|
112
|
+
input_data["batch"] = np.arange(input_data.shape[0]) // N
|
|
113
|
+
res = f(class_obj_self, input_data, **kwargs)
|
|
114
|
+
return res
|
|
96
115
|
|
|
116
|
+
return wrapper
|
|
117
|
+
return validate
|
|
97
118
|
|
|
98
119
|
def convert_input(f):
|
|
99
120
|
def wrapper(*args, **kwargs):
|
|
@@ -123,7 +144,20 @@ def convert_input(f):
|
|
|
123
144
|
|
|
124
145
|
|
|
125
146
|
class APIEndpoint:
|
|
126
|
-
|
|
147
|
+
# Override in subclasses as needed
|
|
148
|
+
batch_size = 3
|
|
149
|
+
params = None
|
|
150
|
+
action_classes = ()
|
|
151
|
+
api_version = 2
|
|
152
|
+
|
|
153
|
+
predict_input_key = "sequence"
|
|
154
|
+
encode_input_key = "sequence"
|
|
155
|
+
generate_input_key = "context"
|
|
156
|
+
|
|
157
|
+
predict_input_classes = ()
|
|
158
|
+
encode_input_classes = ()
|
|
159
|
+
generate_input_classes = ()
|
|
160
|
+
transform_input_classes = ()
|
|
127
161
|
|
|
128
162
|
def __init__(self, multiprocess_threads=None):
|
|
129
163
|
# Check for instance-specific threads, otherwise read from env var
|
|
@@ -137,7 +171,7 @@ class APIEndpoint:
|
|
|
137
171
|
[c.__name__.replace("Action", "").lower() for c in self.action_classes]
|
|
138
172
|
)
|
|
139
173
|
|
|
140
|
-
def post_batches(self, dat, slug, action, payload_maker, resp_key):
|
|
174
|
+
def post_batches(self, dat, slug, action, payload_maker, resp_key, key="sequence", params=None):
|
|
141
175
|
keep_batches = dat.loc[~dat.batch.isnull(), ["text", "batch"]]
|
|
142
176
|
if keep_batches.shape[0] == 0:
|
|
143
177
|
pass # Do nothing - we made nice JSON errors to return in the DF
|
|
@@ -145,7 +179,7 @@ class APIEndpoint:
|
|
|
145
179
|
# raise AssertionError(err)
|
|
146
180
|
if keep_batches.shape[0] > 0:
|
|
147
181
|
api_resps = async_api_call_wrapper(
|
|
148
|
-
keep_batches, slug, action, payload_maker, resp_key
|
|
182
|
+
keep_batches, slug, action, payload_maker, resp_key, api_version=self.api_version, key=key, params=params,
|
|
149
183
|
)
|
|
150
184
|
if isinstance(api_resps, pd.DataFrame):
|
|
151
185
|
batch_res = api_resps.explode("api_resp") # Should be lists of results
|
|
@@ -154,7 +188,9 @@ class APIEndpoint:
|
|
|
154
188
|
batch_res = pd.DataFrame({"api_resp": api_resps})
|
|
155
189
|
len_res = batch_res.shape[0]
|
|
156
190
|
orig_request_rows = keep_batches.shape[0]
|
|
157
|
-
|
|
191
|
+
# For 'generate' actions, models may return multiple results per item
|
|
192
|
+
# (e.g., hyper-mpnn with batch_size > 1), so skip the 1:1 check
|
|
193
|
+
if action != "generate" and len_res != orig_request_rows:
|
|
158
194
|
err = "Response rows ({}) mismatch with input rows ({})"
|
|
159
195
|
err = err.format(len_res, orig_request_rows)
|
|
160
196
|
raise AssertionError(err)
|
|
@@ -170,11 +206,11 @@ class APIEndpoint:
|
|
|
170
206
|
dat["api_resp"] = None
|
|
171
207
|
return dat
|
|
172
208
|
|
|
173
|
-
def unpack_local_validations(self, dat):
|
|
209
|
+
def unpack_local_validations(self, dat, response_key):
|
|
174
210
|
dat.loc[dat.api_resp.isnull(), "api_resp"] = (
|
|
175
211
|
dat.loc[~dat.validation.isnull(), "validation"]
|
|
176
212
|
.apply(
|
|
177
|
-
predict_resp_many_in_one_to_many_singles, args=(None, None, True, None)
|
|
213
|
+
predict_resp_many_in_one_to_many_singles, args=(None, None, True, None), response_key=response_key
|
|
178
214
|
)
|
|
179
215
|
.explode()
|
|
180
216
|
)
|
|
@@ -182,39 +218,46 @@ class APIEndpoint:
|
|
|
182
218
|
return dat
|
|
183
219
|
|
|
184
220
|
@convert_input
|
|
185
|
-
@
|
|
186
|
-
def predict(self, dat):
|
|
187
|
-
|
|
188
|
-
|
|
221
|
+
@validate_action("predict")
|
|
222
|
+
def predict(self, dat, params=None):
|
|
223
|
+
if self.api_version == 1:
|
|
224
|
+
dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
|
|
225
|
+
dat = self.unpack_local_validations(dat, "predictions")
|
|
226
|
+
else:
|
|
227
|
+
dat = self.post_batches(dat, self.slug, "predict", PARAMS_ITEMS, "results", key=self.predict_input_key, params=params)
|
|
228
|
+
dat = self.unpack_local_validations(dat,"results")
|
|
189
229
|
return dat.api_resp.replace(np.nan, None).tolist()
|
|
190
230
|
|
|
191
|
-
def infer(self, dat):
|
|
192
|
-
return self.predict(dat)
|
|
231
|
+
def infer(self, dat, params=None):
|
|
232
|
+
return self.predict(dat, params)
|
|
193
233
|
|
|
194
234
|
@convert_input
|
|
195
|
-
@
|
|
235
|
+
@validate_action("transform") # api v1 legacy action
|
|
196
236
|
def transform(self, dat):
|
|
197
237
|
dat = self.post_batches(
|
|
198
238
|
dat, self.slug, "transform", INST_DAT_TXT, "predictions"
|
|
199
239
|
)
|
|
200
|
-
dat = self.unpack_local_validations(dat)
|
|
240
|
+
dat = self.unpack_local_validations(dat,"predictions")
|
|
201
241
|
return dat.api_resp.replace(np.nan, None).tolist()
|
|
202
242
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
# dat = self.unpack_local_validations(dat)
|
|
211
|
-
# return dat.api_resp.replace(np.nan, None).tolist()
|
|
243
|
+
@convert_input
|
|
244
|
+
@validate_action("encode")
|
|
245
|
+
def encode(self, dat, params=None):
|
|
246
|
+
|
|
247
|
+
dat = self.post_batches(dat, self.slug, "encode", PARAMS_ITEMS, "results", key=self.encode_input_key, params=params)
|
|
248
|
+
dat = self.unpack_local_validations(dat, "results")
|
|
249
|
+
return dat.api_resp.replace(np.nan, None).tolist()
|
|
212
250
|
|
|
213
251
|
@convert_input
|
|
214
|
-
@
|
|
215
|
-
def generate(self, dat):
|
|
216
|
-
|
|
217
|
-
|
|
252
|
+
@validate_action("generate")
|
|
253
|
+
def generate(self, dat, params=None):
|
|
254
|
+
if self.api_version == 1:
|
|
255
|
+
dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
|
|
256
|
+
dat = self.unpack_local_validations(dat, "predictions")
|
|
257
|
+
else:
|
|
258
|
+
dat = self.post_batches(dat, self.slug, "generate", PARAMS_ITEMS, "results", key=self.generate_input_key, params=params)
|
|
259
|
+
dat = self.unpack_local_validations(dat, "results")
|
|
260
|
+
|
|
218
261
|
return dat.api_resp.replace(np.nan, None).tolist()
|
|
219
262
|
|
|
220
263
|
|
|
@@ -290,9 +333,9 @@ class TransformAction:
|
|
|
290
333
|
return "TransformAction"
|
|
291
334
|
|
|
292
335
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
336
|
+
class EncodeAction:
|
|
337
|
+
def __str__(self):
|
|
338
|
+
return "EncodeAction"
|
|
296
339
|
|
|
297
340
|
|
|
298
341
|
class ExplainAction:
|
|
@@ -7,7 +7,7 @@ import aiohttp.resolver
|
|
|
7
7
|
from aiohttp import ClientSession
|
|
8
8
|
|
|
9
9
|
from biolmai.auth import get_user_auth_header
|
|
10
|
-
from biolmai.const import BASE_API_URL, MULTIPROCESS_THREADS
|
|
10
|
+
from biolmai.const import BASE_API_URL, BASE_API_URL_V1, MULTIPROCESS_THREADS
|
|
11
11
|
|
|
12
12
|
aiohttp.resolver.DefaultResolver = aiohttp.resolver.AsyncResolver
|
|
13
13
|
|
|
@@ -146,11 +146,14 @@ async def async_main(urls, concurrency) -> list:
|
|
|
146
146
|
return await get_all(urls, concurrency)
|
|
147
147
|
|
|
148
148
|
|
|
149
|
-
async def async_api_calls(model_name, action, headers, payloads, response_key=None):
|
|
149
|
+
async def async_api_calls(model_name, action, headers, payloads, response_key=None, api_version=2):
|
|
150
150
|
"""Hit an arbitrary BioLM model inference API."""
|
|
151
151
|
# Normally would POST multiple sequences at once for greater efficiency,
|
|
152
152
|
# but for simplicity's sake will do one at a time right now
|
|
153
|
-
|
|
153
|
+
if api_version == 1:
|
|
154
|
+
url = f"{BASE_API_URL_V1}/models/{model_name}/{action}/"
|
|
155
|
+
else:
|
|
156
|
+
url = f"{BASE_API_URL}/{model_name}/{action}/"
|
|
154
157
|
|
|
155
158
|
if not isinstance(payloads, (list, dict)):
|
|
156
159
|
err = "API request payload must be a list or dict, got {}"
|
|
@@ -180,15 +183,20 @@ async def async_api_calls(model_name, action, headers, payloads, response_key=No
|
|
|
180
183
|
# return response
|
|
181
184
|
|
|
182
185
|
|
|
183
|
-
def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key):
|
|
186
|
+
def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key, api_version=2, key="sequence", params=None):
|
|
184
187
|
"""Wrap API calls to assist with sequence validation as a precursor to
|
|
185
188
|
each API call.
|
|
186
189
|
"""
|
|
187
190
|
model_name = slug
|
|
188
191
|
# payload = payload_maker(grouped_df)
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
+
if api_version == 1:
|
|
193
|
+
init_ploads = grouped_df.groupby("batch").apply(
|
|
194
|
+
payload_maker, include_batch_size=True
|
|
195
|
+
)
|
|
196
|
+
else:
|
|
197
|
+
init_ploads = grouped_df.groupby("batch").apply(
|
|
198
|
+
payload_maker, key=key, params=params, include_batch_size=True
|
|
199
|
+
)
|
|
192
200
|
ploads = init_ploads.to_list()
|
|
193
201
|
init_ploads = init_ploads.to_frame(name="pload")
|
|
194
202
|
init_ploads["batch"] = init_ploads.index
|
|
@@ -208,7 +216,7 @@ def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key
|
|
|
208
216
|
# "https://python.org",
|
|
209
217
|
# ]
|
|
210
218
|
# concurrency = 3
|
|
211
|
-
api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key))
|
|
219
|
+
api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key, api_version))
|
|
212
220
|
api_resp = [item for sublist in api_resp for item in sublist]
|
|
213
221
|
api_resp = sorted(api_resp, key=lambda x: x["batch_id"])
|
|
214
222
|
# print(api_resp)
|