biolmai 0.1.8__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. biolmai-0.2.10/PKG-INFO +137 -0
  2. biolmai-0.2.10/README.rst +95 -0
  3. biolmai-0.2.10/biolmai/__init__.py +24 -0
  4. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/api.py +130 -87
  5. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/asynch.py +16 -8
  6. biolmai-0.2.10/biolmai/auth.py +1043 -0
  7. biolmai-0.2.10/biolmai/biolmai.py +122 -0
  8. biolmai-0.2.10/biolmai/cli.py +115 -0
  9. biolmai-0.2.10/biolmai/client.py +741 -0
  10. biolmai-0.2.10/biolmai/cls.py +176 -0
  11. biolmai-0.2.10/biolmai/const.py +66 -0
  12. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/payloads.py +13 -2
  13. biolmai-0.2.10/biolmai/seqflow_auth.py +200 -0
  14. biolmai-0.2.10/biolmai/validate.py +159 -0
  15. biolmai-0.2.10/biolmai.egg-info/PKG-INFO +137 -0
  16. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/SOURCES.txt +28 -19
  17. biolmai-0.2.10/biolmai.egg-info/entry_points.txt +5 -0
  18. biolmai-0.2.10/biolmai.egg-info/requires.txt +21 -0
  19. biolmai-0.2.10/docs/_static/biolm_logomark_transparent.png +0 -0
  20. biolmai-0.2.10/docs/_static/biolm_logomark_transparent_for_dark.png +0 -0
  21. {biolmai-0.1.8 → biolmai-0.2.10}/docs/biolmai.rst +16 -0
  22. biolmai-0.2.10/docs/conf.py +289 -0
  23. biolmai-0.2.10/docs/index.rst +48 -0
  24. biolmai-0.2.10/docs/python-client/api_biolm.rst +49 -0
  25. biolmai-0.2.10/docs/python-client/api_client.rst +74 -0
  26. biolmai-0.2.10/docs/python-client/async_sync.rst +157 -0
  27. biolmai-0.2.10/docs/python-client/authentication.rst +137 -0
  28. biolmai-0.2.10/docs/python-client/batching.rst +187 -0
  29. biolmai-0.2.10/docs/python-client/disk_output.rst +66 -0
  30. biolmai-0.2.10/docs/python-client/error_handling.rst +173 -0
  31. biolmai-0.2.10/docs/python-client/faq.rst +38 -0
  32. biolmai-0.2.10/docs/python-client/features.rst +22 -0
  33. biolmai-0.2.10/docs/python-client/index.rst +29 -0
  34. {biolmai-0.1.8/docs/python-client/get_started → biolmai-0.2.10/docs/python-client}/installation.rst +2 -2
  35. biolmai-0.2.10/docs/python-client/overview.rst +18 -0
  36. biolmai-0.2.10/docs/python-client/quickstart.rst +28 -0
  37. biolmai-0.2.10/docs/python-client/rate_limiting.rst +97 -0
  38. biolmai-0.2.10/docs/python-client/usage.rst +84 -0
  39. biolmai-0.2.10/pyproject.toml +99 -0
  40. {biolmai-0.1.8 → biolmai-0.2.10}/setup.cfg +5 -1
  41. {biolmai-0.1.8 → biolmai-0.2.10}/setup.py +27 -9
  42. biolmai-0.2.10/tests/test_abatch_calls.py +208 -0
  43. biolmai-0.2.10/tests/test_aclient.py +296 -0
  44. biolmai-0.2.10/tests/test_batch_error_retry.py +80 -0
  45. biolmai-0.2.10/tests/test_batch_errors.py +47 -0
  46. biolmai-0.2.10/tests/test_biolmai.py +211 -0
  47. biolmai-0.2.10/tests/test_client.py +379 -0
  48. biolmai-0.2.10/tests/test_integration.py +132 -0
  49. biolmai-0.2.10/tests/test_max_items.py +183 -0
  50. biolmai-0.2.10/tests/test_oauth_auth.py +286 -0
  51. biolmai-0.2.10/tests/test_rate_limit.py +182 -0
  52. biolmai-0.2.10/tests/test_schemas.py +30 -0
  53. biolmai-0.1.8/PKG-INFO +0 -70
  54. biolmai-0.1.8/README.rst +0 -37
  55. biolmai-0.1.8/biolmai/__init__.py +0 -7
  56. biolmai-0.1.8/biolmai/auth.py +0 -173
  57. biolmai-0.1.8/biolmai/biolmai.py +0 -5
  58. biolmai-0.1.8/biolmai/cli.py +0 -75
  59. biolmai-0.1.8/biolmai/cls.py +0 -97
  60. biolmai-0.1.8/biolmai/const.py +0 -29
  61. biolmai-0.1.8/biolmai/validate.py +0 -134
  62. biolmai-0.1.8/biolmai.egg-info/PKG-INFO +0 -70
  63. biolmai-0.1.8/biolmai.egg-info/entry_points.txt +0 -2
  64. biolmai-0.1.8/biolmai.egg-info/requires.txt +0 -4
  65. biolmai-0.1.8/docs/conf.py +0 -163
  66. biolmai-0.1.8/docs/index.rst +0 -107
  67. biolmai-0.1.8/docs/model-docs/DNABERT.rst +0 -640
  68. biolmai-0.1.8/docs/model-docs/ESM-1v.rst +0 -362
  69. biolmai-0.1.8/docs/model-docs/ESM2_Embeddings.rst +0 -242
  70. biolmai-0.1.8/docs/model-docs/ESMFold.rst +0 -252
  71. biolmai-0.1.8/docs/model-docs/ESM_InverseFold.rst +0 -278
  72. biolmai-0.1.8/docs/model-docs/ProtGPT2.rst +0 -609
  73. biolmai-0.1.8/docs/model-docs/ProteInfer_EC.rst +0 -249
  74. biolmai-0.1.8/docs/model-docs/ProteInfer_GO.rst +0 -329
  75. biolmai-0.1.8/docs/model-docs/img/book_icon.png +0 -0
  76. biolmai-0.1.8/docs/model-docs/img/esmfold_perf.png +0 -0
  77. biolmai-0.1.8/docs/model-docs/index.rst +0 -13
  78. biolmai-0.1.8/docs/model-docs/progen2/ProGen2_BFD90.rst +0 -251
  79. biolmai-0.1.8/docs/model-docs/progen2/ProGen2_Medium.rst +0 -248
  80. biolmai-0.1.8/docs/model-docs/progen2/ProGen2_OAS.rst +0 -246
  81. biolmai-0.1.8/docs/model-docs/progen2/index.rst +0 -10
  82. biolmai-0.1.8/docs/python-client/get_started/authorization.rst +0 -9
  83. biolmai-0.1.8/docs/python-client/get_started/quickstart.rst +0 -15
  84. biolmai-0.1.8/docs/python-client/index.rst +0 -18
  85. biolmai-0.1.8/docs/python-client/usage.rst +0 -7
  86. biolmai-0.1.8/pyproject.toml +0 -44
  87. biolmai-0.1.8/tests/test_biolmai.py +0 -263
  88. {biolmai-0.1.8 → biolmai-0.2.10}/AUTHORS.rst +0 -0
  89. {biolmai-0.1.8 → biolmai-0.2.10}/CONTRIBUTING.rst +0 -0
  90. {biolmai-0.1.8 → biolmai-0.2.10}/HISTORY.rst +0 -0
  91. {biolmai-0.1.8 → biolmai-0.2.10}/LICENSE +0 -0
  92. {biolmai-0.1.8 → biolmai-0.2.10}/MANIFEST.in +0 -0
  93. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai/ltc.py +0 -0
  94. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/dependency_links.txt +0 -0
  95. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/not-zip-safe +0 -0
  96. {biolmai-0.1.8 → biolmai-0.2.10}/biolmai.egg-info/top_level.txt +0 -0
  97. {biolmai-0.1.8 → biolmai-0.2.10}/docs/Makefile +0 -0
  98. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/api_reference_icon.png +0 -0
  99. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/biolm_docs_logo_dark.png +0 -0
  100. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/biolm_docs_logo_light.png +0 -0
  101. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/chat_agents_icon.png +0 -0
  102. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/jupyter_notebooks_icon.png +0 -0
  103. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/model_docs_icon.png +0 -0
  104. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/python_sdk_icon.png +0 -0
  105. {biolmai-0.1.8 → biolmai-0.2.10}/docs/_static/tutorials_icon.png +0 -0
  106. {biolmai-0.1.8 → biolmai-0.2.10}/docs/make.bat +0 -0
  107. {biolmai-0.1.8 → biolmai-0.2.10}/docs/modules.rst +0 -0
  108. {biolmai-0.1.8 → biolmai-0.2.10}/docs/tutorials_use_cases/notebooks.rst +0 -0
  109. {biolmai-0.1.8 → biolmai-0.2.10}/tests/__init__.py +0 -0
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.4
2
+ Name: biolmai
3
+ Version: 0.2.10
4
+ Summary: BioLM Python client
5
+ Home-page: https://github.com/BioLM/py-biolm
6
+ Author: BioLM
7
+ Author-email: BioLM <support@biolm.ai>
8
+ License: Apache Software License 2.0
9
+ Keywords: biolmai
10
+ Classifier: Development Status :: 2 - Pre-Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Natural Language :: English
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Requires-Python: >=3.7
22
+ Description-Content-Type: text/x-rst
23
+ License-File: LICENSE
24
+ License-File: AUTHORS.rst
25
+ Requires-Dist: httpx>=0.23.0
26
+ Requires-Dist: httpcore
27
+ Requires-Dist: Click>=6.0
28
+ Requires-Dist: requests
29
+ Requires-Dist: aiodns
30
+ Requires-Dist: synchronicity>=0.5.0; python_version >= "3.9"
31
+ Requires-Dist: synchronicity<0.5.0; python_version < "3.9"
32
+ Requires-Dist: typing_extensions; python_version < "3.9"
33
+ Requires-Dist: aiohttp<=3.8.6; python_version < "3.12"
34
+ Requires-Dist: aiohttp>=3.9.0; python_version >= "3.12"
35
+ Requires-Dist: async-lru
36
+ Requires-Dist: aiofiles
37
+ Requires-Dist: cryptography
38
+ Dynamic: author
39
+ Dynamic: home-page
40
+ Dynamic: license-file
41
+ Dynamic: requires-python
42
+
43
+ ========
44
+ BioLM AI
45
+ ========
46
+
47
+
48
+ .. image:: https://img.shields.io/pypi/v/biolmai.svg
49
+ :target: https://pypi.python.org/pypi/biolmai
50
+
51
+ .. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
52
+ :target: https://travis-ci.org/github/BioLM/py-biolm
53
+
54
+ .. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
55
+ :target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
56
+ :alt: Documentation Status
57
+
58
+
59
+
60
+
61
+ Python client and SDK for `BioLM <https://biolm.ai>`_
62
+
63
+ Install the package:
64
+
65
+ .. code-block:: bash
66
+
67
+ pip install biolmai
68
+
69
+ Basic usage:
70
+
71
+ .. code-block:: python
72
+
73
+ from biolmai import biolm
74
+
75
+ # Encode a single sequence
76
+ result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
77
+
78
+ # Predict a batch of sequences
79
+ result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
80
+
81
+ # Write results to disk
82
+ biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
83
+
84
+ Asynchronous usage:
85
+
86
+ .. code-block:: python
87
+
88
+ from biolmai.client import BioLMApiClient
89
+ import asyncio
90
+
91
+ async def main():
92
+ model = BioLMApiClient("esmfold")
93
+ result = await model.predict(items=[{"sequence": "MDNELE"}])
94
+ print(result)
95
+
96
+ asyncio.run(main())
97
+
98
+ Overview
99
+ ========
100
+
101
+ The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
102
+
103
+ Main features:
104
+
105
+ - High-level BioLM constructor for quick requests
106
+ - Sync and async interfaces
107
+ - Automatic or custom rate limiting/throttling
108
+ - Schema-based batch size detection
109
+ - Flexible input formats (single key + list, or list of dicts)
110
+ - Low memory usage via generators
111
+ - Flexible error handling (raise, continue, or stop on error)
112
+ - Universal HTTP client for both sync and async
113
+
114
+ Features
115
+ ========
116
+
117
+ - **High-level constructor**: Instantly run an API call with a single line.
118
+ - **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
119
+ - **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
120
+ - **Schema-based batching**: Automatically queries API for max batch size.
121
+ - **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
122
+ - **Low memory**: Uses generators for validation and batching.
123
+ - **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
124
+ - **Disk output**: Write results as JSONL to disk.
125
+ - **Universal HTTP client**: Efficient for both sync and async.
126
+ - **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
127
+
128
+ **Example endpoints and actions:**
129
+
130
+ - `esm2-8m/encode`: Embedding for protein sequences.
131
+ - `esmfold/predict`: Structure prediction for protein sequences.
132
+ - `progen2-oas/generate`: Sequence generation from a context string.
133
+ - `dnabert2/predict`: Masked prediction for DNA sequences.
134
+ - `ablang2/encode`: Embeddings for paired-chain antibodies.
135
+
136
+ * Free software: Apache Software License 2.0
137
+ * Documentation: https://docs.biolm.ai
@@ -0,0 +1,95 @@
1
+ ========
2
+ BioLM AI
3
+ ========
4
+
5
+
6
+ .. image:: https://img.shields.io/pypi/v/biolmai.svg
7
+ :target: https://pypi.python.org/pypi/biolmai
8
+
9
+ .. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
10
+ :target: https://travis-ci.org/github/BioLM/py-biolm
11
+
12
+ .. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
13
+ :target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
14
+ :alt: Documentation Status
15
+
16
+
17
+
18
+
19
+ Python client and SDK for `BioLM <https://biolm.ai>`_
20
+
21
+ Install the package:
22
+
23
+ .. code-block:: bash
24
+
25
+ pip install biolmai
26
+
27
+ Basic usage:
28
+
29
+ .. code-block:: python
30
+
31
+ from biolmai import biolm
32
+
33
+ # Encode a single sequence
34
+ result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
35
+
36
+ # Predict a batch of sequences
37
+ result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
38
+
39
+ # Write results to disk
40
+ biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
41
+
42
+ Asynchronous usage:
43
+
44
+ .. code-block:: python
45
+
46
+ from biolmai.client import BioLMApiClient
47
+ import asyncio
48
+
49
+ async def main():
50
+ model = BioLMApiClient("esmfold")
51
+ result = await model.predict(items=[{"sequence": "MDNELE"}])
52
+ print(result)
53
+
54
+ asyncio.run(main())
55
+
56
+ Overview
57
+ ========
58
+
59
+ The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
60
+
61
+ Main features:
62
+
63
+ - High-level BioLM constructor for quick requests
64
+ - Sync and async interfaces
65
+ - Automatic or custom rate limiting/throttling
66
+ - Schema-based batch size detection
67
+ - Flexible input formats (single key + list, or list of dicts)
68
+ - Low memory usage via generators
69
+ - Flexible error handling (raise, continue, or stop on error)
70
+ - Universal HTTP client for both sync and async
71
+
72
+ Features
73
+ ========
74
+
75
+ - **High-level constructor**: Instantly run an API call with a single line.
76
+ - **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
77
+ - **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
78
+ - **Schema-based batching**: Automatically queries API for max batch size.
79
+ - **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
80
+ - **Low memory**: Uses generators for validation and batching.
81
+ - **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
82
+ - **Disk output**: Write results as JSONL to disk.
83
+ - **Universal HTTP client**: Efficient for both sync and async.
84
+ - **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
85
+
86
+ **Example endpoints and actions:**
87
+
88
+ - `esm2-8m/encode`: Embedding for protein sequences.
89
+ - `esmfold/predict`: Structure prediction for protein sequences.
90
+ - `progen2-oas/generate`: Sequence generation from a context string.
91
+ - `dnabert2/predict`: Masked prediction for DNA sequences.
92
+ - `ablang2/encode`: Embeddings for paired-chain antibodies.
93
+
94
+ * Free software: Apache Software License 2.0
95
+ * Documentation: https://docs.biolm.ai
@@ -0,0 +1,24 @@
1
+ """Top-level package for BioLM AI."""
2
+ __author__ = """Nikhil Haas"""
3
+ __email__ = "nikhil@biolm.ai"
4
+ __version__ = '0.2.10'
5
+
6
+ from biolmai.client import BioLMApi, BioLMApiClient
7
+ from biolmai.biolmai import BioLM
8
+ from typing import Optional, Union, List, Any
9
+
10
+ __all__ = ['biolm']
11
+
12
+
13
+ def biolm(
14
+ *,
15
+ entity: str,
16
+ action: str,
17
+ type: Optional[str] = None,
18
+ items: Union[Any, List[Any]],
19
+ params: Optional[dict] = None,
20
+ api_key: Optional[str] = None,
21
+ **kwargs
22
+ ) -> Any:
23
+ """Top-level convenience function that wraps the BioLM class and returns the result."""
24
+ return BioLM(entity=entity, action=action, type=type, items=items, params=params, api_key=api_key, **kwargs)
@@ -4,8 +4,12 @@ import inspect
4
4
  import time
5
5
  from functools import lru_cache
6
6
 
7
- import numpy as np
8
- import pandas as pd
7
+ try:
8
+ import numpy as np
9
+ import pandas as pd
10
+ except ImportError:
11
+ pass
12
+
9
13
  import requests
10
14
  from requests.adapters import HTTPAdapter
11
15
  from requests.packages.urllib3.util.retry import Retry
@@ -15,7 +19,7 @@ import biolmai.auth
15
19
  from biolmai.asynch import async_api_call_wrapper
16
20
  from biolmai.biolmai import log
17
21
  from biolmai.const import MULTIPROCESS_THREADS
18
- from biolmai.payloads import INST_DAT_TXT, predict_resp_many_in_one_to_many_singles
22
+ from biolmai.payloads import INST_DAT_TXT, PARAMS_ITEMS, predict_resp_many_in_one_to_many_singles
19
23
 
20
24
 
21
25
  @lru_cache(maxsize=64)
@@ -35,65 +39,82 @@ def text_validator(text, c):
35
39
  except Exception as e:
36
40
  return str(e)
37
41
 
42
+ def combine_validation(x, y):
43
+ if x is None and y is None:
44
+ return None
45
+ elif isinstance(x, str) and y is None:
46
+ return x
47
+ elif x is None and isinstance(y, str):
48
+ return y
49
+ elif isinstance(x, str) and isinstance(y, str):
50
+ return f"{x}\n{y}"
51
+
52
+
53
+ def validate_action(action):
54
+ def validate(f):
55
+ def wrapper(*args, **kwargs):
56
+ # Get class instance at runtime, so you can access not just
57
+ # APIEndpoints, but any *parent* classes of that,
58
+ # like ESMFoldSinglechain.
59
+ class_obj_self = args[0]
60
+ try:
61
+ is_method = inspect.getfullargspec(f)[0][0] == "self"
62
+ except Exception:
63
+ is_method = False
38
64
 
39
- def validate(f):
40
- def wrapper(*args, **kwargs):
41
- # Get class instance at runtime, so you can access not just
42
- # APIEndpoints, but any *parent* classes of that,
43
- # like ESMFoldSinglechain.
44
- class_obj_self = args[0]
45
- try:
46
- is_method = inspect.getfullargspec(f)[0][0] == "self"
47
- except Exception:
48
- is_method = False
49
-
50
- # Is the function we decorated a class method?
51
- if is_method:
52
- name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
53
- else:
54
- name = f"{f.__module__}.{f.__name__}"
55
-
56
- if is_method:
57
- # Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
58
- action_method_name = name.split(".")[-1]
59
- validate_endpoint_action(
60
- class_obj_self.action_class_strings,
61
- action_method_name,
62
- class_obj_self.__class__.__name__,
63
- )
64
-
65
- input_data = args[1]
66
- # Validate each row's text/input based on class attribute `seq_classes`
67
- for c in class_obj_self.seq_classes:
68
- # Validate input data against regex
69
- if class_obj_self.multiprocess_threads:
70
- validation = input_data.text.apply(text_validator, args=(c,))
65
+ # Is the function we decorated a class method?
66
+ if is_method:
67
+ name = f"{f.__module__}.{class_obj_self.__class__.__name__}.{f.__name__}"
71
68
  else:
72
- validation = input_data.text.apply(text_validator, args=(c,))
73
- if "validation" not in input_data.columns:
74
- input_data["validation"] = validation
75
- else:
76
- input_data["validation"] = input_data["validation"].str.cat(
77
- validation, sep="\n", na_rep=""
69
+ name = f"{f.__module__}.{f.__name__}"
70
+
71
+ if is_method:
72
+ # Splits name, e.g. 'biolmai.api.ESMFoldSingleChain.predict'
73
+ action_method_name = name.split(".")[-1]
74
+ validate_endpoint_action(
75
+ class_obj_self.action_class_strings,
76
+ action_method_name,
77
+ class_obj_self.__class__.__name__,
78
78
  )
79
79
 
80
- # Mark your batches, excluding invalid rows
81
- valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
82
- N = class_obj_self.batch_size # N rows will go per API request
83
- # JOIN back, which is by index
84
- if valid_dat.shape[0] != input_data.shape[0]:
85
- valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
86
- input_data = input_data.merge(
87
- valid_dat.batch, left_index=True, right_index=True, how="left"
88
- )
89
- else:
90
- input_data["batch"] = np.arange(input_data.shape[0]) // N
91
-
92
- res = f(class_obj_self, input_data, **kwargs)
93
- return res
94
-
95
- return wrapper
80
+ input_data = args[1]
81
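+ # Validate each row's text/input based on the class attribute for this action (`predict_input_classes`, `encode_input_classes`, `generate_input_classes`, or `transform_input_classes`)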
82
+ if action == "predict":
83
+ input_classes = class_obj_self.predict_input_classes
84
+ elif action == "encode":
85
+ input_classes = class_obj_self.encode_input_classes
86
+ elif action == "generate":
87
+ input_classes = class_obj_self.generate_input_classes
88
+ elif action == "transform":
89
+ input_classes = class_obj_self.transform_input_classes
90
+ for c in input_classes:
91
+ # Validate input data against regex
92
+ if class_obj_self.multiprocess_threads:
93
+ validation = input_data.text.apply(text_validator, args=(c,))
94
+ else:
95
+ validation = input_data.text.apply(text_validator, args=(c,))
96
+ if "validation" not in input_data.columns:
97
+ input_data["validation"] = validation
98
+ else:
99
+ # masking and loc may be more performant option
100
+ input_data["validation"] = input_data["validation"].combine(validation, combine_validation)
101
+
102
+ # Mark your batches, excluding invalid rows
103
+ valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
104
+ N = class_obj_self.batch_size # N rows will go per API request
105
+ # JOIN back, which is by index
106
+ if valid_dat.shape[0] != input_data.shape[0]:
107
+ valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
108
+ input_data = input_data.merge(
109
+ valid_dat.batch, left_index=True, right_index=True, how="left"
110
+ )
111
+ else:
112
+ input_data["batch"] = np.arange(input_data.shape[0]) // N
113
+ res = f(class_obj_self, input_data, **kwargs)
114
+ return res
96
115
 
116
+ return wrapper
117
+ return validate
97
118
 
98
119
  def convert_input(f):
99
120
  def wrapper(*args, **kwargs):
@@ -123,7 +144,20 @@ def convert_input(f):
123
144
 
124
145
 
125
146
  class APIEndpoint:
126
- batch_size = 3 # Overwrite in parent classes as needed
147
+ # Override in subclasses as needed
148
+ batch_size = 3
149
+ params = None
150
+ action_classes = ()
151
+ api_version = 2
152
+
153
+ predict_input_key = "sequence"
154
+ encode_input_key = "sequence"
155
+ generate_input_key = "context"
156
+
157
+ predict_input_classes = ()
158
+ encode_input_classes = ()
159
+ generate_input_classes = ()
160
+ transform_input_classes = ()
127
161
 
128
162
  def __init__(self, multiprocess_threads=None):
129
163
  # Check for instance-specific threads, otherwise read from env var
@@ -137,7 +171,7 @@ class APIEndpoint:
137
171
  [c.__name__.replace("Action", "").lower() for c in self.action_classes]
138
172
  )
139
173
 
140
- def post_batches(self, dat, slug, action, payload_maker, resp_key):
174
+ def post_batches(self, dat, slug, action, payload_maker, resp_key, key="sequence", params=None):
141
175
  keep_batches = dat.loc[~dat.batch.isnull(), ["text", "batch"]]
142
176
  if keep_batches.shape[0] == 0:
143
177
  pass # Do nothing - we made nice JSON errors to return in the DF
@@ -145,7 +179,7 @@ class APIEndpoint:
145
179
  # raise AssertionError(err)
146
180
  if keep_batches.shape[0] > 0:
147
181
  api_resps = async_api_call_wrapper(
148
- keep_batches, slug, action, payload_maker, resp_key
182
+ keep_batches, slug, action, payload_maker, resp_key, api_version=self.api_version, key=key, params=params,
149
183
  )
150
184
  if isinstance(api_resps, pd.DataFrame):
151
185
  batch_res = api_resps.explode("api_resp") # Should be lists of results
@@ -154,7 +188,9 @@ class APIEndpoint:
154
188
  batch_res = pd.DataFrame({"api_resp": api_resps})
155
189
  len_res = batch_res.shape[0]
156
190
  orig_request_rows = keep_batches.shape[0]
157
- if len_res != orig_request_rows:
191
+ # For 'generate' actions, models may return multiple results per item
192
+ # (e.g., hyper-mpnn with batch_size > 1), so skip the 1:1 check
193
+ if action != "generate" and len_res != orig_request_rows:
158
194
  err = "Response rows ({}) mismatch with input rows ({})"
159
195
  err = err.format(len_res, orig_request_rows)
160
196
  raise AssertionError(err)
@@ -170,11 +206,11 @@ class APIEndpoint:
170
206
  dat["api_resp"] = None
171
207
  return dat
172
208
 
173
- def unpack_local_validations(self, dat):
209
+ def unpack_local_validations(self, dat, response_key):
174
210
  dat.loc[dat.api_resp.isnull(), "api_resp"] = (
175
211
  dat.loc[~dat.validation.isnull(), "validation"]
176
212
  .apply(
177
- predict_resp_many_in_one_to_many_singles, args=(None, None, True, None)
213
+ predict_resp_many_in_one_to_many_singles, args=(None, None, True, None), response_key=response_key
178
214
  )
179
215
  .explode()
180
216
  )
@@ -182,39 +218,46 @@ class APIEndpoint:
182
218
  return dat
183
219
 
184
220
  @convert_input
185
- @validate
186
- def predict(self, dat):
187
- dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
188
- dat = self.unpack_local_validations(dat)
221
+ @validate_action("predict")
222
+ def predict(self, dat, params=None):
223
+ if self.api_version == 1:
224
+ dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
225
+ dat = self.unpack_local_validations(dat, "predictions")
226
+ else:
227
+ dat = self.post_batches(dat, self.slug, "predict", PARAMS_ITEMS, "results", key=self.predict_input_key, params=params)
228
+ dat = self.unpack_local_validations(dat,"results")
189
229
  return dat.api_resp.replace(np.nan, None).tolist()
190
230
 
191
- def infer(self, dat):
192
- return self.predict(dat)
231
+ def infer(self, dat, params=None):
232
+ return self.predict(dat, params)
193
233
 
194
234
  @convert_input
195
- @validate
235
+ @validate_action("transform") # api v1 legacy action
196
236
  def transform(self, dat):
197
237
  dat = self.post_batches(
198
238
  dat, self.slug, "transform", INST_DAT_TXT, "predictions"
199
239
  )
200
- dat = self.unpack_local_validations(dat)
240
+ dat = self.unpack_local_validations(dat,"predictions")
201
241
  return dat.api_resp.replace(np.nan, None).tolist()
202
242
 
203
- # @convert_input
204
- # @validate
205
- # def encode(self, dat):
206
- # # NOTE: we defined this for the specific case of ESM2
207
- # # TODO: this will be need again in v2 of API contract
208
- # dat = self.post_batches(dat, self.slug, "transform",
209
- # INST_DAT_TXT, "embeddings")
210
- # dat = self.unpack_local_validations(dat)
211
- # return dat.api_resp.replace(np.nan, None).tolist()
243
+ @convert_input
244
+ @validate_action("encode")
245
+ def encode(self, dat, params=None):
246
+
247
+ dat = self.post_batches(dat, self.slug, "encode", PARAMS_ITEMS, "results", key=self.encode_input_key, params=params)
248
+ dat = self.unpack_local_validations(dat, "results")
249
+ return dat.api_resp.replace(np.nan, None).tolist()
212
250
 
213
251
  @convert_input
214
- @validate
215
- def generate(self, dat):
216
- dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
217
- dat = self.unpack_local_validations(dat)
252
+ @validate_action("generate")
253
+ def generate(self, dat, params=None):
254
+ if self.api_version == 1:
255
+ dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
256
+ dat = self.unpack_local_validations(dat, "predictions")
257
+ else:
258
+ dat = self.post_batches(dat, self.slug, "generate", PARAMS_ITEMS, "results", key=self.generate_input_key, params=params)
259
+ dat = self.unpack_local_validations(dat, "results")
260
+
218
261
  return dat.api_resp.replace(np.nan, None).tolist()
219
262
 
220
263
 
@@ -290,9 +333,9 @@ class TransformAction:
290
333
  return "TransformAction"
291
334
 
292
335
 
293
- # class EncodeAction:
294
- # def __str__(self):
295
- # return "EncodeAction"
336
+ class EncodeAction:
337
+ def __str__(self):
338
+ return "EncodeAction"
296
339
 
297
340
 
298
341
  class ExplainAction:
@@ -7,7 +7,7 @@ import aiohttp.resolver
7
7
  from aiohttp import ClientSession
8
8
 
9
9
  from biolmai.auth import get_user_auth_header
10
- from biolmai.const import BASE_API_URL, MULTIPROCESS_THREADS
10
+ from biolmai.const import BASE_API_URL, BASE_API_URL_V1, MULTIPROCESS_THREADS
11
11
 
12
12
  aiohttp.resolver.DefaultResolver = aiohttp.resolver.AsyncResolver
13
13
 
@@ -146,11 +146,14 @@ async def async_main(urls, concurrency) -> list:
146
146
  return await get_all(urls, concurrency)
147
147
 
148
148
 
149
- async def async_api_calls(model_name, action, headers, payloads, response_key=None):
149
+ async def async_api_calls(model_name, action, headers, payloads, response_key=None, api_version=2):
150
150
  """Hit an arbitrary BioLM model inference API."""
151
151
  # Normally would POST multiple sequences at once for greater efficiency,
152
152
  # but for simplicity sake will do one at at time right now
153
- url = f"{BASE_API_URL}/models/{model_name}/{action}/"
153
+ if api_version == 1:
154
+ url = f"{BASE_API_URL_V1}/models/{model_name}/{action}/"
155
+ else:
156
+ url = f"{BASE_API_URL}/{model_name}/{action}/"
154
157
 
155
158
  if not isinstance(payloads, (list, dict)):
156
159
  err = "API request payload must be a list or dict, got {}"
@@ -180,15 +183,20 @@ async def async_api_calls(model_name, action, headers, payloads, response_key=No
180
183
  # return response
181
184
 
182
185
 
183
- def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key):
186
+ def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key, api_version=2, key="sequence", params=None):
184
187
  """Wrap API calls to assist with sequence validation as a precursor to
185
188
  each API call.
186
189
  """
187
190
  model_name = slug
188
191
  # payload = payload_maker(grouped_df)
189
- init_ploads = grouped_df.groupby("batch").apply(
190
- payload_maker, include_batch_size=True
191
- )
192
+ if api_version == 1:
193
+ init_ploads = grouped_df.groupby("batch").apply(
194
+ payload_maker, include_batch_size=True
195
+ )
196
+ else:
197
+ init_ploads = grouped_df.groupby("batch").apply(
198
+ payload_maker, key=key, params=params, include_batch_size=True
199
+ )
192
200
  ploads = init_ploads.to_list()
193
201
  init_ploads = init_ploads.to_frame(name="pload")
194
202
  init_ploads["batch"] = init_ploads.index
@@ -208,7 +216,7 @@ def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key
208
216
  # "https://python.org",
209
217
  # ]
210
218
  # concurrency = 3
211
- api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key))
219
+ api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key, api_version))
212
220
  api_resp = [item for sublist in api_resp for item in sublist]
213
221
  api_resp = sorted(api_resp, key=lambda x: x["batch_id"])
214
222
  # print(api_resp)