biolmai 0.1.2__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. biolmai-0.2.10/PKG-INFO +137 -0
  2. biolmai-0.2.10/README.rst +95 -0
  3. biolmai-0.2.10/biolmai/__init__.py +24 -0
  4. biolmai-0.2.10/biolmai/api.py +353 -0
  5. biolmai-0.2.10/biolmai/asynch.py +230 -0
  6. biolmai-0.2.10/biolmai/auth.py +1043 -0
  7. biolmai-0.2.10/biolmai/biolmai.py +122 -0
  8. biolmai-0.2.10/biolmai/cli.py +115 -0
  9. biolmai-0.2.10/biolmai/client.py +741 -0
  10. biolmai-0.2.10/biolmai/cls.py +176 -0
  11. biolmai-0.2.10/biolmai/const.py +66 -0
  12. biolmai-0.2.10/biolmai/payloads.py +44 -0
  13. biolmai-0.2.10/biolmai/seqflow_auth.py +200 -0
  14. biolmai-0.2.10/biolmai/validate.py +159 -0
  15. biolmai-0.2.10/biolmai.egg-info/PKG-INFO +137 -0
  16. biolmai-0.2.10/biolmai.egg-info/SOURCES.txt +73 -0
  17. biolmai-0.2.10/biolmai.egg-info/entry_points.txt +5 -0
  18. biolmai-0.2.10/biolmai.egg-info/requires.txt +21 -0
  19. biolmai-0.2.10/docs/_static/api_reference_icon.png +0 -0
  20. biolmai-0.2.10/docs/_static/biolm_docs_logo_dark.png +0 -0
  21. biolmai-0.2.10/docs/_static/biolm_docs_logo_light.png +0 -0
  22. biolmai-0.2.10/docs/_static/biolm_logomark_transparent.png +0 -0
  23. biolmai-0.2.10/docs/_static/biolm_logomark_transparent_for_dark.png +0 -0
  24. biolmai-0.2.10/docs/_static/chat_agents_icon.png +0 -0
  25. biolmai-0.2.10/docs/_static/jupyter_notebooks_icon.png +0 -0
  26. biolmai-0.2.10/docs/_static/model_docs_icon.png +0 -0
  27. biolmai-0.2.10/docs/_static/python_sdk_icon.png +0 -0
  28. biolmai-0.2.10/docs/_static/tutorials_icon.png +0 -0
  29. {biolmai-0.1.2 → biolmai-0.2.10}/docs/biolmai.rst +32 -0
  30. biolmai-0.2.10/docs/conf.py +289 -0
  31. biolmai-0.2.10/docs/index.rst +48 -0
  32. biolmai-0.2.10/docs/python-client/api_biolm.rst +49 -0
  33. biolmai-0.2.10/docs/python-client/api_client.rst +74 -0
  34. biolmai-0.2.10/docs/python-client/async_sync.rst +157 -0
  35. biolmai-0.2.10/docs/python-client/authentication.rst +137 -0
  36. biolmai-0.2.10/docs/python-client/batching.rst +187 -0
  37. biolmai-0.2.10/docs/python-client/disk_output.rst +66 -0
  38. biolmai-0.2.10/docs/python-client/error_handling.rst +173 -0
  39. biolmai-0.2.10/docs/python-client/faq.rst +38 -0
  40. biolmai-0.2.10/docs/python-client/features.rst +22 -0
  41. biolmai-0.2.10/docs/python-client/index.rst +29 -0
  42. {biolmai-0.1.2/docs → biolmai-0.2.10/docs/python-client}/installation.rst +2 -2
  43. biolmai-0.2.10/docs/python-client/overview.rst +18 -0
  44. biolmai-0.2.10/docs/python-client/quickstart.rst +28 -0
  45. biolmai-0.2.10/docs/python-client/rate_limiting.rst +97 -0
  46. biolmai-0.2.10/docs/python-client/usage.rst +84 -0
  47. biolmai-0.2.10/docs/tutorials_use_cases/notebooks.rst +9 -0
  48. biolmai-0.2.10/pyproject.toml +99 -0
  49. {biolmai-0.1.2 → biolmai-0.2.10}/setup.cfg +11 -2
  50. biolmai-0.2.10/setup.py +71 -0
  51. biolmai-0.2.10/tests/test_abatch_calls.py +208 -0
  52. biolmai-0.2.10/tests/test_aclient.py +296 -0
  53. biolmai-0.2.10/tests/test_batch_error_retry.py +80 -0
  54. biolmai-0.2.10/tests/test_batch_errors.py +47 -0
  55. biolmai-0.2.10/tests/test_biolmai.py +211 -0
  56. biolmai-0.2.10/tests/test_client.py +379 -0
  57. biolmai-0.2.10/tests/test_integration.py +132 -0
  58. biolmai-0.2.10/tests/test_max_items.py +183 -0
  59. biolmai-0.2.10/tests/test_oauth_auth.py +286 -0
  60. biolmai-0.2.10/tests/test_rate_limit.py +182 -0
  61. biolmai-0.2.10/tests/test_schemas.py +30 -0
  62. biolmai-0.1.2/PKG-INFO +0 -70
  63. biolmai-0.1.2/README.rst +0 -37
  64. biolmai-0.1.2/biolmai/__init__.py +0 -15
  65. biolmai-0.1.2/biolmai/api.py +0 -291
  66. biolmai-0.1.2/biolmai/async.py +0 -6
  67. biolmai-0.1.2/biolmai/auth.py +0 -127
  68. biolmai-0.1.2/biolmai/biolmai.py +0 -153
  69. biolmai-0.1.2/biolmai/cli.py +0 -67
  70. biolmai-0.1.2/biolmai/cls.py +0 -1
  71. biolmai-0.1.2/biolmai/const.py +0 -13
  72. biolmai-0.1.2/biolmai/payloads.py +0 -6
  73. biolmai-0.1.2/biolmai/validate.py +0 -107
  74. biolmai-0.1.2/biolmai.egg-info/PKG-INFO +0 -70
  75. biolmai-0.1.2/biolmai.egg-info/SOURCES.txt +0 -40
  76. biolmai-0.1.2/biolmai.egg-info/entry_points.txt +0 -2
  77. biolmai-0.1.2/biolmai.egg-info/requires.txt +0 -6
  78. biolmai-0.1.2/docs/authors.rst +0 -1
  79. biolmai-0.1.2/docs/conf.py +0 -168
  80. biolmai-0.1.2/docs/contributing.rst +0 -1
  81. biolmai-0.1.2/docs/history.rst +0 -1
  82. biolmai-0.1.2/docs/index.rst +0 -20
  83. biolmai-0.1.2/docs/readme.rst +0 -1
  84. biolmai-0.1.2/docs/usage.rst +0 -7
  85. biolmai-0.1.2/setup.py +0 -52
  86. biolmai-0.1.2/tests/test_biolmai.py +0 -217
  87. {biolmai-0.1.2 → biolmai-0.2.10}/AUTHORS.rst +0 -0
  88. {biolmai-0.1.2 → biolmai-0.2.10}/CONTRIBUTING.rst +0 -0
  89. {biolmai-0.1.2 → biolmai-0.2.10}/HISTORY.rst +0 -0
  90. {biolmai-0.1.2 → biolmai-0.2.10}/LICENSE +0 -0
  91. {biolmai-0.1.2 → biolmai-0.2.10}/MANIFEST.in +0 -0
  92. {biolmai-0.1.2 → biolmai-0.2.10}/biolmai/ltc.py +0 -0
  93. {biolmai-0.1.2 → biolmai-0.2.10}/biolmai.egg-info/dependency_links.txt +0 -0
  94. {biolmai-0.1.2 → biolmai-0.2.10}/biolmai.egg-info/not-zip-safe +0 -0
  95. {biolmai-0.1.2 → biolmai-0.2.10}/biolmai.egg-info/top_level.txt +0 -0
  96. {biolmai-0.1.2 → biolmai-0.2.10}/docs/Makefile +0 -0
  97. {biolmai-0.1.2 → biolmai-0.2.10}/docs/make.bat +0 -0
  98. {biolmai-0.1.2 → biolmai-0.2.10}/docs/modules.rst +0 -0
  99. {biolmai-0.1.2 → biolmai-0.2.10}/tests/__init__.py +0 -0
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.4
2
+ Name: biolmai
3
+ Version: 0.2.10
4
+ Summary: BioLM Python client
5
+ Home-page: https://github.com/BioLM/py-biolm
6
+ Author: BioLM
7
+ Author-email: BioLM <support@biolm.ai>
8
+ License: Apache Software License 2.0
9
+ Keywords: biolmai
10
+ Classifier: Development Status :: 2 - Pre-Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Natural Language :: English
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Requires-Python: >=3.7
22
+ Description-Content-Type: text/x-rst
23
+ License-File: LICENSE
24
+ License-File: AUTHORS.rst
25
+ Requires-Dist: httpx>=0.23.0
26
+ Requires-Dist: httpcore
27
+ Requires-Dist: Click>=6.0
28
+ Requires-Dist: requests
29
+ Requires-Dist: aiodns
30
+ Requires-Dist: synchronicity>=0.5.0; python_version >= "3.9"
31
+ Requires-Dist: synchronicity<0.5.0; python_version < "3.9"
32
+ Requires-Dist: typing_extensions; python_version < "3.9"
33
+ Requires-Dist: aiohttp<=3.8.6; python_version < "3.12"
34
+ Requires-Dist: aiohttp>=3.9.0; python_version >= "3.12"
35
+ Requires-Dist: async-lru
36
+ Requires-Dist: aiofiles
37
+ Requires-Dist: cryptography
38
+ Dynamic: author
39
+ Dynamic: home-page
40
+ Dynamic: license-file
41
+ Dynamic: requires-python
42
+
43
+ ========
44
+ BioLM AI
45
+ ========
46
+
47
+
48
+ .. image:: https://img.shields.io/pypi/v/biolmai.svg
49
+ :target: https://pypi.python.org/pypi/biolmai
50
+
51
+ .. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
52
+ :target: https://travis-ci.org/github/BioLM/py-biolm
53
+
54
+ .. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
55
+ :target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
56
+ :alt: Documentation Status
57
+
58
+
59
+
60
+
61
+ Python client and SDK for `BioLM <https://biolm.ai>`_
62
+
63
+ Install the package:
64
+
65
+ .. code-block:: bash
66
+
67
+ pip install biolmai
68
+
69
+ Basic usage:
70
+
71
+ .. code-block:: python
72
+
73
+ from biolmai import biolm
74
+
75
+ # Encode a single sequence
76
+ result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
77
+
78
+ # Predict a batch of sequences
79
+ result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
80
+
81
+ # Write results to disk
82
+ biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
83
+
84
+ Asynchronous usage:
85
+
86
+ .. code-block:: python
87
+
88
+ from biolmai.client import BioLMApiClient
89
+ import asyncio
90
+
91
+ async def main():
92
+ model = BioLMApiClient("esmfold")
93
+ result = await model.predict(items=[{"sequence": "MDNELE"}])
94
+ print(result)
95
+
96
+ asyncio.run(main())
97
+
98
+ Overview
99
+ ========
100
+
101
+ The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
102
+
103
+ Main features:
104
+
105
+ - High-level BioLM constructor for quick requests
106
+ - Sync and async interfaces
107
+ - Automatic or custom rate limiting/throttling
108
+ - Schema-based batch size detection
109
+ - Flexible input formats (single key + list, or list of dicts)
110
+ - Low memory usage via generators
111
+ - Flexible error handling (raise, continue, or stop on error)
112
+ - Universal HTTP client for both sync and async
113
+
114
+ Features
115
+ ========
116
+
117
+ - **High-level constructor**: Instantly run an API call with a single line.
118
+ - **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
119
+ - **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
120
+ - **Schema-based batching**: Automatically queries API for max batch size.
121
+ - **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
122
+ - **Low memory**: Uses generators for validation and batching.
123
+ - **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
124
+ - **Disk output**: Write results as JSONL to disk.
125
+ - **Universal HTTP client**: Efficient for both sync and async.
126
+ - **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
127
+
128
+ **Example endpoints and actions:**
129
+
130
+ - `esm2-8m/encode`: Embedding for protein sequences.
131
+ - `esmfold/predict`: Structure prediction for protein sequences.
132
+ - `progen2-oas/generate`: Sequence generation from a context string.
133
+ - `dnabert2/predict`: Masked prediction for DNA sequences.
134
+ - `ablang2/encode`: Embeddings for paired-chain antibodies.
135
+
136
+ * Free software: Apache Software License 2.0
137
+ * Documentation: https://docs.biolm.ai
@@ -0,0 +1,95 @@
1
+ ========
2
+ BioLM AI
3
+ ========
4
+
5
+
6
+ .. image:: https://img.shields.io/pypi/v/biolmai.svg
7
+ :target: https://pypi.python.org/pypi/biolmai
8
+
9
+ .. image:: https://api.travis-ci.com/BioLM/py-biolm.svg?branch=production
10
+ :target: https://travis-ci.org/github/BioLM/py-biolm
11
+
12
+ .. image:: https://readthedocs.org/projects/biolm-ai/badge/?version=latest
13
+ :target: https://biolm-ai.readthedocs.io/en/latest/?version=latest
14
+ :alt: Documentation Status
15
+
16
+
17
+
18
+
19
+ Python client and SDK for `BioLM <https://biolm.ai>`_
20
+
21
+ Install the package:
22
+
23
+ .. code-block:: bash
24
+
25
+ pip install biolmai
26
+
27
+ Basic usage:
28
+
29
+ .. code-block:: python
30
+
31
+ from biolmai import biolm
32
+
33
+ # Encode a single sequence
34
+ result = biolm(entity="esm2-8m", action="encode", type="sequence", items="MSILVTRPSPAGEEL")
35
+
36
+ # Predict a batch of sequences
37
+ result = biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"])
38
+
39
+ # Write results to disk
40
+ biolm(entity="esmfold", action="predict", type="sequence", items=["SEQ1", "SEQ2"], output='disk', file_path="results.jsonl")
41
+
42
+ Asynchronous usage:
43
+
44
+ .. code-block:: python
45
+
46
+ from biolmai.client import BioLMApiClient
47
+ import asyncio
48
+
49
+ async def main():
50
+ model = BioLMApiClient("esmfold")
51
+ result = await model.predict(items=[{"sequence": "MDNELE"}])
52
+ print(result)
53
+
54
+ asyncio.run(main())
55
+
56
+ Overview
57
+ ========
58
+
59
+ The BioLM Python client provides a high-level, user-friendly interface for interacting with the BioLM API. It supports both synchronous and asynchronous usage, automatic batching, flexible error handling, and efficient processing of biological data.
60
+
61
+ Main features:
62
+
63
+ - High-level BioLM constructor for quick requests
64
+ - Sync and async interfaces
65
+ - Automatic or custom rate limiting/throttling
66
+ - Schema-based batch size detection
67
+ - Flexible input formats (single key + list, or list of dicts)
68
+ - Low memory usage via generators
69
+ - Flexible error handling (raise, continue, or stop on error)
70
+ - Universal HTTP client for both sync and async
71
+
72
+ Features
73
+ ========
74
+
75
+ - **High-level constructor**: Instantly run an API call with a single line.
76
+ - **Sync and async**: Use `BioLM` for sync, or `BioLMApiClient` for async.
77
+ - **Flexible rate limiting**: Use API throttle, disable, or set your own (e.g., '1000/second').
78
+ - **Schema-based batching**: Automatically queries API for max batch size.
79
+ - **Flexible input**: Accepts a single key and list, or list of dicts, or list of lists for advanced batching.
80
+ - **Low memory**: Uses generators for validation and batching.
81
+ - **Error handling**: Raise HTTPX errors, continue on error, or stop on first error.
82
+ - **Disk output**: Write results as JSONL to disk.
83
+ - **Universal HTTP client**: Efficient for both sync and async.
84
+ - **Direct access to schema and batching**: Use `BioLMApi` for advanced workflows, including `.schema()`, `.call()`, and `._batch_call_autoschema_or_manual()`.
85
+
86
+ **Example endpoints and actions:**
87
+
88
+ - `esm2-8m/encode`: Embedding for protein sequences.
89
+ - `esmfold/predict`: Structure prediction for protein sequences.
90
+ - `progen2-oas/generate`: Sequence generation from a context string.
91
+ - `dnabert2/predict`: Masked prediction for DNA sequences.
92
+ - `ablang2/encode`: Embeddings for paired-chain antibodies.
93
+
94
+ * Free software: Apache Software License 2.0
95
+ * Documentation: https://docs.biolm.ai
@@ -0,0 +1,24 @@
1
+ """Top-level package for BioLM AI."""
2
+ __author__ = """Nikhil Haas"""
3
+ __email__ = "nikhil@biolm.ai"
4
+ __version__ = '0.2.10'
5
+
6
+ from biolmai.client import BioLMApi, BioLMApiClient
7
+ from biolmai.biolmai import BioLM
8
+ from typing import Optional, Union, List, Any
9
+
10
+ __all__ = ['biolm']
11
+
12
+
13
def biolm(
    *,
    entity: str,
    action: str,
    type: Optional[str] = None,
    items: Union[Any, List[Any]],
    params: Optional[dict] = None,
    api_key: Optional[str] = None,
    **kwargs
) -> Any:
    """Forward a keyword-only request to ``BioLM`` and return what it yields.

    All named arguments, plus any extra ``**kwargs``, are passed straight
    through to ``BioLM`` unchanged; this function adds no logic of its own.
    """
    named_args = {
        "entity": entity,
        "action": action,
        "type": type,
        "items": items,
        "params": params,
        "api_key": api_key,
    }
    return BioLM(**named_args, **kwargs)
@@ -0,0 +1,353 @@
1
+ """References to API endpoints."""
2
+ import datetime
3
+ import inspect
4
+ import time
5
+ from functools import lru_cache
6
+
7
+ try:
8
+ import numpy as np
9
+ import pandas as pd
10
+ except ImportError:
11
+ pass
12
+
13
+ import requests
14
+ from requests.adapters import HTTPAdapter
15
+ from requests.packages.urllib3.util.retry import Retry
16
+
17
+ import biolmai
18
+ import biolmai.auth
19
+ from biolmai.asynch import async_api_call_wrapper
20
+ from biolmai.biolmai import log
21
+ from biolmai.const import MULTIPROCESS_THREADS
22
+ from biolmai.payloads import INST_DAT_TXT, PARAMS_ITEMS, predict_resp_many_in_one_to_many_singles
23
+
24
+
25
@lru_cache(maxsize=64)
def validate_endpoint_action(allowed_classes, method_name, api_class_name):
    """Check that an action is allowed on an endpoint class.

    Only the text after the last ``.`` in ``method_name`` is compared against
    ``allowed_classes``. Results are memoized by ``lru_cache``, so every
    argument must be hashable (e.g. pass a tuple, not a list).

    Raises:
        AssertionError: if the action is not in ``allowed_classes``.
    """
    # rpartition yields the whole string when there is no "." at all.
    requested_action = method_name.rpartition(".")[2]
    if requested_action in allowed_classes:
        return None
    raise AssertionError(
        "Only {} supported on {}".format(list(allowed_classes), api_class_name)
    )
32
+
33
+
34
def text_validator(text, c):
    """Run the validator callable ``c`` on ``text``.

    Returns the exception message as a string when ``c(text)`` raises,
    and ``None`` when it completes without raising.
    """
    try:
        c(text)
    except Exception as failure:
        return str(failure)
    return None
41
+
42
def combine_validation(x, y):
    """Merge two validation results, each either ``None`` or a message string.

    Two strings are joined with a newline; a string paired with ``None``
    is returned as-is; every other combination yields ``None``.
    """
    x_is_msg = isinstance(x, str)
    y_is_msg = isinstance(y, str)
    if x_is_msg and y_is_msg:
        return f"{x}\n{y}"
    if x_is_msg and y is None:
        return x
    if y_is_msg and x is None:
        return y
    return None
51
+
52
+
53
def validate_action(action):
    """Build a decorator that validates inputs locally and assigns batches.

    The decorated callable is invoked as ``f(obj, input_data, **kwargs)``
    where ``obj`` is the first positional argument (the endpoint instance)
    and ``input_data`` is the second, expected to be a DataFrame with a
    ``text`` column. Positional arguments beyond the second are not
    forwarded.

    Before calling ``f`` the wrapper:

    1. for methods (first parameter named ``self``), checks that the method
       name is one of ``obj.action_class_strings``;
    2. runs every validator in ``obj.<action>_input_classes`` over the
       ``text`` column and stores the combined messages in a ``validation``
       column (null means the row is valid);
    3. adds a ``batch`` column numbering the valid rows in groups of
       ``obj.batch_size``; invalid rows get a null batch.

    Args:
        action: One of ``"predict"``, ``"encode"``, ``"generate"`` or
            ``"transform"``.

    Raises:
        ValueError: at call time, if ``action`` is not one of the above.
        AssertionError: from ``validate_endpoint_action`` when the method is
            not an allowed action on the endpoint class.
    """
    # Function-scope import: the module top only imports lru_cache.
    from functools import wraps

    input_classes_attr = {
        "predict": "predict_input_classes",
        "encode": "encode_input_classes",
        "generate": "generate_input_classes",
        "transform": "transform_input_classes",
    }.get(action)

    def validate(f):
        # Whether f is written as a method depends only on f itself, so
        # work it out once at decoration time instead of on every call.
        try:
            is_method = inspect.getfullargspec(f)[0][0] == "self"
        except (TypeError, IndexError):
            is_method = False

        @wraps(f)
        def wrapper(*args, **kwargs):
            # Read the instance at runtime so attributes overridden on
            # subclasses of APIEndpoint (e.g. ESMFoldSingleChain) are used.
            class_obj_self = args[0]

            if is_method:
                validate_endpoint_action(
                    class_obj_self.action_class_strings,
                    f.__name__,
                    class_obj_self.__class__.__name__,
                )

            input_data = args[1]

            if input_classes_attr is None:
                # Previously this surfaced as an UnboundLocalError.
                raise ValueError(
                    f"Unsupported action {action!r}; expected one of "
                    "'predict', 'encode', 'generate', 'transform'"
                )
            input_classes = getattr(class_obj_self, input_classes_attr)

            # Validate each row's text against every validator class.
            for c in input_classes:
                validation = input_data.text.apply(text_validator, args=(c,))
                if "validation" not in input_data.columns:
                    input_data["validation"] = validation
                else:
                    # masking and loc may be more performant option
                    input_data["validation"] = input_data["validation"].combine(
                        validation, combine_validation
                    )

            # With no validators configured the column would never be
            # created and the lookups below would fail; treat all rows as valid.
            if "validation" not in input_data.columns:
                input_data["validation"] = None

            # Mark your batches, excluding invalid rows
            valid_dat = input_data.loc[input_data.validation.isnull(), :].copy()
            N = class_obj_self.batch_size  # N rows will go per API request
            if valid_dat.shape[0] != input_data.shape[0]:
                # Number only the valid rows, then JOIN back by index so
                # invalid rows end up with a null batch.
                valid_dat["batch"] = np.arange(valid_dat.shape[0]) // N
                input_data = input_data.merge(
                    valid_dat.batch, left_index=True, right_index=True, how="left"
                )
            else:
                input_data["batch"] = np.arange(input_data.shape[0]) // N

            return f(class_obj_self, input_data, **kwargs)

        return wrapper

    return validate
118
+
119
def convert_input(f):
    """Decorator that normalizes the second positional argument to a DataFrame.

    The wrapped callable is invoked as ``f(args[0], frame, **kwargs)`` where
    ``frame`` is a DataFrame with a single ``text`` column built from the
    user input. Positional arguments beyond the second are not forwarded.

    Raises:
        ValueError: if the input is not a str, list, tuple, ndarray or
            DataFrame.
        AssertionError: if the input is an ndarray with more than one
            dimension, or a DataFrame.
    """

    def wrapper(*args, **kwargs):
        owner, raw = args[0], args[1]

        if not isinstance(raw, (str, list, tuple, np.ndarray, pd.DataFrame)):
            raise ValueError("Input must be one or many DNA or protein strings")

        if isinstance(raw, str):
            # A single sequence becomes a one-element list.
            raw = [raw]
        elif isinstance(raw, np.ndarray):
            if raw.ndim > 1:
                raise AssertionError(
                    "Detected Numpy matrix - input a single vector or array"
                )
        elif isinstance(raw, pd.DataFrame):
            # NOTE(review): DataFrame.shape always has two entries, so this
            # branch raises for every DataFrame input.
            if len(raw.shape) > 1:
                raise AssertionError(
                    "Detected Pandas DataFrame - input a single vector or Series"
                )

        frame = pd.DataFrame(raw, columns=["text"])
        return f(owner, frame, **kwargs)

    return wrapper
144
+
145
+
146
class APIEndpoint:
    """Base class for a model endpoint exposing predict/encode/generate/transform.

    The action methods read ``self.slug``, which this base class does not
    define, so subclasses are expected to provide it. Each public action
    method is wrapped by ``convert_input`` and ``validate_action``, so
    callers pass raw sequences and the method body receives a DataFrame
    with ``text``, ``validation`` and ``batch`` columns.
    """

    # Override in subclasses as needed
    batch_size = 3  # rows sent per API request
    params = None
    action_classes = ()
    api_version = 2

    # Payload key under which each item's text is sent, per action
    predict_input_key = "sequence"
    encode_input_key = "sequence"
    generate_input_key = "context"

    # Validator callables applied to each input row, per action
    predict_input_classes = ()
    encode_input_classes = ()
    generate_input_classes = ()
    transform_input_classes = ()

    def __init__(self, multiprocess_threads=None):
        """Store threading preference, auth headers and allowed action names.

        Args:
            multiprocess_threads: Instance-specific setting; when ``None``
                the module-level ``MULTIPROCESS_THREADS`` value is used.
        """
        # Check for instance-specific threads, otherwise read from env var
        if multiprocess_threads is not None:
            self.multiprocess_threads = multiprocess_threads
        else:
            self.multiprocess_threads = MULTIPROCESS_THREADS  # Could be False
        # Get correct auth-like headers
        self.auth_headers = biolmai.auth.get_user_auth_header()
        # e.g. a class named PredictAction becomes the string "predict"
        self.action_class_strings = tuple(
            [c.__name__.replace("Action", "").lower() for c in self.action_classes]
        )

    def post_batches(self, dat, slug, action, payload_maker, resp_key, key="sequence", params=None):
        """Send the batched (locally valid) rows to the API and attach responses.

        Rows of ``dat`` with a non-null ``batch`` are passed to
        ``async_api_call_wrapper``; the responses are joined back onto ``dat``
        by index as an ``api_resp`` column. Rows without a batch keep a null
        ``api_resp``.

        Raises:
            AssertionError: if, for any action other than ``"generate"``,
                the number of response rows differs from the rows sent.
        """
        keep_batches = dat.loc[~dat.batch.isnull(), ["text", "batch"]]
        if keep_batches.shape[0] > 0:
            api_resps = async_api_call_wrapper(
                keep_batches, slug, action, payload_maker, resp_key, api_version=self.api_version, key=key, params=params,
            )
            if isinstance(api_resps, pd.DataFrame):
                batch_res = api_resps.explode("api_resp")  # Should be lists of results
            else:
                batch_res = pd.DataFrame({"api_resp": api_resps})
            len_res = batch_res.shape[0]
            orig_request_rows = keep_batches.shape[0]
            # For 'generate' actions, models may return multiple results per item
            # (e.g., hyper-mpnn with batch_size > 1), so skip the 1:1 check
            if action != "generate" and len_res != orig_request_rows:
                err = "Response rows ({}) mismatch with input rows ({})"
                err = err.format(len_res, orig_request_rows)
                raise AssertionError(err)

            # Stack the results horizontally w/ original rows of batches:
            # align both frames positionally, then restore the original index
            # so the join back onto `dat` matches rows correctly.
            keep_batches["prev_idx"] = keep_batches.index
            keep_batches.reset_index(drop=False, inplace=True)
            batch_res.reset_index(drop=True, inplace=True)
            keep_batches["api_resp"] = batch_res
            keep_batches.set_index("prev_idx", inplace=True)
            dat = dat.join(keep_batches.reindex(["api_resp"], axis=1))
        else:
            # Nothing passed local validation - no request is made; the
            # validation messages are turned into responses later.
            dat["api_resp"] = None
        return dat

    def unpack_local_validations(self, dat, response_key):
        """Fill rows that have no API response from their validation message.

        Rows whose ``api_resp`` is null receive the value produced by
        ``predict_resp_many_in_one_to_many_singles`` for that row's
        ``validation`` entry (presumably an error-shaped payload keyed by
        ``response_key`` - confirm against ``biolmai.payloads``).
        """
        dat.loc[dat.api_resp.isnull(), "api_resp"] = (
            dat.loc[~dat.validation.isnull(), "validation"]
            .apply(
                predict_resp_many_in_one_to_many_singles, args=(None, None, True, None), response_key=response_key
            )
            .explode()
        )

        return dat

    @convert_input
    @validate_action("predict")
    def predict(self, dat, params=None):
        """Run the ``predict`` action and return one response per input row."""
        if self.api_version == 1:
            dat = self.post_batches(dat, self.slug, "predict", INST_DAT_TXT, "predictions")
            dat = self.unpack_local_validations(dat, "predictions")
        else:
            dat = self.post_batches(dat, self.slug, "predict", PARAMS_ITEMS, "results", key=self.predict_input_key, params=params)
            dat = self.unpack_local_validations(dat, "results")
        return dat.api_resp.replace(np.nan, None).tolist()

    def infer(self, dat, params=None):
        """Alias for :meth:`predict`."""
        # params must go by keyword: the input-converting wrapper around
        # predict only forwards keyword arguments, so a positional params
        # would be silently dropped.
        return self.predict(dat, params=params)

    @convert_input
    @validate_action("transform")  # api v1 legacy action
    def transform(self, dat):
        """Run the legacy ``transform`` action and return one response per row."""
        dat = self.post_batches(
            dat, self.slug, "transform", INST_DAT_TXT, "predictions"
        )
        dat = self.unpack_local_validations(dat, "predictions")
        return dat.api_resp.replace(np.nan, None).tolist()

    @convert_input
    @validate_action("encode")
    def encode(self, dat, params=None):
        """Run the ``encode`` action and return one response per input row."""
        dat = self.post_batches(dat, self.slug, "encode", PARAMS_ITEMS, "results", key=self.encode_input_key, params=params)
        dat = self.unpack_local_validations(dat, "results")
        return dat.api_resp.replace(np.nan, None).tolist()

    @convert_input
    @validate_action("generate")
    def generate(self, dat, params=None):
        """Run the ``generate`` action and return the collected responses."""
        if self.api_version == 1:
            # NOTE(review): the request reads the "generated" key but local
            # validations are unpacked under "predictions" - confirm intended.
            dat = self.post_batches(dat, self.slug, "generate", INST_DAT_TXT, "generated")
            dat = self.unpack_local_validations(dat, "predictions")
        else:
            dat = self.post_batches(dat, self.slug, "generate", PARAMS_ITEMS, "results", key=self.generate_input_key, params=params)
            dat = self.unpack_local_validations(dat, "results")

        return dat.api_resp.replace(np.nan, None).tolist()
262
+
263
+
264
def retry_minutes(sess, URL, HEADERS, dat, timeout, mins):
    """POST ``dat`` to ``URL``, retrying for up to ``mins`` minutes or 5 attempts.

    A response counts as successful when it has a non-error status (400 and
    404 are let through without raising) and its JSON body has no ``"error"``
    entry. After each failed attempt the error is logged and the function
    sleeps 5 seconds before trying again.

    Args:
        sess: Session-like object exposing ``post``.
        URL: Target URL.
        HEADERS: Base request headers; ``Content-Type: application/json`` is
            added on a copy, the caller's mapping is left untouched.
        dat: Request body passed as ``data``.
        timeout: Per-request timeout passed to ``post``.
        mins: Time budget in minutes.

    Returns:
        The response from the last attempt, or ``None`` if no attempt
        produced a response (including when no attempt was made at all).
    """
    import json  # function-scope import; json is not imported at module top

    headers = dict(HEADERS)
    headers["Content-Type"] = "application/json"

    # Bound up front so the return is well-defined even if the loop body
    # never runs (e.g. mins <= 0).
    response = None
    attempts, max_attempts = 0, 5
    try_until = datetime.datetime.now() + datetime.timedelta(minutes=mins)
    while datetime.datetime.now() < try_until and attempts < max_attempts:
        response = None
        try:
            log.info(f"Trying {datetime.datetime.now()}")
            response = sess.post(URL, headers=headers, data=dat, timeout=timeout)
            if response.status_code not in (400, 404):
                response.raise_for_status()
            body = response.json()
            if "error" in body:
                raise ValueError(json.dumps(body))
            break
        except Exception as e:
            log.warning(e)
            # Compare against None explicitly: a requests Response is falsy
            # for 4xx/5xx statuses, which are exactly the bodies worth logging.
            if response is not None:
                log.warning(response.text)
            time.sleep(5)  # Wait 5 seconds between tries
            attempts += 1
    return response
297
+
298
+
299
def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=None,
    session=None,
):
    """Return a requests session whose HTTP(S) adapters retry failed requests.

    Args:
        retries: Used for the total, read and connect retry counts.
        backoff_factor: Passed through to ``Retry``.
        status_forcelist: Status codes that force a retry; defaults to
            every code from 400 through 598.
        session: Existing session to configure; a new ``requests.Session``
            is created when this is falsy.
    """
    if status_forcelist is None:
        forced_statuses = list(range(400, 599))
    else:
        forced_statuses = status_forcelist

    http = session if session else requests.Session()

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=forced_statuses,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    for scheme in ("http://", "https://"):
        http.mount(scheme, retrying_adapter)
    return http
319
+
320
+
321
class PredictAction:
    """Marker class for the predict action.

    ``APIEndpoint.__init__`` maps a class with this name to the action
    string ``"predict"`` by stripping ``"Action"`` and lower-casing.
    """

    def __str__(self):
        return "PredictAction"
324
+
325
+
326
class GenerateAction:
    """Marker class for the generate action.

    ``APIEndpoint.__init__`` maps a class with this name to the action
    string ``"generate"`` by stripping ``"Action"`` and lower-casing.
    """

    def __str__(self):
        return "GenerateAction"
329
+
330
+
331
class TransformAction:
    """Marker class for the transform action.

    ``APIEndpoint.__init__`` maps a class with this name to the action
    string ``"transform"`` by stripping ``"Action"`` and lower-casing.
    """

    def __str__(self):
        return "TransformAction"
334
+
335
+
336
class EncodeAction:
    """Marker class for the encode action.

    ``APIEndpoint.__init__`` maps a class with this name to the action
    string ``"encode"`` by stripping ``"Action"`` and lower-casing.
    """

    def __str__(self):
        return "EncodeAction"
339
+
340
+
341
class ExplainAction:
    """Marker class for the explain action.

    ``APIEndpoint.__init__`` maps a class with this name to the action
    string ``"explain"`` by stripping ``"Action"`` and lower-casing.
    """

    def __str__(self):
        return "ExplainAction"
344
+
345
+
346
class SimilarityAction:
    """Marker class for the similarity action.

    ``APIEndpoint.__init__`` maps a class with this name to the action
    string ``"similarity"`` by stripping ``"Action"`` and lower-casing.
    """

    def __str__(self):
        return "SimilarityAction"
349
+
350
+
351
class FinetuneAction:
    """Marker class for the finetune action.

    ``APIEndpoint.__init__`` maps a class with this name to the action
    string ``"finetune"`` by stripping ``"Action"`` and lower-casing.
    """

    def __str__(self):
        return "FinetuneAction"