biolmai 0.1.10__py2.py3-none-any.whl → 0.2.2__py2.py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of biolmai might be problematic.

biolmai/__init__.py CHANGED
@@ -1,8 +1,10 @@
  """Top-level package for BioLM AI."""
  __author__ = """Nikhil Haas"""
  __email__ = "nikhil@biolm.ai"
- __version__ = '0.1.10'
+ __version__ = '0.2.2'
 
  from biolmai.cls import *
+ from biolmai.client import BioLMApi, BioLMApiClient
+ from biolmai.biolmai import BioLM
 
  __all__ = []
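
The new imports above mean the 0.2.2 package root re-exports the high-level clients directly. A minimal sketch of what that enables (assumes biolmai 0.2.2 is installed):

# Sketch: the client entry points now exposed at the package root in 0.2.2.
from biolmai import BioLM, BioLMApi, BioLMApiClient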
biolmai/api.py CHANGED
@@ -4,8 +4,12 @@ import inspect
  import time
  from functools import lru_cache
 
- import numpy as np
- import pandas as pd
+ try:
+     import numpy as np
+     import pandas as pd
+ except ImportError:
+     pass
+
  import requests
  from requests.adapters import HTTPAdapter
  from requests.packages.urllib3.util.retry import Retry
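
The api.py change above makes numpy and pandas optional rather than hard requirements (the wheel no longer depends on pandas; see the METADATA diff below), so the import is allowed to fail silently. A minimal sketch of the same optional-dependency pattern, with a hypothetical to_dataframe helper that only errors when the optional feature is actually used:

# Sketch of the optional-import guard; to_dataframe is a hypothetical helper, not part of biolmai.
try:
    import pandas as pd  # optional dependency
except ImportError:
    pd = None

def to_dataframe(records):
    """Return a pandas DataFrame for a list of result dicts, if pandas is available."""
    if pd is None:
        raise RuntimeError("pandas is not installed; install it to use DataFrame output.")
    return pd.DataFrame(records)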
biolmai/biolmai.py CHANGED
@@ -1,5 +1,121 @@
  """Main module."""
-
  import logging
 
  log = logging.getLogger("biolm_util")
+
+ from typing import Optional, Union, List, Any
+ from biolmai.client import BioLMApi, is_list_of_lists
+
+
+ class BioLM:
+     """
+     Universal client for BioLM API.
+
+     Args:
+         entity (str): The entity name (model, database, calculation, etc).
+         action (str): The action to perform (e.g., 'generate', 'encode', 'predict', 'search', 'finetune').
+         type (str): The type of item (e.g., 'sequence', 'pdb', 'fasta_str').
+         item (Union[Any, List[Any]]): The item(s) to process.
+         params (Optional[dict]): Optional parameters for the action.
+         raise_httpx (bool): Whether to raise HTTPX errors.
+         stop_on_error (bool): Stop on first error if True.
+         output (str): 'memory' or 'disk'.
+         file_path (Optional[str]): Output file path if output='disk'.
+         api_key (Optional[str]): API key for authentication.
+     """
+     def __new__(
+         cls,
+         *,
+         entity: str,
+         action: str,
+         type: Optional[str] = None,
+         items: Union[Any, List[Any]],
+         params: Optional[dict] = None,
+         api_key: Optional[str] = None,
+         **kwargs
+     ):
+         self = super().__new__(cls)
+         self.entity = entity
+         self.action = action
+         self.type = type
+         self.items = items
+         self.params = params
+         self.api_key = api_key
+         self._class_kwargs = kwargs
+         return self.run()
+
+     def run(self) -> Any:
+         """
+         Run the specified action on the entity with the given item(s).
+         Returns the result(s), unpacked if a single item was provided.
+         """
+         # if not self.items:
+         #     return self.items
+
+         # Always pass a list of items to BioLMModel
+         if isinstance(self.items, list):
+             items = self.items
+         else:
+             items = [self.items]
+
+         is_lol, first_n, rest_iter = is_list_of_lists(items, check_n=10)
+         if is_lol:
+             for batch in first_n:
+                 if not all(isinstance(x, dict) for x in batch):
+                     raise ValueError("All items in each batch must be dicts when passing a list of lists.")
+             if self.type is not None:
+                 raise ValueError("Do not specify `type` when passing a list of lists of dicts for `items`.")
+             items_dicts = list(first_n) + list(rest_iter)
+         elif all(isinstance(v, dict) for v in items):
+             items_dicts = items
+         else:
+             if self.type is None:
+                 raise ValueError("If `items` are not dicts, `type` must be specified.")
+             items_dicts = [{self.type: v} for v in items]
+
+         unwrap_single = self._class_kwargs.pop('unwrap_single', True)
+
+         # Instantiate BioLMModel with correct settings
+         # Need these for the `action` method on BioLMApi; other kwargs to BioLMApi init
+         action_kwargs = {k: v for k, v in dict(
+             stop_on_error=self._class_kwargs.pop('stop_on_error', None),
+             output=self._class_kwargs.pop('output', None),
+             file_path=self._class_kwargs.pop('file_path', None),
+         ).items() if v is not None}
+
+         model = BioLMApi(
+             self.entity,
+             api_key=self.api_key,
+             unwrap_single=unwrap_single,
+             **self._class_kwargs,
+         )
+
+         # Map action to method
+         action_map = {
+             'generate': model.generate,
+             'predict': model.predict,
+             'encode': model.encode,
+             'search': getattr(model, 'search', None),
+             'finetune': getattr(model, 'finetune', None),
+             'lookup': model.lookup,
+         }
+         if self.action not in action_map or action_map[self.action] is None:
+             raise ValueError(f"Action '{self.action}' is not amongst the available actions {', '.join(action_map.keys())}.")
+
+         # Prepare kwargs for the method
+         method = action_map[self.action]
+         kwargs = {
+             'items': items_dicts,
+             'params': self.params,
+         }
+         kwargs.update(action_kwargs)
+         # Remove None values
+         kwargs = {k: v for k, v in kwargs.items() if v is not None}
+
+         # Call the method
+         result = method(**kwargs)
+
+         return result
+
+ # Example usage:
+ # result = BioLM(entity="esmfold", action="predict", type="sequence", item="MKT...").run()
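
One thing to note about the class above: __new__ returns self.run(), so constructing BioLM(...) already yields the results, and the keyword-only constructor argument is items (the example comment's item=... and trailing .run() do not match the signature). A hedged usage sketch based only on the signature shown above; the entity name and sequences are illustrative:

# Sketch: entity, type, and sequences are placeholders; credentials come from
# api_key, the BIOLMAI_TOKEN environment variable, or ~/.biolmai/credentials.
results = BioLM(
    entity="esmfold",            # illustrative model name from the comment above
    action="predict",
    type="sequence",             # each plain item is wrapped as {"sequence": item}
    items=["MKTAYIAKQR", "MENDEL"],
    params=None,
)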
biolmai/client.py ADDED
@@ -0,0 +1,699 @@
+ import asyncio
+ import functools
+ import json
+ import os
+ import time
+ from collections import namedtuple, OrderedDict
+ from contextlib import asynccontextmanager
+ from itertools import chain
+ from itertools import tee, islice
+ from json import dumps as json_dumps
+ from typing import Callable
+ from typing import Optional, Union, List, Any, Dict, Tuple
+
+ import aiofiles
+ import httpx
+ import httpx._content
+ from async_lru import alru_cache
+ from httpx import AsyncHTTPTransport
+ from httpx import ByteStream
+ from synchronicity import Synchronizer
+
+ try:
+     from importlib.metadata import version
+ except ImportError:
+     from importlib_metadata import version
+
+
+ def custom_httpx_encode_json(json: Any) -> Tuple[Dict[str, str], ByteStream]:
+     # disable ascii for json_dumps
+     body = json_dumps(json, ensure_ascii=False).encode("utf-8")
+     content_length = str(len(body))
+     content_type = "application/json"
+     headers = {"Content-Length": content_length, "Content-Type": content_type}
+     return headers, ByteStream(body)
+
+ # fix encoding utf-8 bug
+ httpx._content.encode_json = custom_httpx_encode_json
+
+ import sys
+
+ def debug(msg):
+     sys.stderr.write(msg + "\n")
+     sys.stderr.flush()
+
+ import logging
+
+ # Turn this on to dev lots of logs
+ if os.environ.get("DEBUG", '').upper().strip() in ('TRUE', '1'):
+     logging.basicConfig(
+         level=logging.INFO,
+         stream=sys.stderr,
+         format="%(asctime)s %(levelname)s %(message)s",
+         force=True,  # Python 3.8+
+     )
+
+ USER_BIOLM_DIR = os.path.join(os.path.expanduser("~"), ".biolmai")
+ ACCESS_TOK_PATH = os.path.join(USER_BIOLM_DIR, "credentials")
+ TIMEOUT_MINS = 20  # Match API server's keep-alive/timeout
+ DEFAULT_TIMEOUT = httpx.Timeout(TIMEOUT_MINS * 60, connect=10.0)
+
+ LookupResult = namedtuple("LookupResult", ["data", "raw"])
+
+ _synchronizer = Synchronizer()
+
+ if not hasattr(_synchronizer, "sync"):
+     if hasattr(_synchronizer, "wrap"):
+         _synchronizer.sync = _synchronizer.wrap
+     if hasattr(_synchronizer, "create_blocking"):
+         _synchronizer.sync = _synchronizer.create_blocking
+     else:
+         raise ImportError(f"Your version of 'synchronicity' ({version('synchronicity')}) is incompatible.")
+
+ def type_check(param_types: Dict[str, Any]):
+     def decorator(func: Callable):
+         @functools.wraps(func)
+         def wrapper(*args, **kwargs):
+             for param, expected_type in param_types.items():
+                 value = kwargs.get(param)
+                 if value is None and len(args) > 0:
+                     arg_names = func.__code__.co_varnames
+                     if param in arg_names:
+                         idx = arg_names.index(param)
+                         if idx < len(args):
+                             value = args[idx]
+                 if value is not None:
+                     # Allow tuple of types or single type
+                     if not isinstance(expected_type, tuple):
+                         expected_types = (expected_type,)
+                     else:
+                         expected_types = expected_type
+                     if not isinstance(value, expected_types):
+                         type_names = ", ".join([t.__name__ for t in expected_types])
+                         raise TypeError(
+                             f"Parameter '{param}' must be of type {type_names}, got {type(value).__name__}"
+                         )
+                     # Check for empty list/tuple
+                     # if isinstance(value, (list, tuple)) and len(value) == 0:
+                     #     raise ValueError(
+                     #         f"Parameter '{param}' must not be an empty {type(value).__name__}"
+                     #     )
+             return func(*args, **kwargs)
+         return wrapper
+     return decorator
+
+
+ class AsyncRateLimiter:
+     def __init__(self, max_calls: int, period: float):
+         self._max_calls = max_calls
+         self._period = period
+         self._lock = asyncio.Lock()
+         self._calls = []
+
+     @asynccontextmanager
+     async def limit(self):
+         async with self._lock:
+             now = time.monotonic()
+             # Remove calls outside the window
+             self._calls = [t for t in self._calls if now - t < self._period]
+             if len(self._calls) >= self._max_calls:
+                 sleep_time = self._period - (now - self._calls[0])
+                 await asyncio.sleep(max(0, sleep_time))
+                 now = time.monotonic()
+                 self._calls = [t for t in self._calls if now - t < self._period]
+             self._calls.append(time.monotonic())
+             yield
+
+ def parse_rate_limit(rate: str):
+     # e.g. "1000/second", "60/minute"
+     if not rate:
+         return None
+     num, per = rate.strip().split("/")
+     num = int(num)
+     per = per.strip().lower()
+     if per == "second":
+         return num, 1.0
+     elif per == "minute":
+         return num, 60.0
+     else:
+         raise ValueError(f"Unknown rate period: {per}")
+
+ class CredentialsProvider:
+     @staticmethod
+     def get_auth_headers(api_key: Optional[str] = None) -> Dict[str, str]:
+         if api_key:
+             return {"Authorization": f"Token {api_key}"}
+         api_token = os.environ.get("BIOLMAI_TOKEN")
+         if api_token:
+             return {"Authorization": f"Token {api_token}"}
+         if os.path.exists(ACCESS_TOK_PATH):
+             with open(ACCESS_TOK_PATH) as f:
+                 creds = json.load(f)
+             access = creds.get("access")
+             refresh = creds.get("refresh")
+             return {
+                 "Cookie": f"access={access};refresh={refresh}",
+                 "Content-Type": "application/json",
+             }
+         raise AssertionError("No credentials found. Set BIOLMAI_TOKEN or run `biolmai login`.")
+
+
+ class HttpClient:
+
+     def __init__(self, base_url: str, headers: Dict[str, str], timeout: httpx.Timeout):
+         self._base_url = base_url.rstrip("/") + "/"
+         self._headers = headers
+         self._timeout = timeout
+         self._async_client: Optional[httpx.AsyncClient] = None
+         self._transport = None
+         # Removed AsyncResolver, use default resolver
+         self._transport = AsyncHTTPTransport()
+
+     async def get_async_client(self) -> httpx.AsyncClient:
+         if self._async_client is None or getattr(self._async_client, 'is_closed', False):
+             if self._transport:
+                 self._async_client = httpx.AsyncClient(
+                     base_url=self._base_url,
+                     headers=self._headers,
+                     timeout=self._timeout,
+                     transport=self._transport,
+                 )
+             else:
+                 self._async_client = httpx.AsyncClient(
+                     base_url=self._base_url,
+                     headers=self._headers,
+                     timeout=self._timeout,
+                 )
+         return self._async_client
+
+     async def post(self, endpoint: str, payload: dict) -> httpx.Response:
+         client = await self.get_async_client()
+         # Remove leading slash, ensure trailing slash
+         endpoint = endpoint.lstrip("/")
+         if not endpoint.endswith("/"):
+             endpoint += "/"
+         if "Content-Type" not in client.headers:
+             client.headers["Content-Type"] = "application/json"
+         r = await client.post(endpoint, json=payload)
+         return r
+
+     async def get(self, endpoint: str) -> httpx.Response:
+         client = await self.get_async_client()
+         endpoint = endpoint.lstrip("/")
+         if not endpoint.endswith("/"):
+             endpoint += "/"
+         return await client.get(endpoint)
+
+
+     async def close(self):
+         if self._async_client:
+             await self._async_client.aclose()
+             self._async_client = None
+
+
+ def is_list_of_lists(items, check_n=10):
+     # Accepts any iterable, checks first N items for list/tuple-ness
+     # Returns (is_list_of_lists, first_n_items, rest_iter)
+     if isinstance(items, (list, tuple)):
+         if not items:
+             return False, [], iter(())
+         first_n = items[:check_n]
+         is_lol = all(isinstance(x, (list, tuple)) for x in first_n)
+         return is_lol, first_n, iter(items[check_n:])
+     # For iterators/generators
+     items, items_copy = tee(items)
+     first_n = list(islice(items_copy, check_n))
+     is_lol = all(isinstance(x, (list, tuple)) for x in first_n) and bool(first_n)
+     return is_lol, first_n, items
+
+ def batch_iterable(iterable, batch_size):
+     # Yields lists of up to batch_size from any iterable, deleting as we go
+     batch = []
+     for item in iterable:
+         batch.append(item)
+         if len(batch) == batch_size:
+             yield batch
+             batch = []
+     if batch:
+         yield batch
+
+ class BioLMApiClient:
+     def __init__(
+         self,
+         model_name: str,
+         api_key: Optional[str] = None,
+         base_url: str = "https://biolm.ai/api/v3",
+         timeout: httpx.Timeout = DEFAULT_TIMEOUT,
+         raise_httpx: bool = True,
+         unwrap_single: bool = False,
+         semaphore: 'Optional[Union[int, asyncio.Semaphore]]' = None,
+         rate_limit: 'Optional[str]' = None,
+         retry_error_batches: bool = False,
+
+     ):
+         self.model_name = model_name
+         self.base_url = base_url.rstrip("/") + "/"  # Ensure trailing slash
+         self.timeout = timeout
+         self.raise_httpx = raise_httpx
+         self.unwrap_single = unwrap_single
+         self._headers = CredentialsProvider.get_auth_headers(api_key)
+         self._http_client = HttpClient(self.base_url, self._headers, self.timeout)
+         self._semaphore = None
+         self._rate_limiter = None
+         self._rate_limit_lock = None
+         self._rate_limit_initialized = False
+         self.retry_error_batches = retry_error_batches
+
+
+         # Concurrency limit
+         if isinstance(semaphore, asyncio.Semaphore):
+             self._semaphore = semaphore
+         elif isinstance(semaphore, int):
+             self._semaphore = asyncio.Semaphore(semaphore)
+
+         # RPS limit
+         if rate_limit:
+             max_calls, period = parse_rate_limit(rate_limit)
+             self._rate_limiter = AsyncRateLimiter(max_calls, period)
+             self._rate_limit_initialized = True
+
+     async def _ensure_rate_limit(self):
+         if self._rate_limit_lock is None:
+             self._rate_limit_lock = asyncio.Lock()
+         if self._rate_limit_initialized:
+             return
+         async with self._rate_limit_lock:
+             if self._rate_limit_initialized:
+                 return
+             if self._rate_limiter is None:
+                 schema = await self.schema(self.model_name, "encode")
+                 throttle_rate = schema.get("throttle_rate") if schema else None
+                 if throttle_rate:
+                     max_calls, period = parse_rate_limit(throttle_rate)
+                     self._rate_limiter = AsyncRateLimiter(max_calls, period)
+             self._rate_limit_initialized = True
+
+     @asynccontextmanager
+     async def _limit(self):
+         """
+         Usage:
+             # No throttling: BioLMApiClient(...)
+             # Concurrency limit: BioLMApiClient(..., semaphore=5)
+             # User's own semaphore: BioLMApiClient(..., semaphore=my_semaphore)
+             # RPS limit: BioLMApiClient(..., rate_limit="1000/second")
+             # Both: BioLMApiClient(..., semaphore=5, rate_limit="1000/second")
+         """
+         if self._semaphore:
+             async with self._semaphore:
+                 if self._rate_limiter:
+                     async with self._rate_limiter.limit():
+                         yield
+                 else:
+                     yield
+         elif self._rate_limiter:
+             async with self._rate_limiter.limit():
+                 yield
+         else:
+             yield
+
+     @alru_cache(maxsize=8)
+     async def schema(
+         self,
+         model: str,
+         action: str,
+     ) -> Optional[dict]:
+         """
+         Fetch the JSON schema for a given model and action, with caching.
+         Returns the schema dict if successful, else None.
+         """
+         endpoint = f"schema/{model}/{action}/"
+         try:
+             resp = await self._http_client.get(endpoint)
+             if resp.status_code == 200:
+                 schema = resp.json()
+                 return schema
+             else:
+                 return None
+         except Exception:
+             return None
+
+     @staticmethod
+     def extract_max_items(schema: dict) -> Optional[int]:
+         """
+         Extracts the 'maxItems' value for the 'items' key from the schema.
+         Returns the integer value if found, else None.
+         """
+         try:
+             props = schema.get('properties', {})
+             items_schema = props.get('items', {})
+             max_items = items_schema.get('maxItems')
+             if isinstance(max_items, int):
+                 return max_items
+         except Exception:
+             pass
+         return None
+
+     async def _get_max_batch_size(self, model: str, action: str) -> Optional[int]:
+         schema = await self.schema(model, action)
+         if schema:
+             return self.extract_max_items(schema)
+         return None
+
+     async def _fetch_rps_limit_async(self) -> Optional[int]:
+         return None
+         # Not implemented yet
+         try:
+             async with httpx.AsyncClient(base_url=self.base_url, headers=self._headers, timeout=5.0) as client:
+                 resp = await client.get(f"/{self.model_name}/")
+                 if resp.status_code == 200:
+                     meta = resp.json()
+                     return meta.get("rps_limit") or meta.get("max_rps") or meta.get("requests_per_second")
+         except Exception:
+             pass
+         return None
+
+     async def _api_call(
+         self, endpoint: str, payload: dict, raw: bool = False
+     ) -> Union[dict, Tuple[Any, httpx.Response]]:
+         await self._ensure_rate_limit()
+         async with self._limit():
+             resp = await self._http_client.post(endpoint, payload)
+         content_type = resp.headers.get("Content-Type", "")
+
+         assert hasattr(resp, 'status_code') or hasattr(resp, 'status') or 'status' in resp or 'status_code' in resp
+
+         try:
+             resp_json = resp.json()
+         except Exception:
+             resp_json = ''
+
+         assert resp.status_code
+         if resp.status_code >= 400 or 'error' in resp_json:
+             if 'application/json' in content_type:
+                 try:
+                     error_json = resp_json
+                     # If the API already returns a dict with "error" or similar, just return it
+                     if isinstance(error_json, (dict, list)):
+                         DEFAULT_STATUS_CODE = 502
+                         stat = error_json.get('status', DEFAULT_STATUS_CODE)
+                         error_json['status_code'] = resp.status_code or error_json.get('status_code', stat)
+                         if raw:
+                             return (error_json, resp)
+                         if self.raise_httpx:
+                             raise httpx.HTTPStatusError(message=resp.text, request=resp.request, response=resp)
+                         return error_json
+                     else:
+                         # If the JSON is not a dict or list, wrap it
+                         error_info = {'error': error_json, 'status_code': resp.status_code}
+                 except Exception:
+                     error_info = {'error': resp.text, 'status_code': resp.status_code}
+             else:
+                 error_info = {'error': resp.text, 'status_code': resp.status_code}
+             if raw:
+                 return (error_info, resp)
+             if self.raise_httpx:
+                 raise httpx.HTTPStatusError(message=resp.text, request=resp.request, response=resp)
+             return error_info
+
+         data = resp.json() if 'application/json' in content_type else {"error": resp.text, "status_code": resp.status_code}
+         return (data, resp) if raw else data
+
+     async def call(self, func: str, items: List[dict], params: Optional[dict] = None, raw: bool = False):
+         if not items:
+             return items
+
+         endpoint = f"{self.model_name}/{func}/"
+         endpoint = endpoint.lstrip("/")
+         payload = {'items': items} if func != 'lookup' else {'query': items}
+         if params:
+             payload['params'] = params
+         try:
+             res = await self._api_call(endpoint, payload, raw=raw if func == 'lookup' else False)
+         except Exception as e:
+             if self.raise_httpx:
+                 raise
+             res = self._format_exception(e, 0)
+         res = self._format_result(res)
+         if isinstance(res, dict) and ('error' in res or 'status_code' in res):
+             return res
+         elif isinstance(res, (list, tuple)):
+             return list(res)
+         else:
+             return res
+
+     async def _batch_call_autoschema_or_manual(
+         self,
+         func: str,
+         items,
+         params: Optional[dict] = None,
+         stop_on_error: bool = False,
+         output: str = 'memory',
+         file_path: Optional[str] = None,
+         raw: bool = False,
+     ):
+         if not items:
+             return items
+
+         is_lol, first_n, rest_iter = is_list_of_lists(items)
+         results = []
+
+         async def retry_batch_individually(batch):
+             out = []
+             for item in batch:
+                 single_result = await self.call(func, [item], params=params, raw=raw)
+                 if isinstance(single_result, list) and len(single_result) == 1:
+                     out.append(single_result[0])
+                 else:
+                     out.append(single_result)
+             return out
+
+         if is_lol:
+             all_batches = chain(first_n, rest_iter)
+             if output == 'disk':
+                 path = file_path or f"{self.model_name}_{func}_output.jsonl"
+                 async with aiofiles.open(path, 'w', encoding='utf-8') as file_handle:
+                     for batch in all_batches:
+                         batch_results = await self.call(func, batch, params=params, raw=raw)
+                         if (
+                             self.retry_error_batches and
+                             isinstance(batch_results, dict) and
+                             ('error' in batch_results or 'status_code' in batch_results)
+                         ):
+                             batch_results = await retry_batch_individually(batch)
+
+                         if isinstance(batch_results, list):
+                             assert len(batch_results) == len(batch), (
+                                 f"API returned {len(batch_results)} results for a batch of {len(batch)} items. "
+                                 "This is a contract violation."
+                             )
+                             for res in batch_results:
+                                 await file_handle.write(json.dumps(res) + '\n')
+                         else:
+                             for _ in batch:
+                                 await file_handle.write(json.dumps(batch_results) + '\n')
+                         await file_handle.flush()
+
+                         if stop_on_error and (
+                             (isinstance(batch_results, dict) and ('error' in batch_results or 'status_code' in batch_results)) or
+                             (isinstance(batch_results, list) and all(isinstance(r, dict) and ('error' in r or 'status_code' in r) for r in batch_results))
+                         ):
+                             break
+                 return
+             else:
+                 for batch in all_batches:
+                     batch_results = await self.call(func, batch, params=params, raw=raw)
+                     if (
+                         self.retry_error_batches and
+                         isinstance(batch_results, dict) and
+                         ('error' in batch_results or 'status_code' in batch_results)
+                     ):
+                         batch_results = await retry_batch_individually(batch)
+                     if isinstance(batch_results, dict) and ('error' in batch_results or 'status_code' in batch_results):
+                         results.extend([batch_results] * len(batch))
+                         if stop_on_error:
+                             break
+                     elif isinstance(batch_results, list):
+                         assert len(batch_results) == len(batch), (
+                             f"API returned {len(batch_results)} results for a batch of {len(batch)} items. "
+                             "This is a contract violation."
+                         )
+                         results.extend(batch_results)
+                         if stop_on_error and all(isinstance(r, dict) and ('error' in r or 'status_code' in r) for r in batch_results):
+                             break
+                     else:
+                         results.append(batch_results)
+                 return self._unwrap_single(results) if self.unwrap_single and len(results) == 1 else results
+
+         all_items = chain(first_n, rest_iter)
+         max_batch = await self._get_max_batch_size(self.model_name, func) or 1
+
+         if output == 'disk':
+             path = file_path or f"{self.model_name}_{func}_output.jsonl"
+             async with aiofiles.open(path, 'w', encoding='utf-8') as file_handle:
+                 for batch in batch_iterable(all_items, max_batch):
+                     batch_results = await self.call(func, batch, params=params, raw=raw)
+
+                     if (
+                         self.retry_error_batches and
+                         isinstance(batch_results, dict) and
+                         ('error' in batch_results or 'status_code' in batch_results)
+                     ):
+                         batch_results = await retry_batch_individually(batch)
+                         # After retry, always treat as list
+                         for res in batch_results:
+                             to_dump = res[0] if (raw and isinstance(res, tuple)) else res
+                             await file_handle.write(json.dumps(to_dump) + '\n')
+                         await file_handle.flush()
+                         if stop_on_error and all(isinstance(r, dict) and ('error' in r or 'status_code' in r) for r in batch_results):
+                             break
+                         continue  # move to next batch
+
+                     if isinstance(batch_results, dict) and ('error' in batch_results or 'status_code' in batch_results):
+                         for _ in batch:
+                             await file_handle.write(json.dumps(batch_results) + '\n')
+                         await file_handle.flush()
+                         if stop_on_error:
+                             break
+                     else:
+                         if not isinstance(batch_results, list):
+                             batch_results = [batch_results]
+                         assert len(batch_results) == len(batch), (
+                             f"API returned {len(batch_results)} results for a batch of {len(batch)} items. "
+                             "This is a contract violation."
+                         )
+                         for res in batch_results:
+                             to_dump = res[0] if (raw and isinstance(res, tuple)) else res
+                             await file_handle.write(json.dumps(to_dump) + '\n')
+                         await file_handle.flush()
+                         if stop_on_error and all(isinstance(r, dict) and ('error' in r or 'status_code' in r) for r in batch_results):
+                             break
+
+             return
+         else:
+             for batch in batch_iterable(all_items, max_batch):
+                 batch_results = await self.call(func, batch, params=params, raw=raw)
+
+                 if (
+                     self.retry_error_batches and
+                     isinstance(batch_results, dict) and
+                     ('error' in batch_results or 'status_code' in batch_results)
+                 ):
+                     batch_results = await retry_batch_individually(batch)
+                     results.extend(batch_results)
+                     if stop_on_error and any(isinstance(r, dict) and ('error' in r or 'status_code' in r) for r in batch_results):
+                         break
+                     continue  # move to next batch
+
+
+                 if isinstance(batch_results, dict) and ('error' in batch_results or 'status_code' in batch_results):
+                     results.extend([batch_results] * len(batch))
+                     if stop_on_error:
+                         break
+                 else:
+                     if not isinstance(batch_results, list):
+                         batch_results = [batch_results]
+                     assert len(batch_results) == len(batch), (
+                         f"API returned {len(batch_results)} results for a batch of {len(batch)} items. "
+                         "This is a contract violation."
+                     )
+                     results.extend(batch_results)
+                     if stop_on_error and all(isinstance(r, dict) and ('error' in r or 'status_code' in r) for r in batch_results):
+                         break
+
+         return self._unwrap_single(results) if self.unwrap_single and len(results) == 1 else results
+
+     @staticmethod
+     def _format_result(res: Union[dict, List[dict], Tuple[dict, int]]) -> Union[dict, List[dict], Tuple[dict, int]]:
+         if isinstance(res, dict) and 'results' in res:
+             return res['results']
+         elif isinstance(res, list):
+             if all(isinstance(x, dict) for x in res):
+                 return res
+             raise ValueError("Unexpected response format")
+         elif isinstance(res, dict) and ('error' in res or 'status_code' in res):
+             return res
+         return res
+
+
+     def _format_exception(self, exc: Exception, index: int) -> dict:
+         return {"error": str(exc), "index": index}
+
+     @staticmethod
+     def _unwrap_single(result):
+         if isinstance(result, list) and len(result) == 1:
+             return result[0]
+         return result
+
+     @type_check({'items': (list, tuple), 'params': (dict, OrderedDict, None)})
+     async def generate(
+         self,
+         *,
+         items: List[dict],
+         params: Optional[dict] = None,
+         stop_on_error: bool = False,
+         output: str = 'memory',
+         file_path: Optional[str] = None,
+     ):
+         return await self._batch_call_autoschema_or_manual(
+             "generate", items, params=params, stop_on_error=stop_on_error, output=output, file_path=file_path
+         )
+
+     @type_check({'items': (list, tuple), 'params': (dict, OrderedDict, None)})
+     async def predict(
+         self,
+         *,
+         items: List[dict],
+         params: Optional[dict] = None,
+         stop_on_error: bool = False,
+         output: str = 'memory',
+         file_path: Optional[str] = None,
+     ):
+         return await self._batch_call_autoschema_or_manual(
+             "predict", items, params=params, stop_on_error=stop_on_error, output=output, file_path=file_path
+         )
+
+     @type_check({'items': (list, tuple), 'params': (dict, OrderedDict, None)})
+     async def encode(
+         self,
+         *,
+         items: List[dict],
+         params: Optional[dict] = None,
+         stop_on_error: bool = False,
+         output: str = 'memory',
+         file_path: Optional[str] = None,
+     ):
+         return await self._batch_call_autoschema_or_manual(
+             "encode", items, params=params, stop_on_error=stop_on_error, output=output, file_path=file_path
+         )
+
+     async def lookup(
+         self,
+         query: Union[dict, List[dict]],
+         *,
+         raw: bool = False,
+         output: str = 'memory',
+         file_path: Optional[str] = None,
+     ):
+         items = query if isinstance(query, list) else [query]
+         res = await self.call("lookup", items, params=None, raw=raw)
+         if raw:
+             single = len(items) == 1
+             if single:
+                 data, resp = res
+                 return LookupResult(data, resp)
+             return [LookupResult(r[0], r[1]) for r in res]
+         return res
+
+     async def shutdown(self):
+         await self._http_client.close()
+
+     async def __aenter__(self):
+         return self
+
+     async def __aexit__(self, exc_type, exc, tb):
+         await self.shutdown()
+
+ # Synchronous wrapper for compatibility
+ @_synchronizer.sync
+ class BioLMApi(BioLMApiClient):
+     pass
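
For orientation, a hedged sketch of how the two classes above are intended to be driven, inferred only from the signatures in this file; the model name and payloads are illustrative, and credentials are expected via api_key, the BIOLMAI_TOKEN environment variable, or ~/.biolmai/credentials:

import asyncio
from biolmai.client import BioLMApi, BioLMApiClient

async def main():
    # Async client: bounded to 5 concurrent requests and 60 requests/minute (illustrative limits).
    async with BioLMApiClient("esmfold", semaphore=5, rate_limit="60/minute") as client:
        # Items are dicts keyed by the input type the endpoint expects.
        return await client.predict(items=[{"sequence": "MKTAYIAKQR"}])

results = asyncio.run(main())

# Sync usage: BioLMApi is the synchronicity-wrapped variant of the same class.
model = BioLMApi("esmfold", unwrap_single=True)
single = model.predict(items=[{"sequence": "MKTAYIAKQR"}])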
biolmai-0.2.2.dist-info/METADATA CHANGED
@@ -1,10 +1,10 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: biolmai
- Version: 0.1.10
- Summary: Python client and SDK for https://biolm.ai
+ Version: 0.2.2
+ Summary: BioLM Python client
  Home-page: https://github.com/BioLM/py-biolm
- Author: Nikhil Haas
- Author-email: nhaas@biolm.ai
+ Author: BioLM
+ Author-email: BioLM <support@biolm.ai>
  License: Apache Software License 2.0
  Keywords: biolmai
  Classifier: Development Status :: 2 - Pre-Alpha
@@ -12,20 +12,30 @@ Classifier: Intended Audience :: Developers
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Natural Language :: English
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.7
  Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Python: >=3.6
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Python: >=3.7
+ Description-Content-Type: text/x-rst
  License-File: LICENSE
  License-File: AUTHORS.rst
- Requires-Dist: Click (>=6.0)
+ Requires-Dist: httpx>=0.23.0
+ Requires-Dist: httpcore
+ Requires-Dist: Click>=6.0
  Requires-Dist: requests
- Requires-Dist: pandas
- Requires-Dist: aiohttp
- Provides-Extra: aiodns
- Requires-Dist: aiodns ; extra == 'aiodns'
+ Requires-Dist: aiodns
+ Requires-Dist: synchronicity>=0.5.0
+ Requires-Dist: aiohttp<=3.8.6; python_version < "3.12"
+ Requires-Dist: aiohttp>=3.9.0; python_version >= "3.12"
+ Requires-Dist: async-lru
+ Requires-Dist: aiofiles
+ Dynamic: author
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-python
 
  ========
  BioLM AI
@@ -64,13 +74,3 @@ This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypack
 
  .. _Cookiecutter: https://github.com/audreyr/cookiecutter
  .. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
-
-
- =======
- History
- =======
-
- 0.1.0 (2023-09-04)
- ------------------
-
- * First release on PyPI.
biolmai-0.2.2.dist-info/RECORD ADDED
@@ -0,0 +1,19 @@
+ biolmai/__init__.py,sha256=kEsgaNE96KwaHai2Bbwqa0G2qrcytyHYJ2kC-O9CmPs,248
+ biolmai/api.py,sha256=tqxQ-FoZosE88YmLJPQKskjNQdcb5jzZccywGP73lDc,13002
+ biolmai/asynch.py,sha256=BVypJhhEEK2Bek2AhqNGn7FIRJehAbJflUdeeslbXFE,9073
+ biolmai/auth.py,sha256=flI9KAD90qdXyLDnpJTrc9voKsiK0uWtD2ehsPBn8r4,6329
+ biolmai/biolmai.py,sha256=_NxDPiwT7cnKgnRCRoGZvzBd4jVHJ8DNCuSu3FTznCs,4373
+ biolmai/cli.py,sha256=bdb4q8QlN73A6Ttz0e-dBIwoct7PYqy5WSc52jCMIyU,1967
+ biolmai/client.py,sha256=nD6sjjnQGinn0tzDVxaKWhsw2AQ3VNhgbsX-Smm9ghc,28310
+ biolmai/cls.py,sha256=Hiy_Qoj2Eb43oltnEUdJfMPCsOeFKZ-GUNljF-yShug,4287
+ biolmai/const.py,sha256=vCSj-itsusZWoLR27DYQSpuq024GQz3-uKJuDUoPF0Y,1153
+ biolmai/ltc.py,sha256=al7HZc5tLyUR5fmpIb95hOz5ctudVsc0xzjd_c2Ew3M,49
+ biolmai/payloads.py,sha256=BOhEKl9kWkKMXy1YiNw2_eC6MJ4Dn6vKNvkhEBsM7Lw,1735
+ biolmai/validate.py,sha256=58XMWrdWoDRmfiNAayWqrYaH3_bjRmEpG_yx6XSjTrM,4168
+ biolmai-0.2.2.dist-info/licenses/AUTHORS.rst,sha256=TB_ACuFPgVmxn1NspYwksTdT6jdZeShcxfafmi-XWKQ,158
+ biolmai-0.2.2.dist-info/licenses/LICENSE,sha256=8yt0SdP38I7a3g0zWqZjNe0VSDQhJA4bWLQSqqKtAVg,583
+ biolmai-0.2.2.dist-info/METADATA,sha256=U6aKtxUUgi1XZY5WNDsW-rxhHBw2N2uH6rzgfYxpvSg,2191
+ biolmai-0.2.2.dist-info/WHEEL,sha256=Td9E1opt19FSuwsk_gcDwtsGPmyXw7uz9xQf-y2gvl8,109
+ biolmai-0.2.2.dist-info/entry_points.txt,sha256=ylQnDpCYrxF1F9z_T7NRQcYMWYF5ia_KsTUuboxjEAM,44
+ biolmai-0.2.2.dist-info/top_level.txt,sha256=jyQO45JN3g_jbdI8WqMnb0aEIzf4h1MrmPAZkKgfnwY,8
+ biolmai-0.2.2.dist-info/RECORD,,
biolmai-0.2.2.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.38.4)
+ Generator: setuptools (79.0.0)
  Root-Is-Purelib: true
  Tag: py2-none-any
  Tag: py3-none-any
biolmai-0.1.10.dist-info/RECORD REMOVED
@@ -1,18 +0,0 @@
- biolmai/__init__.py,sha256=fpuxOgNf_H_6yin7NJLjtwJVcAjT60Nw0hie5OVDFms,163
- biolmai/api.py,sha256=1T38KUoOiPl8IjXfxsypIKGraLNcjtlDbtkrvohEZJU,12959
- biolmai/asynch.py,sha256=BVypJhhEEK2Bek2AhqNGn7FIRJehAbJflUdeeslbXFE,9073
- biolmai/auth.py,sha256=flI9KAD90qdXyLDnpJTrc9voKsiK0uWtD2ehsPBn8r4,6329
- biolmai/biolmai.py,sha256=xwjAvuw6AtmQdkRf_usSGUZ-k2oU-fjl82_WAgfSvVE,74
- biolmai/cli.py,sha256=bdb4q8QlN73A6Ttz0e-dBIwoct7PYqy5WSc52jCMIyU,1967
- biolmai/cls.py,sha256=Hiy_Qoj2Eb43oltnEUdJfMPCsOeFKZ-GUNljF-yShug,4287
- biolmai/const.py,sha256=vCSj-itsusZWoLR27DYQSpuq024GQz3-uKJuDUoPF0Y,1153
- biolmai/ltc.py,sha256=al7HZc5tLyUR5fmpIb95hOz5ctudVsc0xzjd_c2Ew3M,49
- biolmai/payloads.py,sha256=BOhEKl9kWkKMXy1YiNw2_eC6MJ4Dn6vKNvkhEBsM7Lw,1735
- biolmai/validate.py,sha256=58XMWrdWoDRmfiNAayWqrYaH3_bjRmEpG_yx6XSjTrM,4168
- biolmai-0.1.10.dist-info/AUTHORS.rst,sha256=TB_ACuFPgVmxn1NspYwksTdT6jdZeShcxfafmi-XWKQ,158
- biolmai-0.1.10.dist-info/LICENSE,sha256=8yt0SdP38I7a3g0zWqZjNe0VSDQhJA4bWLQSqqKtAVg,583
- biolmai-0.1.10.dist-info/METADATA,sha256=3sNY6gTfD85EfQV1SkvcmXKTGS_EnVHn__n42ooIVDs,1973
- biolmai-0.1.10.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
- biolmai-0.1.10.dist-info/entry_points.txt,sha256=ylQnDpCYrxF1F9z_T7NRQcYMWYF5ia_KsTUuboxjEAM,44
- biolmai-0.1.10.dist-info/top_level.txt,sha256=jyQO45JN3g_jbdI8WqMnb0aEIzf4h1MrmPAZkKgfnwY,8
- biolmai-0.1.10.dist-info/RECORD,,