pybiolib 1.1.1691__tar.gz → 1.1.1990__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/PKG-INFO +1 -2
  2. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/__init__.py +11 -3
  3. pybiolib-1.1.1990/biolib/_internal/data_record/__init__.py +1 -0
  4. pybiolib-1.1.1990/biolib/_internal/data_record/data_record.py +166 -0
  5. pybiolib-1.1.1990/biolib/_internal/data_record/remote_storage_endpoint.py +27 -0
  6. pybiolib-1.1.1990/biolib/_internal/fuse_mount/__init__.py +1 -0
  7. pybiolib-1.1.1990/biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  8. pybiolib-1.1.1990/biolib/_internal/http_client.py +136 -0
  9. pybiolib-1.1.1990/biolib/_internal/libs/__init__.py +1 -0
  10. pybiolib-1.1.1990/biolib/_internal/libs/fusepy/__init__.py +1257 -0
  11. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/_internal/push_application.py +22 -37
  12. pybiolib-1.1.1990/biolib/_internal/runtime.py +96 -0
  13. pybiolib-1.1.1990/biolib/_internal/utils/__init__.py +18 -0
  14. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/api/client.py +12 -6
  15. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/app/app.py +38 -72
  16. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/app/search_apps.py +8 -12
  17. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/api_client.py +14 -9
  18. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/app_types.py +2 -0
  19. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/auth.py +0 -12
  20. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/biolib_app_api.py +5 -8
  21. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/biolib_job_api.py +11 -40
  22. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/job_types.py +2 -1
  23. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/remote_endpoints.py +12 -10
  24. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/utils.py +41 -4
  25. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/__init__.py +6 -2
  26. pybiolib-1.1.1990/biolib/cli/auth.py +58 -0
  27. pybiolib-1.1.1990/biolib/cli/data_record.py +43 -0
  28. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/download_container.py +3 -1
  29. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/init.py +1 -0
  30. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/lfs.py +39 -9
  31. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/run.py +3 -2
  32. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/start.py +1 -0
  33. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/cloud_utils/cloud_utils.py +38 -65
  34. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/cache_state.py +1 -1
  35. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/docker_executor.py +126 -112
  36. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_storage.py +11 -16
  37. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_worker.py +36 -17
  38. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/remote_host_proxy.py +67 -29
  39. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/webserver/worker_thread.py +2 -2
  40. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/experiments/experiment.py +29 -30
  41. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/jobs/job.py +120 -46
  42. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/jobs/job_result.py +16 -16
  43. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/jobs/types.py +1 -1
  44. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/lfs/__init__.py +0 -2
  45. pybiolib-1.1.1990/biolib/lfs/utils.py +153 -0
  46. pybiolib-1.1.1990/biolib/runtime/__init__.py +13 -0
  47. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/sdk/__init__.py +17 -4
  48. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/user/sign_in.py +8 -12
  49. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/utils/__init__.py +17 -45
  50. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/utils/app_uri.py +11 -4
  51. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/utils/cache_state.py +2 -2
  52. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/utils/multipart_uploader.py +42 -68
  53. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/utils/seq_util.py +29 -13
  54. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/utils/zip/remote_zip.py +9 -17
  55. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/pyproject.toml +14 -12
  56. pybiolib-1.1.1691/biolib/_internal/http_client.py +0 -113
  57. pybiolib-1.1.1691/biolib/biolib_api_client/biolib_account_api.py +0 -21
  58. pybiolib-1.1.1691/biolib/biolib_api_client/biolib_large_file_system_api.py +0 -53
  59. pybiolib-1.1.1691/biolib/lfs/utils.py +0 -245
  60. pybiolib-1.1.1691/biolib/runtime/__init__.py +0 -1
  61. pybiolib-1.1.1691/biolib/runtime/results.py +0 -20
  62. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/LICENSE +0 -0
  63. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/README.md +0 -0
  64. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/_internal/__init__.py +0 -0
  65. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/api/__init__.py +0 -0
  66. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/app/__init__.py +0 -0
  67. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/__init__.py +0 -0
  68. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/common_types.py +0 -0
  69. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/lfs_types.py +0 -0
  70. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_api_client/user_state.py +0 -0
  71. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/__init__.py +0 -0
  72. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/base_bbf_package.py +0 -0
  73. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/file_in_container.py +0 -0
  74. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/module_input.py +0 -0
  75. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/module_output_v2.py +0 -0
  76. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/remote_stream_seeker.py +0 -0
  77. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/saved_job.py +0 -0
  78. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/stdout_and_stderr.py +0 -0
  79. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/system_exception.py +0 -0
  80. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_binary_format/system_status_update.py +0 -0
  81. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_docker_client/__init__.py +0 -0
  82. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_download_container.py +0 -0
  83. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_errors.py +0 -0
  84. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/biolib_logging.py +0 -0
  85. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/push.py +1 -1
  86. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/cli/runtime.py +0 -0
  87. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/.gitignore +0 -0
  88. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/__init__.py +0 -0
  89. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/cloud_utils/__init__.py +0 -0
  90. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/__init__.py +0 -0
  91. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/cache_types.py +0 -0
  92. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/docker_image_cache.py +0 -0
  93. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/__init__.py +0 -0
  94. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/docker_types.py +0 -0
  95. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/tars/__init__.py +0 -0
  96. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/types.py +0 -0
  97. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +0 -0
  98. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +0 -0
  99. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/large_file_system.py +0 -0
  100. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/mappings.py +0 -0
  101. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/utilization_reporter_thread.py +0 -0
  102. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/utils.py +0 -0
  103. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/socker_listener_thread.py +0 -0
  104. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/socket_sender_thread.py +0 -0
  105. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/utils.py +0 -0
  106. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/webserver/__init__.py +0 -0
  107. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/webserver/gunicorn_flask_application.py +0 -0
  108. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver.py +0 -0
  109. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver_types.py +0 -0
  110. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver_utils.py +0 -0
  111. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/experiments/__init__.py +0 -0
  112. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/experiments/types.py +0 -0
  113. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/jobs/__init__.py +0 -0
  114. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/lfs/cache.py +0 -0
  115. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/tables.py +0 -0
  116. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/templates/__init__.py +0 -0
  117. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/templates/example_app.py +0 -0
  118. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/typing_utils.py +0 -0
  119. {pybiolib-1.1.1691 → pybiolib-1.1.1990}/biolib/user/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.1691
3
+ Version: 1.1.1990
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -25,7 +25,6 @@ Requires-Dist: flask (>=2.0.1) ; extra == "compute-node"
25
25
  Requires-Dist: gunicorn (>=20.1.0) ; extra == "compute-node"
26
26
  Requires-Dist: importlib-metadata (>=1.6.1)
27
27
  Requires-Dist: pyyaml (>=5.3.1)
28
- Requires-Dist: requests (>=2.25.1)
29
28
  Requires-Dist: rich (>=12.4.4)
30
29
  Requires-Dist: typing_extensions (>=3.10.0) ; python_version < "3.8"
31
30
  Description-Content-Type: text/markdown
@@ -1,5 +1,6 @@
1
1
  # Imports to hide
2
2
  import os
3
+ from urllib.parse import urlparse as _urlparse
3
4
 
4
5
  from biolib import typing_utils as _typing_utils
5
6
  from biolib.app import BioLibApp as _BioLibApp
@@ -12,10 +13,12 @@ from biolib.biolib_api_client import BiolibApiClient as _BioLibApiClient, App
12
13
  from biolib.jobs import Job as _Job
13
14
  from biolib import user as _user
14
15
  from biolib.typing_utils import List, Optional
16
+ from biolib._internal.data_record import DataRecord as _DataRecord
15
17
 
16
18
  import biolib.api
17
19
  import biolib.app
18
20
  import biolib.cli
21
+ import biolib.sdk
19
22
  import biolib.utils
20
23
 
21
24
 
@@ -32,8 +35,8 @@ def load(uri: str) -> _BioLibApp:
32
35
  def search(
33
36
  search_query: Optional[str] = None,
34
37
  team: Optional[str] = None,
35
- count: int = 100
36
- ) -> List[str]:
38
+ count: int = 100,
39
+ ) -> List[str]:
37
40
  apps: List[str] = search_apps(search_query, team, count)
38
41
  return apps
39
42
 
@@ -46,6 +49,10 @@ def fetch_jobs(count: int = 25) -> List[_Job]:
46
49
  return _Job.fetch_jobs(count)
47
50
 
48
51
 
52
+ def fetch_data_records(uri: Optional[str] = None, count: Optional[int] = None) -> List[_DataRecord]:
53
+ return _DataRecord.fetch(uri, count)
54
+
55
+
49
56
  def get_experiment(name: str) -> Experiment:
50
57
  return Experiment(name)
51
58
 
@@ -77,6 +84,7 @@ def logout() -> None:
77
84
  def set_api_base_url(api_base_url: str) -> None:
78
85
  _BioLibApiClient.initialize(base_url=api_base_url)
79
86
  biolib.utils.BIOLIB_BASE_URL = api_base_url
87
+ biolib.utils.BIOLIB_SITE_HOSTNAME = _urlparse(api_base_url).hostname
80
88
  biolib.utils.BASE_URL_IS_PUBLIC_BIOLIB = api_base_url.endswith('biolib.com') or (
81
89
  os.environ.get('BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB', '').upper() == 'TRUE'
82
90
  )
@@ -127,4 +135,4 @@ _logger.configure(default_log_level=_DEFAULT_LOG_LEVEL)
127
135
  _logger_no_user_data.configure(default_log_level=_DEFAULT_LOG_LEVEL)
128
136
  _configure_requests_certificates()
129
137
 
130
- set_api_base_url(biolib.utils.BIOLIB_BASE_URL)
138
+ set_api_base_url(biolib.utils.load_base_url_from_env())
@@ -0,0 +1 @@
1
+ from .data_record import DataRecord
@@ -0,0 +1,166 @@
1
+ import os
2
+ from collections import namedtuple
3
+ from datetime import datetime
4
+ from fnmatch import fnmatch
5
+ from struct import Struct
6
+ from typing import Callable, Dict, List, Optional, Union, cast
7
+
8
+ from biolib import lfs
9
+ from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
10
+ from biolib._internal.http_client import HttpClient
11
+ from biolib.api import client as api_client
12
+ from biolib.biolib_api_client import AppGetResponse
13
+ from biolib.biolib_binary_format import LazyLoadedFile
14
+ from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
15
+ from biolib.biolib_logging import logger
16
+ from biolib.utils.app_uri import parse_app_uri
17
+ from biolib.utils.zip.remote_zip import RemoteZip # type: ignore
18
+
19
+ PathFilter = Union[str, Callable[[str], bool]]
20
+
21
+
22
class DataRecord:
    """Handle to a data record identified by a resource URI.

    The constructor only parses the URI; remote calls happen in the methods
    that list, download, create or fetch records.
    """

    def __init__(self, uri: str):
        """Store ``uri`` and extract the record name from it.

        Raises:
            ValueError: if the parsed URI has no resource name.
        """
        self._uri = uri
        uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
        if not uri_parsed['app_name']:
            raise ValueError('Expected parameter "uri" to contain resource name')

        self._name = uri_parsed['app_name']

    def __repr__(self):
        return f'DataRecord: {self._uri}'

    @property
    def uri(self) -> str:
        """The URI this record was constructed with."""
        return self._uri

    @property
    def name(self) -> str:
        """The resource name parsed from the URI."""
        return self._name

    def _get_remote_storage_endpoint(self) -> DataRecordRemoteStorageEndpoint:
        """Look up the record via ``/app/`` and build the endpoint for its version."""
        app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': self._uri}).json()
        return DataRecordRemoteStorageEndpoint(
            resource_version_uuid=app_response['app_version']['public_id'],
        )

    def list_files(self, path_filter: Optional[PathFilter] = None) -> List[LazyLoadedFile]:
        """Return lazily loaded files for every entry in the record's zip central directory.

        Args:
            path_filter: optional glob string or predicate on the file path;
                when falsy, all files are returned.
        """
        remote_storage_endpoint = self._get_remote_storage_endpoint()
        with RemoteZip(url=remote_storage_endpoint.get_remote_url()) as remote_zip:
            central_directory = remote_zip.get_central_directory()
            files: List[LazyLoadedFile] = [
                self._get_file(remote_storage_endpoint, file_info) for file_info in central_directory.values()
            ]

        return self._get_filtered_files(files=files, path_filter=path_filter) if path_filter else files

    def download_zip(self, output_path: str):
        """Download the record's remote zip to ``output_path``."""
        remote_storage_endpoint = self._get_remote_storage_endpoint()
        HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)

    def download_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
        """Write the (optionally filtered) files of the record below ``output_dir``.

        Raises:
            ValueError: if an entry's path would resolve outside ``output_dir``.
        """
        filtered_files = self.list_files(path_filter=path_filter)

        if len(filtered_files) == 0:
            logger.debug('No files to save')
            return

        output_root = os.path.abspath(output_dir)
        for file in filtered_files:
            # Entry names come from the remote archive. A leading slash would make
            # os.path.join discard output_dir, and ".." segments could climb out of it.
            relative_path = file.path.lstrip('/')
            file_path = os.path.join(output_dir, relative_path)
            if os.path.commonpath([output_root, os.path.abspath(file_path)]) != output_root:
                raise ValueError(f'Refusing to write "{file.path}" outside of output directory "{output_dir}"')

            parent_dir = os.path.dirname(file_path)
            if parent_dir:  # os.makedirs('') raises, e.g. for output_dir == ''
                os.makedirs(parent_dir, exist_ok=True)

            with open(file_path, mode='wb') as file_handle:
                for chunk in file.get_data_iterator():
                    file_handle.write(chunk)

    def save_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
        """Alias of :meth:`download_files`."""
        self.download_files(output_dir=output_dir, path_filter=path_filter)

    @staticmethod
    def create(destination: str, data_path: str, name: Optional[str] = None) -> 'DataRecord':
        """Create a record under ``destination`` and push the directory ``data_path`` to it.

        When ``name`` is not given, a name of the form ``data-record-<timestamp>``
        is generated from the current local time (seconds precision, ``:`` replaced by ``-``).
        """
        # NOTE(review): assert is stripped under ``python -O``; kept so the raised type stays AssertionError.
        assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
        record_name = name if name else 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
        record_uri = lfs.create_large_file_system(lfs_uri=f'{destination}/{record_name}')
        record_version_uri = lfs.push_large_file_system(lfs_uri=record_uri, input_dir=data_path)
        return DataRecord(uri=record_version_uri)

    @staticmethod
    def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
        """Query ``/apps/`` for resources of type ``data-record``.

        Args:
            uri: optional URI used to restrict by account handle and, if present, name.
            count: page size to request; defaults to the maximum of 1000.
        """
        max_page_size = 1_000
        params: Dict[str, Union[str, int]] = {
            'page_size': str(count or max_page_size),
            'resource_type': 'data-record',
        }
        if uri:
            uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
            params['account_handle'] = uri_parsed['account_handle_normalized']
            if uri_parsed['app_name_normalized']:
                params['app_name'] = uri_parsed['app_name_normalized']

        results = api_client.get(path='/apps/', params=params).json()['results']
        # Only one page is requested, so a full page may mean there are more records.
        if count is None and len(results) == max_page_size:
            logger.warning(
                f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
            )

        return [DataRecord(result['resource_uri']) for result in results]

    @staticmethod
    def _get_file(remote_storage_endpoint: DataRecordRemoteStorageEndpoint, file_info: Dict) -> LazyLoadedFile:
        """Build a LazyLoadedFile for one central-directory entry.

        The data offset is resolved lazily: ``start_func`` range-requests the entry's
        local file header and skips past its file name and extra field.
        """
        local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
        local_file_header_struct = Struct('<H2sHHHIIIHH')
        LocalFileHeader = namedtuple(
            'LocalFileHeader',
            (
                'version',
                'flags',
                'compression_raw',
                'mod_time',
                'mod_date',
                'crc_32_expected',
                'compressed_size_raw',
                'uncompressed_size_raw',
                'file_name_len',
                'extra_field_len',
            ),
        )

        # The fixed-size header fields start right after the 4 signature bytes.
        local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
        local_file_header_end = local_file_header_start + local_file_header_struct.size

        def file_start_func() -> int:
            local_file_header_response = HttpClient.request(
                url=remote_storage_endpoint.get_remote_url(),
                # HTTP byte ranges are inclusive, hence the "- 1".
                headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
                timeout_in_seconds=300,
            )
            local_file_header = LocalFileHeader._make(
                local_file_header_struct.unpack(local_file_header_response.content)
            )
            file_start: int = (
                local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
            )
            return file_start

        return LazyLoadedFile(
            buffer=RemoteIndexableBuffer(endpoint=remote_storage_endpoint),
            length=file_info['file_size'],
            path=file_info['filename'],
            start=None,
            start_func=file_start_func,
        )

    @staticmethod
    def _get_filtered_files(files: List[LazyLoadedFile], path_filter: PathFilter) -> List[LazyLoadedFile]:
        """Keep the files whose path matches ``path_filter`` (glob string or predicate).

        Raises:
            Exception: if ``path_filter`` is neither a string nor callable.
        """
        if not (isinstance(path_filter, str) or callable(path_filter)):
            raise Exception('Expected path_filter to be a string or a function')

        if callable(path_filter):
            return [file for file in files if path_filter(file.path)]  # type: ignore

        glob_filter = cast(str, path_filter)
        return [file for file in files if fnmatch(file.path, glob_filter)]
@@ -0,0 +1,27 @@
1
+ from datetime import datetime, timedelta
2
+
3
+ from biolib.api import client as api_client
4
+ from biolib.biolib_api_client.lfs_types import LargeFileSystemVersion
5
+ from biolib.biolib_binary_format.utils import RemoteEndpoint
6
+ from biolib.biolib_logging import logger
7
+
8
+
9
class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
    """Remote endpoint that hands out a cached presigned download URL for one record version."""

    def __init__(self, resource_version_uuid: str):
        self._resource_version_uuid: str = resource_version_uuid
        # Both stay None until the first call to get_remote_url().
        self._expires_at = None
        self._presigned_url = None

    def get_remote_url(self):
        """Return the presigned URL, fetching a new one when none is cached or the cached one expired."""
        if self._presigned_url and datetime.utcnow() <= self._expires_at:
            return self._presigned_url

        response = api_client.get(path=f'/lfs/versions/{self._resource_version_uuid}/')
        lfs_version: LargeFileSystemVersion = response.json()
        self._presigned_url = lfs_version['presigned_download_url']
        # Local expiry of 8 minutes; presumably below the server-side URL lifetime - TODO confirm.
        self._expires_at = datetime.utcnow() + timedelta(minutes=8)
        logger.debug(
            f'DataRecord "{self._resource_version_uuid}" fetched presigned URL '
            f'with expiry at {self._expires_at.isoformat()}'
        )
        return self._presigned_url
@@ -0,0 +1 @@
1
+ from .experiment_fuse_mount import ExperimentFuseMount
@@ -0,0 +1,209 @@
1
+ import errno
2
+ import os
3
+ import stat
4
+ from datetime import datetime, timezone
5
+ from time import time
6
+
7
+ from biolib._internal.libs.fusepy import FUSE, FuseOSError, Operations
8
+ from biolib.biolib_errors import BioLibError
9
+ from biolib.jobs import Job
10
+ from biolib.typing_utils import Dict, List, Optional, Tuple, TypedDict
11
+
12
+
13
class _AttributeDict(TypedDict):
    """Shape of the stat-like dictionaries returned by the attribute helpers in this module."""

    # Type, permissions, link count and size
    st_mode: int
    st_nlink: int
    st_size: int

    # Ownership
    st_uid: int
    st_gid: int

    # Timestamps (epoch seconds)
    st_atime: int
    st_ctime: int
    st_mtime: int
22
+
23
+
24
+ _SUCCESS_CODE = 0
25
+
26
+
27
class ExperimentFuseMount(Operations):
    """Read-only FUSE operations exposing an experiment's completed jobs as directories.

    The mount root lists one directory per job name; below each job directory
    the job's output files are exposed. All mutating operations raise EACCES.
    """

    def __init__(self, experiment):
        self._experiment = experiment
        self._job_names_map: Optional[Dict[str, Job]] = None
        self._jobs_last_fetched_at: float = 0.0
        self._mounted_at_epoch_seconds: int = int(time())

    @staticmethod
    def mount_experiment(experiment, mount_path: str) -> None:
        """Mount ``experiment`` at ``mount_path`` using these operations."""
        FUSE(
            operations=ExperimentFuseMount(experiment),
            mountpoint=mount_path,
            nothreads=True,
            foreground=True,
            allow_other=False,
        )

    def getattr(self, path: str, fh=None) -> _AttributeDict:
        """Return attributes for the root, a job directory, an output file or a sub-directory.

        Raises:
            FuseOSError: ENOENT when the path matches neither a file nor a directory.
        """
        if path == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=self._mounted_at_epoch_seconds)

        job, path_in_job = self._parse_path(path)
        # 'finished_at' is parsed as UTC after dropping a trailing 'Z'.
        job_finished_at_epoch_seconds: int = int(
            datetime.fromisoformat(job.to_dict()['finished_at'].rstrip('Z')).replace(tzinfo=timezone.utc).timestamp()
        )

        if path_in_job == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        try:
            file = job.get_output_file(path_in_job)
            return self._get_file_attributes(
                timestamp_epoch_seconds=job_finished_at_epoch_seconds,
                size_in_bytes=file.length,
            )
        except BioLibError:
            # file not found; fall through and check whether the path is a directory
            pass

        # A path is a directory only if some file lives *below* it. Matching on the
        # bare prefix would also accept siblings such as '/results.txt' for '/res',
        # so the prefix ends with '/', exactly as in readdir().
        directory_prefix = path_in_job.rstrip('/') + '/'
        if any(file.path.startswith(directory_prefix) for file in job.list_output_files()):
            return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        raise FuseOSError(errno.ENOENT) from None  # No such file or directory

    def readdir(self, path: str, fh: int) -> List[str]:
        """List job names at the root, or the immediate children of a directory inside a job."""
        directory_entries = ['.', '..']

        if path == '/':
            directory_entries.extend(self._get_job_names_map(refresh_jobs=True).keys())
        else:
            job, path_in_job = self._parse_path(path)
            dir_path_in_job = '/' if path_in_job == '/' else path_in_job + '/'
            # Number of slashes in the directory prefix == index of the child segment after split('/').
            depth = dir_path_in_job.count('/')
            directory_entries.extend(
                {
                    file.path.split('/')[depth]
                    for file in job.list_output_files()
                    if file.path.startswith(dir_path_in_job)
                }
            )

        return directory_entries

    def open(self, path: str, flags: int) -> int:
        """Check that the output file exists and return a dummy file handle."""
        job, path_in_job = self._parse_path(path)
        try:
            job.get_output_file(path_in_job)
        except BioLibError:
            # file not found
            raise FuseOSError(errno.ENOENT) from None

        return 1234  # dummy file handle

    def read(self, path: str, size: int, offset: int, fh: int) -> bytes:
        """Return ``size`` bytes of the output file starting at ``offset``."""
        job, path_in_job = self._parse_path(path)
        try:
            file = job.get_output_file(path_in_job)
        except BioLibError:
            raise FuseOSError(errno.ENOENT) from None  # No such file or directory

        return file.get_data(start=offset, length=size)

    def release(self, path: str, fh: int) -> int:
        return _SUCCESS_CODE

    def releasedir(self, path: str, fh: int) -> int:
        return _SUCCESS_CODE

    def flush(self, path: str, fh: int) -> int:
        return _SUCCESS_CODE

    @staticmethod
    def _get_directory_attributes(timestamp_epoch_seconds: int) -> _AttributeDict:
        """Attributes for a read-only directory owned by the current user and group."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFDIR | 0o555,  # Directory that is readable and executable by owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=1,
            st_uid=os.getuid(),
        )

    @staticmethod
    def _get_file_attributes(timestamp_epoch_seconds: int, size_in_bytes: int) -> _AttributeDict:
        """Attributes for a read-only regular file owned by the current user and group."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFREG | 0o444,  # Regular file with read permissions for owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=size_in_bytes,
            st_uid=os.getuid(),
        )

    def _get_job_names_map(self, refresh_jobs=False) -> Dict[str, Job]:
        """Return the cached name -> job map of completed jobs.

        The map is fetched when it is empty or unset, or when ``refresh_jobs``
        is true and the last fetch is more than one second old.
        """
        current_time = time()
        if not self._job_names_map or (current_time - self._jobs_last_fetched_at > 1 and refresh_jobs):
            self._jobs_last_fetched_at = current_time
            self._job_names_map = {job.get_name(): job for job in self._experiment.get_jobs(status='completed')}

        return self._job_names_map

    def _parse_path(self, path: str) -> Tuple[Job, str]:
        """Split a mount path into (job, path inside the job); raise ENOENT for unknown job names."""
        path_splitted = path.split('/')
        job_name = path_splitted[1]
        path_in_job = '/' + '/'.join(path_splitted[2:])
        job = self._get_job_names_map().get(job_name)
        if not job:
            raise FuseOSError(errno.ENOENT)  # No such file or directory

        return job, path_in_job

    # ----------------------------------- File system methods not implemented below -----------------------------------

    def chmod(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def chown(self, path, uid, gid):
        raise FuseOSError(errno.EACCES)

    def mknod(self, path, mode, dev):
        raise FuseOSError(errno.EACCES)

    def rmdir(self, path):
        raise FuseOSError(errno.EACCES)

    def mkdir(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def unlink(self, path):
        raise FuseOSError(errno.EACCES)

    def symlink(self, target, source):
        raise FuseOSError(errno.EACCES)

    def rename(self, old, new):
        raise FuseOSError(errno.EACCES)

    def link(self, target, source):
        raise FuseOSError(errno.EACCES)

    def utimens(self, path, times=None):
        raise FuseOSError(errno.EACCES)

    def create(self, path, mode, fi=None):
        raise FuseOSError(errno.EACCES)

    def write(self, path, data, offset, fh):
        raise FuseOSError(errno.EACCES)

    def truncate(self, path, length, fh=None):
        raise FuseOSError(errno.EACCES)

    def fsync(self, path, datasync, fh):
        raise FuseOSError(errno.EACCES)
@@ -0,0 +1,136 @@
1
+ import json
2
+ import platform
3
+ import shutil
4
+ import socket
5
+ import ssl
6
+ import subprocess
7
+ import time
8
+ import urllib.error
9
+ import urllib.parse
10
+ import urllib.request
11
+
12
+ from biolib.biolib_logging import logger_no_user_data
13
+ from biolib.typing_utils import Dict, Literal, Optional, Union, cast
14
+
15
+ _HttpMethod = Literal['GET', 'POST', 'PATCH', 'PUT']
16
+
17
+
18
def _create_ssl_context():
    """Create the default SSL context, additionally loading keychain certificates on macOS.

    Loading the extra certificates is best effort: any failure of the
    ``security`` command or of parsing its output leaves the default context
    in place.
    """
    context = ssl.create_default_context()
    try:
        if platform.system() == 'Darwin':
            # Argument list instead of a shell string: no shell is needed for a fixed command.
            certificates = subprocess.check_output(['security', 'find-certificate', '-a', '-p']).decode('utf-8')
            context.load_verify_locations(cadata=certificates)
    except Exception:  # deliberately broad, but must not swallow KeyboardInterrupt/SystemExit
        pass
    return context
27
+
28
+
29
+ class HttpError(urllib.error.HTTPError):
30
+ def __init__(self, http_error: urllib.error.HTTPError):
31
+ super().__init__(
32
+ url=http_error.url,
33
+ code=http_error.code,
34
+ msg=http_error.msg, # type: ignore
35
+ hdrs=http_error.hdrs, # type: ignore
36
+ fp=http_error.fp,
37
+ )
38
+
39
+ def __str__(self):
40
+ response_text = self.read().decode('utf-8')
41
+ return f'{self.code} Error: {response_text} for url: {self.url}'
42
+
43
+
44
class HttpResponse:
    """Snapshot of an HTTP response: headers, status code, final URL and body.

    When ``response_path`` is given, the body is streamed to that file instead
    of being held in memory.
    """

    def __init__(self, response, response_path) -> None:
        self.headers: Dict[str, str] = dict(response.headers)
        self.status_code: int = int(response.status)
        self.response_path = response_path
        self._content: Optional[bytes] = None
        if self.response_path:
            with open(self.response_path, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
        else:
            self._content = response.read()
        self.url: str = response.geturl()

    @property
    def content(self) -> bytes:
        """Body bytes; read back from ``response_path`` when the body was streamed to disk."""
        if self._content is not None:
            return self._content
        # Without this fallback the attribute was simply missing for file-backed responses.
        with open(self.response_path, 'rb') as fp:
            return fp.read()

    @content.setter
    def content(self, value: bytes) -> None:
        # Kept assignable, as it was when content was a plain attribute.
        self._content = value

    @property
    def text(self) -> str:
        """Body decoded as UTF-8."""
        return self.content.decode('utf-8')

    def json(self):
        """Body parsed as JSON."""
        return json.loads(self.text)
66
+
67
+
68
class HttpClient:
    """Minimal urllib-based HTTP client with retries for transient failures."""

    # Created on the first request and shared by all later requests.
    ssl_context = None

    # Status codes that are retried instead of raised immediately.
    _RETRYABLE_STATUS_CODES = frozenset({429, 502, 503, 504})

    @staticmethod
    def request(
        url: str,
        method: Optional[_HttpMethod] = None,
        data: Optional[Union[Dict, bytes]] = None,
        headers: Optional[Dict[str, str]] = None,
        retries: int = 5,
        timeout_in_seconds: Optional[int] = None,
        response_path: Optional[str] = None,
    ) -> HttpResponse:
        """Send an HTTP request and return the response.

        Args:
            url: target URL.
            method: HTTP method; defaults to ``GET``.
            data: request body. A dict is sent as JSON, bytes are sent as-is.
            headers: extra request headers; the passed dict is not modified.
            retries: number of retries after the first attempt.
            timeout_in_seconds: timeout per attempt; defaults to 60 for JSON
                bodies and 180 otherwise.
            response_path: when set, the response body is written to this file.

        Raises:
            HttpError: for HTTP error statuses that are not retried.
            urllib.error.URLError: for connection errors other than timeouts.
            Exception: the last retryable error once all attempts are used up.
        """
        if not HttpClient.ssl_context:
            HttpClient.ssl_context = _create_ssl_context()

        # Copy, so the JSON headers added below never leak into the caller's dict.
        headers_to_send = dict(headers) if headers else {}
        if isinstance(data, dict):
            headers_to_send['Accept'] = 'application/json'
            headers_to_send['Content-Type'] = 'application/json'

        # Resolved once so log and error messages never read "HTTP None request".
        http_method = method or 'GET'
        request = urllib.request.Request(
            url=url,
            data=json.dumps(data).encode() if isinstance(data, dict) else data,
            headers=headers_to_send,
            method=http_method,
        )
        if timeout_in_seconds is None:
            timeout_in_seconds = 60 if isinstance(data, dict) else 180  # TODO: Calculate timeout based on data size

        last_error: Optional[Exception] = None
        for retry_count in range(retries + 1):
            if retry_count > 0:
                time.sleep(5 * retry_count)  # linear back-off: 5s, 10s, 15s, ...
                logger_no_user_data.debug(f'Retrying HTTP {http_method} request...')
            try:
                with urllib.request.urlopen(
                    request,
                    context=HttpClient.ssl_context,
                    timeout=timeout_in_seconds,
                ) as response:
                    return HttpResponse(response, response_path)

            except urllib.error.HTTPError as error:
                if error.code not in HttpClient._RETRYABLE_STATUS_CODES:
                    raise HttpError(error) from None
                logger_no_user_data.warning(
                    f'HTTP {http_method} request failed with status {error.code} for "{url}"'
                )
                last_error = error

            except urllib.error.URLError as error:
                # Only timeouts wrapped in URLError are retried; other URL errors propagate.
                if not isinstance(error.reason, socket.timeout):
                    raise error
                logger_no_user_data.warning(f'HTTP {http_method} request failed with read timeout for "{url}"')
                last_error = error

            except socket.timeout as error:
                logger_no_user_data.warning(f'HTTP {http_method} request failed with read timeout for "{url}"')
                last_error = error

        raise last_error or Exception(f'HTTP {http_method} request failed after {retries} retries for "{url}"')
@@ -0,0 +1 @@
1
+ # Note: this directory holds third-party libraries that are vendored (included directly) instead of being installed as dependencies