pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +357 -11
- biolib/_data_record/data_record.py +380 -0
- biolib/_index/__init__.py +0 -0
- biolib/_index/index.py +55 -0
- biolib/_index/query_result.py +103 -0
- biolib/_internal/__init__.py +0 -0
- biolib/_internal/add_copilot_prompts.py +58 -0
- biolib/_internal/add_gui_files.py +81 -0
- biolib/_internal/data_record/__init__.py +1 -0
- biolib/_internal/data_record/data_record.py +85 -0
- biolib/_internal/data_record/push_data.py +116 -0
- biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
- biolib/_internal/errors.py +5 -0
- biolib/_internal/file_utils.py +125 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +159 -0
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/lfs/cache.py +51 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +488 -0
- biolib/_internal/runtime.py +22 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/templates/__init__.py +1 -0
- biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
- biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
- biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
- biolib/_internal/templates/gitignore_template/.gitignore +10 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +27 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
- biolib/_internal/templates/gui_template/vite.config.mts +10 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
- biolib/_internal/templates/init_template/Dockerfile +14 -0
- biolib/_internal/templates/init_template/requirements.txt +1 -0
- biolib/_internal/templates/init_template/run.py +12 -0
- biolib/_internal/templates/init_template/run.sh +4 -0
- biolib/_internal/templates/templates.py +25 -0
- biolib/_internal/tree_utils.py +106 -0
- biolib/_internal/utils/__init__.py +65 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +263 -0
- biolib/_runtime/runtime.py +157 -0
- biolib/_session/session.py +44 -0
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +74 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/_shared/types/app.py +9 -0
- biolib/_shared/types/data_record.py +40 -0
- biolib/_shared/types/experiment.py +32 -0
- biolib/_shared/types/file_node.py +17 -0
- biolib/_shared/types/push.py +6 -0
- biolib/_shared/types/resource.py +37 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/_shared/types/resource_permission.py +14 -0
- biolib/_shared/types/resource_version.py +19 -0
- biolib/_shared/types/result.py +14 -0
- biolib/_shared/types/typing.py +10 -0
- biolib/_shared/types/user.py +19 -0
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/__init__.py +6 -0
- biolib/api/client.py +168 -0
- biolib/app/app.py +252 -49
- biolib/app/search_apps.py +45 -0
- biolib/biolib_api_client/api_client.py +126 -31
- biolib/biolib_api_client/app_types.py +24 -4
- biolib/biolib_api_client/auth.py +31 -8
- biolib/biolib_api_client/biolib_app_api.py +147 -52
- biolib/biolib_api_client/biolib_job_api.py +161 -141
- biolib/biolib_api_client/job_types.py +21 -5
- biolib/biolib_api_client/lfs_types.py +7 -23
- biolib/biolib_api_client/user_state.py +56 -0
- biolib/biolib_binary_format/__init__.py +1 -4
- biolib/biolib_binary_format/file_in_container.py +105 -0
- biolib/biolib_binary_format/module_input.py +24 -7
- biolib/biolib_binary_format/module_output_v2.py +149 -0
- biolib/biolib_binary_format/remote_endpoints.py +34 -0
- biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
- biolib/biolib_binary_format/saved_job.py +3 -2
- biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
- biolib/biolib_binary_format/system_status_update.py +3 -2
- biolib/biolib_binary_format/utils.py +175 -0
- biolib/biolib_docker_client/__init__.py +11 -2
- biolib/biolib_errors.py +36 -0
- biolib/biolib_logging.py +27 -10
- biolib/cli/__init__.py +38 -0
- biolib/cli/auth.py +46 -0
- biolib/cli/data_record.py +164 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +421 -0
- biolib/cli/lfs.py +101 -0
- biolib/cli/push.py +50 -0
- biolib/cli/run.py +63 -0
- biolib/cli/runtime.py +14 -0
- biolib/cli/sdk.py +16 -0
- biolib/cli/start.py +56 -0
- biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
- biolib/compute_node/job_worker/cache_state.py +66 -88
- biolib/compute_node/job_worker/cache_types.py +1 -6
- biolib/compute_node/job_worker/docker_image_cache.py +112 -37
- biolib/compute_node/job_worker/executors/__init__.py +0 -3
- biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
- biolib/compute_node/job_worker/executors/docker_types.py +9 -1
- biolib/compute_node/job_worker/executors/types.py +19 -9
- biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
- biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
- biolib/compute_node/job_worker/job_storage.py +108 -0
- biolib/compute_node/job_worker/job_worker.py +397 -212
- biolib/compute_node/job_worker/large_file_system.py +87 -38
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
- biolib/compute_node/job_worker/utils.py +9 -24
- biolib/compute_node/remote_host_proxy.py +400 -98
- biolib/compute_node/utils.py +31 -9
- biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +130 -44
- biolib/compute_node/webserver/webserver_types.py +2 -6
- biolib/compute_node/webserver/webserver_utils.py +77 -12
- biolib/compute_node/webserver/worker_thread.py +183 -42
- biolib/experiments/__init__.py +0 -0
- biolib/experiments/experiment.py +356 -0
- biolib/jobs/__init__.py +1 -0
- biolib/jobs/job.py +741 -0
- biolib/jobs/job_result.py +185 -0
- biolib/jobs/types.py +50 -0
- biolib/py.typed +0 -0
- biolib/runtime/__init__.py +14 -0
- biolib/sdk/__init__.py +91 -0
- biolib/tables.py +34 -0
- biolib/typing_utils.py +2 -7
- biolib/user/__init__.py +1 -0
- biolib/user/sign_in.py +54 -0
- biolib/utils/__init__.py +162 -0
- biolib/utils/cache_state.py +94 -0
- biolib/utils/multipart_uploader.py +194 -0
- biolib/utils/seq_util.py +150 -0
- biolib/utils/zip/remote_zip.py +640 -0
- pybiolib-1.2.1890.dist-info/METADATA +41 -0
- pybiolib-1.2.1890.dist-info/RECORD +177 -0
- {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
- README.md +0 -17
- biolib/app/app_result.py +0 -68
- biolib/app/utils.py +0 -62
- biolib/biolib-js/0-biolib.worker.js +0 -1
- biolib/biolib-js/1-biolib.worker.js +0 -1
- biolib/biolib-js/2-biolib.worker.js +0 -1
- biolib/biolib-js/3-biolib.worker.js +0 -1
- biolib/biolib-js/4-biolib.worker.js +0 -1
- biolib/biolib-js/5-biolib.worker.js +0 -1
- biolib/biolib-js/6-biolib.worker.js +0 -1
- biolib/biolib-js/index.html +0 -10
- biolib/biolib-js/main-biolib.js +0 -1
- biolib/biolib_api_client/biolib_account_api.py +0 -21
- biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
- biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
- biolib/biolib_binary_format/module_output.py +0 -58
- biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
- biolib/biolib_push.py +0 -114
- biolib/cli.py +0 -203
- biolib/cli_utils.py +0 -273
- biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
- biolib/compute_node/enclave/__init__.py +0 -2
- biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
- biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
- biolib/compute_node/job_worker/executors/base_executor.py +0 -18
- biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
- biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
- biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
- biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
- biolib/lfs.py +0 -196
- biolib/pyppeteer/.circleci/config.yml +0 -100
- biolib/pyppeteer/.coveragerc +0 -3
- biolib/pyppeteer/.gitignore +0 -89
- biolib/pyppeteer/.pre-commit-config.yaml +0 -28
- biolib/pyppeteer/CHANGES.md +0 -253
- biolib/pyppeteer/CONTRIBUTING.md +0 -26
- biolib/pyppeteer/LICENSE +0 -12
- biolib/pyppeteer/README.md +0 -137
- biolib/pyppeteer/docs/Makefile +0 -177
- biolib/pyppeteer/docs/_static/custom.css +0 -28
- biolib/pyppeteer/docs/_templates/layout.html +0 -10
- biolib/pyppeteer/docs/changes.md +0 -1
- biolib/pyppeteer/docs/conf.py +0 -299
- biolib/pyppeteer/docs/index.md +0 -21
- biolib/pyppeteer/docs/make.bat +0 -242
- biolib/pyppeteer/docs/reference.md +0 -211
- biolib/pyppeteer/docs/server.py +0 -60
- biolib/pyppeteer/poetry.lock +0 -1699
- biolib/pyppeteer/pyppeteer/__init__.py +0 -135
- biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
- biolib/pyppeteer/pyppeteer/browser.py +0 -401
- biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
- biolib/pyppeteer/pyppeteer/command.py +0 -22
- biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
- biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
- biolib/pyppeteer/pyppeteer/coverage.py +0 -346
- biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
- biolib/pyppeteer/pyppeteer/dialog.py +0 -79
- biolib/pyppeteer/pyppeteer/domworld.py +0 -597
- biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
- biolib/pyppeteer/pyppeteer/errors.py +0 -48
- biolib/pyppeteer/pyppeteer/events.py +0 -63
- biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
- biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
- biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
- biolib/pyppeteer/pyppeteer/helpers.py +0 -245
- biolib/pyppeteer/pyppeteer/input.py +0 -371
- biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
- biolib/pyppeteer/pyppeteer/launcher.py +0 -683
- biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
- biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
- biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
- biolib/pyppeteer/pyppeteer/multimap.py +0 -82
- biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
- biolib/pyppeteer/pyppeteer/options.py +0 -8
- biolib/pyppeteer/pyppeteer/page.py +0 -1728
- biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
- biolib/pyppeteer/pyppeteer/target.py +0 -147
- biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
- biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
- biolib/pyppeteer/pyppeteer/tracing.py +0 -93
- biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
- biolib/pyppeteer/pyppeteer/util.py +0 -18
- biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
- biolib/pyppeteer/pyppeteer/worker.py +0 -101
- biolib/pyppeteer/pyproject.toml +0 -97
- biolib/pyppeteer/spell.txt +0 -137
- biolib/pyppeteer/tox.ini +0 -72
- biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
- biolib/start_cli.py +0 -7
- biolib/utils.py +0 -47
- biolib/validators/validate_app_version.py +0 -183
- biolib/validators/validate_argument.py +0 -134
- biolib/validators/validate_module.py +0 -323
- biolib/validators/validate_zip_file.py +0 -40
- biolib/validators/validator_utils.py +0 -103
- pybiolib-0.2.951.dist-info/LICENSE +0 -21
- pybiolib-0.2.951.dist-info/METADATA +0 -61
- pybiolib-0.2.951.dist-info/RECORD +0 -153
- pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
- /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import base64
|
|
2
|
-
|
|
3
|
-
# necessary for making RSA import work TODO: figure out if this can be removed
|
|
4
|
-
from Crypto.IO import PEM # pylint: disable=redefined-builtin, unused-import
|
|
5
|
-
|
|
6
|
-
from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
|
|
7
|
-
from biolib.biolib_binary_format import RsaEncryptedAesPackage, AesEncryptedPackage
|
|
8
|
-
from biolib.biolib_logging import logger
|
|
9
|
-
from biolib.compute_node.job_worker.executors.types import RemoteExecuteOptions
|
|
10
|
-
from biolib.compute_node.job_worker.executors.remote.nitro_enclave_utils import NitroEnclaveUtils
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class RemoteExecutor:
|
|
14
|
-
|
|
15
|
-
@staticmethod
|
|
16
|
-
def execute_job(options: RemoteExecuteOptions, module_input_serialized: bytes) -> bytes:
|
|
17
|
-
job_id = options['job']['public_id']
|
|
18
|
-
cloud_job = BiolibJobApi.create_cloud_job(module_name='main', job_id=job_id)
|
|
19
|
-
logger.debug(f"Cloud: Job created with id {cloud_job['public_id']}")
|
|
20
|
-
node_url = cloud_job['compute_node_info']['url']
|
|
21
|
-
if 'attestation_document_base64' in cloud_job['compute_node_info']:
|
|
22
|
-
attestation_document_bytes = base64.b64decode(cloud_job['compute_node_info']['attestation_document_base64'])
|
|
23
|
-
expected_pcrs_and_aws_cert = BiolibJobApi.get_enclave_json(options['biolib_base_url'])
|
|
24
|
-
|
|
25
|
-
rsa_public_key_der = NitroEnclaveUtils().attest_enclave_and_get_rsa_public_key(
|
|
26
|
-
expected_pcrs_and_aws_cert,
|
|
27
|
-
attestation_document_bytes,
|
|
28
|
-
)
|
|
29
|
-
serialized_data_to_send, aes_key_buffer = RsaEncryptedAesPackage().create(
|
|
30
|
-
rsa_public_key_der,
|
|
31
|
-
module_input_serialized,
|
|
32
|
-
)
|
|
33
|
-
else:
|
|
34
|
-
serialized_data_to_send = module_input_serialized
|
|
35
|
-
|
|
36
|
-
BiolibJobApi.start_cloud_job(job_id, serialized_data_to_send, node_url)
|
|
37
|
-
BiolibJobApi.await_compute_node_status(
|
|
38
|
-
compute_type='Cloud',
|
|
39
|
-
job_id=job_id,
|
|
40
|
-
node_url=node_url,
|
|
41
|
-
retry_interval_seconds=1.5,
|
|
42
|
-
retry_limit_minutes=30,
|
|
43
|
-
status_to_await='Result Ready',
|
|
44
|
-
)
|
|
45
|
-
compute_result = BiolibJobApi.get_cloud_result(job_id, node_url)
|
|
46
|
-
|
|
47
|
-
if 'attestation_document_base64' in cloud_job['compute_node_info']:
|
|
48
|
-
serialized_module_output: bytes = AesEncryptedPackage(compute_result).decrypt(aes_key_buffer)
|
|
49
|
-
else:
|
|
50
|
-
serialized_module_output = compute_result
|
|
51
|
-
return serialized_module_output
|
biolib/lfs.py
DELETED
|
@@ -1,196 +0,0 @@
|
|
|
1
|
-
import io
|
|
2
|
-
import json
|
|
3
|
-
import multiprocessing
|
|
4
|
-
import os
|
|
5
|
-
import zipfile as zf
|
|
6
|
-
from itertools import repeat
|
|
7
|
-
|
|
8
|
-
import requests
|
|
9
|
-
|
|
10
|
-
from biolib.app import BioLibApp
|
|
11
|
-
from biolib.biolib_api_client.biolib_account_api import BiolibAccountApi
|
|
12
|
-
from biolib.biolib_api_client.biolib_large_file_system_api import BiolibLargeFileSystemApi
|
|
13
|
-
from biolib.biolib_api_client import BiolibApiClient
|
|
14
|
-
from biolib.biolib_api_client.lfs_types import LfsUploadPartMetadata, LargeFileSystemVersionMetadata
|
|
15
|
-
from biolib.biolib_logging import logger
|
|
16
|
-
from biolib.biolib_errors import BioLibError
|
|
17
|
-
from biolib.typing_utils import List, Tuple, Iterator
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def upload_chunk(input_tuple) -> Tuple[LfsUploadPartMetadata, int]:
|
|
21
|
-
logger.debug('Starting worker...')
|
|
22
|
-
chunk_tuple, lfs_version = input_tuple
|
|
23
|
-
part_number, chunk = chunk_tuple
|
|
24
|
-
logger.debug(f'Getting upload URL for part {part_number}...')
|
|
25
|
-
upload_url_response = BiolibLargeFileSystemApi.get_upload_url(
|
|
26
|
-
resource_version_uuid=lfs_version['uuid'],
|
|
27
|
-
part_number=part_number,
|
|
28
|
-
)
|
|
29
|
-
presigned_upload_url = upload_url_response['presigned_upload_url']
|
|
30
|
-
max_upload_retries = 5
|
|
31
|
-
|
|
32
|
-
for retry_count in range(max_upload_retries):
|
|
33
|
-
logger.info(f'Uploading part {part_number}...')
|
|
34
|
-
try:
|
|
35
|
-
response = requests.put(data=chunk, url=presigned_upload_url, timeout=300) # timeout after 5 min
|
|
36
|
-
except Exception as error: # pylint: disable=broad-except
|
|
37
|
-
if retry_count >= max_upload_retries:
|
|
38
|
-
raise BioLibError(f"Max retries hit, when uploading part {part_number}. Exiting...") from error
|
|
39
|
-
else:
|
|
40
|
-
logger.warning(f"Encountered error when uploading part {part_number}. Retrying...")
|
|
41
|
-
continue
|
|
42
|
-
|
|
43
|
-
if not response.ok:
|
|
44
|
-
raise BioLibError(response.content)
|
|
45
|
-
|
|
46
|
-
break # Break if no exception thrown and response is OK
|
|
47
|
-
|
|
48
|
-
return LfsUploadPartMetadata(PartNumber=part_number, ETag=response.headers['ETag']), len(chunk)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def get_lfs_info_from_uri(lfs_uri):
|
|
52
|
-
lfs_uri_parts = lfs_uri.split('/')
|
|
53
|
-
lfs_uri_parts = [uri_part for uri_part in lfs_uri_parts if '@' not in uri_part] # Remove hostname
|
|
54
|
-
team_account_handle = lfs_uri_parts[0]
|
|
55
|
-
lfs_name = lfs_uri_parts[1]
|
|
56
|
-
account = BiolibAccountApi.fetch_by_handle(team_account_handle)
|
|
57
|
-
return account, lfs_name
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def get_files_and_size_of_cwd() -> Tuple[List[str], int]:
|
|
61
|
-
data_size = 0
|
|
62
|
-
file_list: List[str] = []
|
|
63
|
-
cwd = os.getcwd()
|
|
64
|
-
|
|
65
|
-
for path, _, files in os.walk(cwd):
|
|
66
|
-
for file in files:
|
|
67
|
-
file_path = os.path.join(path, file)
|
|
68
|
-
if os.path.islink(file_path):
|
|
69
|
-
continue # skip symlinks
|
|
70
|
-
|
|
71
|
-
file_path_without_cwd = file_path[len(cwd) + 1:] # +1 to remove starting slash
|
|
72
|
-
file_list.append(file_path_without_cwd)
|
|
73
|
-
data_size += os.path.getsize(file_path)
|
|
74
|
-
|
|
75
|
-
return file_list, data_size
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def get_iterable_zip_stream(files, chunk_size: int) -> Iterator[bytes]:
|
|
79
|
-
class ChunkedIOBuffer(io.RawIOBase):
|
|
80
|
-
def __init__(self, chunk_size: int):
|
|
81
|
-
super().__init__()
|
|
82
|
-
self.chunk_size = chunk_size
|
|
83
|
-
self.tmp_data = bytearray()
|
|
84
|
-
|
|
85
|
-
def get_buffer_size(self):
|
|
86
|
-
return len(self.tmp_data)
|
|
87
|
-
|
|
88
|
-
def read_chunk(self):
|
|
89
|
-
chunk = bytes(self.tmp_data[:self.chunk_size])
|
|
90
|
-
self.tmp_data = self.tmp_data[self.chunk_size:]
|
|
91
|
-
return chunk
|
|
92
|
-
|
|
93
|
-
def write(self, data):
|
|
94
|
-
data_length = len(data)
|
|
95
|
-
self.tmp_data += data
|
|
96
|
-
return data_length
|
|
97
|
-
|
|
98
|
-
# create chunked buffer to hold data temporarily
|
|
99
|
-
io_buffer = ChunkedIOBuffer(chunk_size)
|
|
100
|
-
|
|
101
|
-
# create zip writer that will write to the io buffer
|
|
102
|
-
zip_writer = zf.ZipFile(io_buffer, mode='w') # type: ignore
|
|
103
|
-
|
|
104
|
-
for file_path in files:
|
|
105
|
-
# generate zip info and prepare zip pointer for writing
|
|
106
|
-
z_info = zf.ZipInfo.from_file(file_path)
|
|
107
|
-
zip_pointer = zip_writer.open(z_info, mode='w')
|
|
108
|
-
|
|
109
|
-
# read file chunk by chunk
|
|
110
|
-
with open(file_path, 'br') as file_pointer:
|
|
111
|
-
while True:
|
|
112
|
-
chunk = file_pointer.read(chunk_size)
|
|
113
|
-
if len(chunk) == 0:
|
|
114
|
-
break
|
|
115
|
-
# write the chunk to the zip
|
|
116
|
-
zip_pointer.write(chunk)
|
|
117
|
-
# if writing the chunk caused us to go over chunk_size, flush it
|
|
118
|
-
if io_buffer.get_buffer_size() > chunk_size:
|
|
119
|
-
yield io_buffer.read_chunk()
|
|
120
|
-
zip_pointer.close()
|
|
121
|
-
|
|
122
|
-
# flush any remaining data in the stream (e.g. zip file meta data)
|
|
123
|
-
zip_writer.close()
|
|
124
|
-
while True:
|
|
125
|
-
chunk = io_buffer.read_chunk()
|
|
126
|
-
if len(chunk) == 0:
|
|
127
|
-
break
|
|
128
|
-
yield chunk
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def create_large_file_system(lfs_uri: str):
|
|
132
|
-
BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Large File System')
|
|
133
|
-
lfs_account, lfs_name = get_lfs_info_from_uri(lfs_uri)
|
|
134
|
-
lfs_resource = BiolibLargeFileSystemApi.create(account_uuid=lfs_account['public_id'], name=lfs_name)
|
|
135
|
-
logger.info(f"Successfully created new Large File System '{lfs_resource['uri']}'")
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
def push_large_file_system(lfs_uri: str, input_dir: str) -> None:
|
|
139
|
-
BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Large File System')
|
|
140
|
-
|
|
141
|
-
if not os.path.isdir(input_dir):
|
|
142
|
-
raise BioLibError(f'Could not find folder at {input_dir}')
|
|
143
|
-
|
|
144
|
-
if os.path.realpath(input_dir) == '/':
|
|
145
|
-
raise BioLibError('Pushing your root directory is not possible')
|
|
146
|
-
|
|
147
|
-
lfs_resource = BioLibApp(lfs_uri)
|
|
148
|
-
|
|
149
|
-
original_working_dir = os.getcwd()
|
|
150
|
-
os.chdir(input_dir)
|
|
151
|
-
files_to_zip, data_size = get_files_and_size_of_cwd()
|
|
152
|
-
data_size_in_mb = round(data_size / 10 ** 6)
|
|
153
|
-
print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
|
|
154
|
-
|
|
155
|
-
lfs_resource_version = BiolibLargeFileSystemApi.create_version(resource_uuid=lfs_resource.uuid)
|
|
156
|
-
bytes_written = 0
|
|
157
|
-
parts: List[LfsUploadPartMetadata] = []
|
|
158
|
-
process_pool = multiprocessing.Pool(
|
|
159
|
-
# use 8 cores, unless less is available
|
|
160
|
-
processes=min(8, multiprocessing.cpu_count() - 1),
|
|
161
|
-
maxtasksperchild=10,
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
chunk_iterator = enumerate(get_iterable_zip_stream(files=files_to_zip, chunk_size=50_000_000), 1) # 50 MB
|
|
165
|
-
full_iterator = zip(chunk_iterator, repeat(lfs_resource_version))
|
|
166
|
-
|
|
167
|
-
for part_metadata, chunk_length in process_pool.imap(upload_chunk, full_iterator):
|
|
168
|
-
parts.append(part_metadata)
|
|
169
|
-
|
|
170
|
-
# calculate approximate progress
|
|
171
|
-
# note: it's approximate because data_size doesn't include the size of zip metadata
|
|
172
|
-
bytes_written += chunk_length
|
|
173
|
-
approx_progress_percent = min(bytes_written / (data_size + 1) * 100, 100)
|
|
174
|
-
print(f'Wrote {chunk_length} bytes, the approximate progress is {round(approx_progress_percent, 2)}%')
|
|
175
|
-
|
|
176
|
-
BiolibLargeFileSystemApi.complete_upload(lfs_resource_version['uuid'], parts, size_bytes=data_size)
|
|
177
|
-
logger.info(f"Successfully pushed a new LFS version '{lfs_resource_version['uri']}'")
|
|
178
|
-
os.chdir(original_working_dir)
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
def describe_large_file_system(lfs_uri: str, output_as_json: bool = False) -> None:
|
|
182
|
-
BiolibApiClient.assert_is_signed_in(authenticated_action_description='describe a Large File System')
|
|
183
|
-
lfs_resource = BioLibApp(lfs_uri)
|
|
184
|
-
lfs_version = BiolibLargeFileSystemApi.fetch_version(lfs_version_uuid=lfs_resource.version['public_id'])
|
|
185
|
-
lfs_file_info = BiolibLargeFileSystemApi.fetch_file_list(lfs_version['presigned_download_url'])
|
|
186
|
-
lfs_version_metadata = LargeFileSystemVersionMetadata(files=lfs_file_info['files'], **lfs_version) # type: ignore
|
|
187
|
-
|
|
188
|
-
if output_as_json:
|
|
189
|
-
print(json.dumps(lfs_version_metadata, indent=4))
|
|
190
|
-
else:
|
|
191
|
-
print(f"Large File System {lfs_version_metadata['uri']}\ntotal {lfs_version_metadata['size_bytes']} bytes\n")
|
|
192
|
-
print('size bytes path')
|
|
193
|
-
for file in lfs_version_metadata['files']:
|
|
194
|
-
size_string = str(file['size_bytes'])
|
|
195
|
-
leading_space_string = ' ' * (10 - len(size_string))
|
|
196
|
-
print(f"{leading_space_string}{size_string} {file['path']}")
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
version: 2.1
|
|
2
|
-
|
|
3
|
-
orbs:
|
|
4
|
-
codecov: codecov/codecov@1.0.5
|
|
5
|
-
|
|
6
|
-
workflows:
|
|
7
|
-
main:
|
|
8
|
-
jobs:
|
|
9
|
-
- lint
|
|
10
|
-
- mypy
|
|
11
|
-
- test_36
|
|
12
|
-
- test_37
|
|
13
|
-
- test_38
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
jobs:
|
|
17
|
-
test_36:
|
|
18
|
-
docker:
|
|
19
|
-
- image: circleci/python:3.6
|
|
20
|
-
environment:
|
|
21
|
-
TOXENV: py36
|
|
22
|
-
PYTEST_ADDOPTS: -n 8 --junitxml=/tmp/tests/pytest/results.xml --cov=./
|
|
23
|
-
steps: &step_template
|
|
24
|
-
- checkout
|
|
25
|
-
- restore_cache:
|
|
26
|
-
keys:
|
|
27
|
-
- poetry_deps_{{checksum "poetry.lock"}}
|
|
28
|
-
- run:
|
|
29
|
-
name: Install headless Chrome dependancies
|
|
30
|
-
# chrome headless libs, see
|
|
31
|
-
# https://github.com/puppeteer/puppeteer/blob/master/docs/troubleshooting.md#chrome-headless-doesnt-launch-on-unix
|
|
32
|
-
command: |
|
|
33
|
-
sudo apt install -yq \
|
|
34
|
-
gconf-service libasound2 libatk1.0-0 libatk-bridge2.0-0 libc6 libcairo2 libcups2 libdbus-1-3 \
|
|
35
|
-
libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 \
|
|
36
|
-
libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 \
|
|
37
|
-
libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates \
|
|
38
|
-
fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget
|
|
39
|
-
- run:
|
|
40
|
-
name: Install tox
|
|
41
|
-
command: pip install tox
|
|
42
|
-
- run:
|
|
43
|
-
name: Run tests
|
|
44
|
-
command: tox
|
|
45
|
-
- save_cache:
|
|
46
|
-
key: poetry_deps_{{checksum "poetry.lock"}}
|
|
47
|
-
paths: ~/.cache/pypoetry/
|
|
48
|
-
- store_test_results:
|
|
49
|
-
path: /tmp/tests/
|
|
50
|
-
# this step will simply fail for other jobs
|
|
51
|
-
- codecov/upload:
|
|
52
|
-
file: ./pytest-cov.pth
|
|
53
|
-
|
|
54
|
-
test_37:
|
|
55
|
-
docker:
|
|
56
|
-
- image: circleci/python:3.7
|
|
57
|
-
environment:
|
|
58
|
-
TOXENV: py37
|
|
59
|
-
PYTEST_ADDOPTS: &pytest_default -n 8 --junitxml=/tmp/tests/pytest/results.xml
|
|
60
|
-
steps: *step_template
|
|
61
|
-
|
|
62
|
-
test_38:
|
|
63
|
-
docker:
|
|
64
|
-
- image: circleci/python:3.8
|
|
65
|
-
environment:
|
|
66
|
-
TOXENV: py38
|
|
67
|
-
PYTEST_ADDOPTS: *pytest_default
|
|
68
|
-
steps: *step_template
|
|
69
|
-
|
|
70
|
-
mypy:
|
|
71
|
-
docker:
|
|
72
|
-
- image: circleci/python:3.8
|
|
73
|
-
environment:
|
|
74
|
-
TOXENV: mypy
|
|
75
|
-
MYPY_JUNIT_XML_PATH: /tmp/tests/mypy/results.xml
|
|
76
|
-
steps:
|
|
77
|
-
- checkout
|
|
78
|
-
- run:
|
|
79
|
-
name: Install tox
|
|
80
|
-
command: pip install tox
|
|
81
|
-
- run:
|
|
82
|
-
name: Check typing
|
|
83
|
-
command: tox
|
|
84
|
-
- store_test_results:
|
|
85
|
-
path: /tmp/tests
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
lint:
|
|
89
|
-
docker:
|
|
90
|
-
- image: circleci/python:3.6
|
|
91
|
-
environment:
|
|
92
|
-
TOXENV: flake8
|
|
93
|
-
steps:
|
|
94
|
-
- checkout
|
|
95
|
-
- run:
|
|
96
|
-
name: Install tox
|
|
97
|
-
command: pip install tox
|
|
98
|
-
- run:
|
|
99
|
-
name: Check code style
|
|
100
|
-
command: tox
|
biolib/pyppeteer/.coveragerc
DELETED
biolib/pyppeteer/.gitignore
DELETED
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
# Byte-compiled / optimized / DLL files
|
|
2
|
-
__pycache__/
|
|
3
|
-
*.py[cod]
|
|
4
|
-
*$py.class
|
|
5
|
-
|
|
6
|
-
# C extensions
|
|
7
|
-
*.so
|
|
8
|
-
|
|
9
|
-
# Distribution / packaging
|
|
10
|
-
.Python
|
|
11
|
-
build/
|
|
12
|
-
develop-eggs/
|
|
13
|
-
dist/
|
|
14
|
-
downloads/
|
|
15
|
-
eggs/
|
|
16
|
-
.eggs/
|
|
17
|
-
lib/
|
|
18
|
-
lib64/
|
|
19
|
-
parts/
|
|
20
|
-
sdist/
|
|
21
|
-
var/
|
|
22
|
-
*.egg-info/
|
|
23
|
-
.installed.cfg
|
|
24
|
-
*.egg
|
|
25
|
-
|
|
26
|
-
# Virtualenv
|
|
27
|
-
env/
|
|
28
|
-
venv/
|
|
29
|
-
bin/
|
|
30
|
-
include/
|
|
31
|
-
lib/
|
|
32
|
-
lib64
|
|
33
|
-
lib64/
|
|
34
|
-
man/
|
|
35
|
-
pyvenv.cfg
|
|
36
|
-
|
|
37
|
-
# PyInstaller
|
|
38
|
-
# Usually these files are written by a python script from a template
|
|
39
|
-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
40
|
-
*.manifest
|
|
41
|
-
*.spec
|
|
42
|
-
|
|
43
|
-
# Installer logs
|
|
44
|
-
pip-log.txt
|
|
45
|
-
pip-delete-this-directory.txt
|
|
46
|
-
|
|
47
|
-
# Unit test / coverage reports
|
|
48
|
-
htmlcov/
|
|
49
|
-
.tox/
|
|
50
|
-
.coverage
|
|
51
|
-
.coverage.*
|
|
52
|
-
.cache
|
|
53
|
-
.doit.db.*
|
|
54
|
-
.mypy_cache
|
|
55
|
-
nosetests.xml
|
|
56
|
-
coverage.xml
|
|
57
|
-
*,cover
|
|
58
|
-
.hypothesis/
|
|
59
|
-
.pytest_cache/
|
|
60
|
-
|
|
61
|
-
# Translations
|
|
62
|
-
*.mo
|
|
63
|
-
*.pot
|
|
64
|
-
|
|
65
|
-
# Django stuff:
|
|
66
|
-
*.log
|
|
67
|
-
|
|
68
|
-
# Sphinx documentation
|
|
69
|
-
docs/_build/
|
|
70
|
-
|
|
71
|
-
# PyBuilder
|
|
72
|
-
target/
|
|
73
|
-
|
|
74
|
-
# pyenv python configuration file
|
|
75
|
-
.python-version
|
|
76
|
-
|
|
77
|
-
# pycharm file
|
|
78
|
-
.idea/
|
|
79
|
-
|
|
80
|
-
###### direnv ######
|
|
81
|
-
.direnv
|
|
82
|
-
.envrc
|
|
83
|
-
|
|
84
|
-
###### zsh-autoenv ######
|
|
85
|
-
.autoenv.zsh
|
|
86
|
-
.autoenv_leave.zsh
|
|
87
|
-
|
|
88
|
-
# test files
|
|
89
|
-
trace.json
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
repos:
|
|
2
|
-
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
-
rev: v2.4.0
|
|
4
|
-
hooks:
|
|
5
|
-
- id: trailing-whitespace
|
|
6
|
-
- id: end-of-file-fixer
|
|
7
|
-
- id: check-yaml
|
|
8
|
-
- id: check-toml
|
|
9
|
-
- id: check-builtin-literals
|
|
10
|
-
- id: debug-statements
|
|
11
|
-
- id: check-added-large-files
|
|
12
|
-
- repo: https://github.com/asottile/seed-isort-config
|
|
13
|
-
rev: v2.1.1
|
|
14
|
-
hooks:
|
|
15
|
-
- id: seed-isort-config
|
|
16
|
-
# if we don't specify these the seeder will intermittently include
|
|
17
|
-
# these as 'known third parties' which messes with our diffs
|
|
18
|
-
args: ['--application-directories', './pyppeteer:./tests']
|
|
19
|
-
- repo: https://github.com/timothycrosley/isort
|
|
20
|
-
rev: 4.3.21
|
|
21
|
-
hooks:
|
|
22
|
-
- id: isort
|
|
23
|
-
additional_dependencies: [toml]
|
|
24
|
-
- repo: https://github.com/psf/black
|
|
25
|
-
rev: stable
|
|
26
|
-
hooks:
|
|
27
|
-
- id: black
|
|
28
|
-
language_version: python3
|