pybiolib 1.2.1379__py3-none-any.whl → 1.2.1389__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pybiolib might be problematic. Click here for more details.
- biolib/biolib_binary_format/remote_stream_seeker.py +44 -5
- biolib/jobs/job_result.py +1 -1
- {pybiolib-1.2.1379.dist-info → pybiolib-1.2.1389.dist-info}/METADATA +1 -1
- {pybiolib-1.2.1379.dist-info → pybiolib-1.2.1389.dist-info}/RECORD +7 -7
- {pybiolib-1.2.1379.dist-info → pybiolib-1.2.1389.dist-info}/WHEEL +0 -0
- {pybiolib-1.2.1379.dist-info → pybiolib-1.2.1389.dist-info}/entry_points.txt +0 -0
- {pybiolib-1.2.1379.dist-info → pybiolib-1.2.1389.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
1
3
|
from biolib.biolib_binary_format.utils import IndexableBuffer
|
|
4
|
+
from biolib.biolib_logging import logger
|
|
2
5
|
from biolib.typing_utils import Iterable
|
|
3
6
|
|
|
4
7
|
|
|
@@ -8,11 +11,14 @@ class StreamSeeker:
|
|
|
8
11
|
upstream_buffer: IndexableBuffer,
|
|
9
12
|
files_data_start: int,
|
|
10
13
|
files_data_end: int,
|
|
11
|
-
|
|
14
|
+
max_chunk_size: int,
|
|
12
15
|
):
|
|
13
16
|
self._upstream_buffer = upstream_buffer
|
|
14
17
|
self._files_data_end = files_data_end
|
|
15
|
-
self._download_chunk_size_in_bytes =
|
|
18
|
+
self._download_chunk_size_in_bytes = 100_000
|
|
19
|
+
self._min_chunk_size = min(100_000, max_chunk_size)
|
|
20
|
+
self._max_chunk_size = max_chunk_size
|
|
21
|
+
self._target_download_time_seconds = 1.0
|
|
16
22
|
|
|
17
23
|
self._buffer_start = files_data_start
|
|
18
24
|
self._buffer = bytearray()
|
|
@@ -34,12 +40,45 @@ class StreamSeeker:
|
|
|
34
40
|
# Only fetch if there is still data left upstream
|
|
35
41
|
if self._download_chunk_size_in_bytes > len(self._buffer):
|
|
36
42
|
# Only fetch if size of buffer is below chunk size
|
|
37
|
-
|
|
43
|
+
fetch_size = min(byte_count_left_in_stream, self._download_chunk_size_in_bytes)
|
|
44
|
+
|
|
45
|
+
start_time = time.monotonic()
|
|
46
|
+
fetched_data = self._upstream_buffer.get_data(
|
|
38
47
|
start=start_of_fetch,
|
|
39
|
-
length=
|
|
40
|
-
)
|
|
48
|
+
length=fetch_size,
|
|
49
|
+
)
|
|
50
|
+
download_time = time.monotonic() - start_time
|
|
51
|
+
|
|
52
|
+
self._buffer.extend(fetched_data)
|
|
53
|
+
|
|
54
|
+
if download_time > 0:
|
|
55
|
+
self._adjust_chunk_size(download_time, fetch_size)
|
|
41
56
|
|
|
42
57
|
bytes_to_yield = self._buffer[:file_byte_count_remaining] # Returns empty array if "out of bounds"
|
|
43
58
|
yield bytes_to_yield
|
|
44
59
|
self._buffer = self._buffer[file_byte_count_remaining:] # Returns empty array if "out of bounds"
|
|
45
60
|
self._buffer_start += len(bytes_to_yield)
|
|
61
|
+
|
|
62
|
+
def _adjust_chunk_size(self, download_time: float, _bytes_downloaded: int) -> None:
|
|
63
|
+
new_chunk_size = self._download_chunk_size_in_bytes
|
|
64
|
+
time_ratio = download_time / self._target_download_time_seconds
|
|
65
|
+
|
|
66
|
+
if time_ratio > 1.1:
|
|
67
|
+
adjustment_factor = 1.0 / time_ratio
|
|
68
|
+
adjustment_factor = max(adjustment_factor, 0.5)
|
|
69
|
+
new_chunk_size = int(self._download_chunk_size_in_bytes * adjustment_factor)
|
|
70
|
+
elif time_ratio < 0.9:
|
|
71
|
+
adjustment_factor = 1.0 / time_ratio
|
|
72
|
+
new_chunk_size = int(self._download_chunk_size_in_bytes * adjustment_factor)
|
|
73
|
+
|
|
74
|
+
new_chunk_size = max(
|
|
75
|
+
self._min_chunk_size,
|
|
76
|
+
min(self._max_chunk_size, new_chunk_size)
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
if new_chunk_size != self._download_chunk_size_in_bytes:
|
|
80
|
+
logger.debug(
|
|
81
|
+
f"Adjusting chunk size: {self._download_chunk_size_in_bytes} -> {new_chunk_size} bytes "
|
|
82
|
+
f"(download_time={download_time:.2f}s, time_ratio={time_ratio:.2f})"
|
|
83
|
+
)
|
|
84
|
+
self._download_chunk_size_in_bytes = new_chunk_size
|
biolib/jobs/job_result.py
CHANGED
|
@@ -55,7 +55,7 @@ class JobResult:
|
|
|
55
55
|
stream_seeker = StreamSeeker(
|
|
56
56
|
files_data_start=first_file.start,
|
|
57
57
|
files_data_end=last_file.start + last_file.length,
|
|
58
|
-
|
|
58
|
+
max_chunk_size=min(total_files_data_to_download_in_bytes, 10_000_000),
|
|
59
59
|
upstream_buffer=module_output.buffer,
|
|
60
60
|
)
|
|
61
61
|
|
|
@@ -88,7 +88,7 @@ biolib/biolib_binary_format/file_in_container.py,sha256=j1eEPRxf_ew8I6G8sDiiZZxn
|
|
|
88
88
|
biolib/biolib_binary_format/module_input.py,sha256=led2QhHeec_ymBPw5uEn3_3vJKI-1T8zrFQGqwEWLMY,2788
|
|
89
89
|
biolib/biolib_binary_format/module_output_v2.py,sha256=J5ZO5gCSeudpE12EVDrjYrNTS2DwgszY-SVXT7Qjuyg,5913
|
|
90
90
|
biolib/biolib_binary_format/remote_endpoints.py,sha256=V48mwOj3eAQAKp-8DjtWUdEKUyC0WKc1pEiKTmtjrJY,1651
|
|
91
|
-
biolib/biolib_binary_format/remote_stream_seeker.py,sha256=
|
|
91
|
+
biolib/biolib_binary_format/remote_stream_seeker.py,sha256=QzCiDiVZMgX6HZTrw-oJsFIHzNd8V1AseaBZP9wqMxU,3608
|
|
92
92
|
biolib/biolib_binary_format/saved_job.py,sha256=nFHVFRNTNcAFGODLSiBntCtMk55QKwreUq6qLX80dI4,1125
|
|
93
93
|
biolib/biolib_binary_format/stdout_and_stderr.py,sha256=WfUUJFFCBrtfXjuWIaRPiWCpuBLxfko68oxoTKhrwx8,1023
|
|
94
94
|
biolib/biolib_binary_format/system_exception.py,sha256=T3iL4_cSHAHim3RSDPS8Xyb1mfteaJBZonSXuRltc28,853
|
|
@@ -146,7 +146,7 @@ biolib/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
|
|
|
146
146
|
biolib/experiments/experiment.py,sha256=ePipnJs6TW84jr9iBfYYRpYuEXUVGIxaD8Z8Jf0FYIA,13909
|
|
147
147
|
biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
|
|
148
148
|
biolib/jobs/job.py,sha256=X7F_jCwAwtPcI1qu-oLamZNzt1IVSIeOPWlax1ztfAM,29674
|
|
149
|
-
biolib/jobs/job_result.py,sha256=
|
|
149
|
+
biolib/jobs/job_result.py,sha256=Yl22Z8dc-8iKeC7Y4q7kgd08Ndajys1W1MsAbvE8wOA,5252
|
|
150
150
|
biolib/jobs/types.py,sha256=rFs6bQWsNI-nb1Hu9QzOW2zFZ8bOVt7ax4UpGVASxVA,1034
|
|
151
151
|
biolib/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
152
152
|
biolib/runtime/__init__.py,sha256=MlRepA11n2H-3plB5rzWyyHK2JmP6PiaP3i6x3vt0mg,506
|
|
@@ -161,8 +161,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
|
|
|
161
161
|
biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
|
|
162
162
|
biolib/utils/seq_util.py,sha256=rImaghQGuIqTVWks6b9P2yKuN34uePUYPUFW_Wyoa4A,6737
|
|
163
163
|
biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
|
|
164
|
-
pybiolib-1.2.
|
|
165
|
-
pybiolib-1.2.
|
|
166
|
-
pybiolib-1.2.
|
|
167
|
-
pybiolib-1.2.
|
|
168
|
-
pybiolib-1.2.
|
|
164
|
+
pybiolib-1.2.1389.dist-info/METADATA,sha256=y8bhxtHgW135kkkPiKCE0WKPSZ4-DaEFS8aE9UAhmn0,1644
|
|
165
|
+
pybiolib-1.2.1389.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
166
|
+
pybiolib-1.2.1389.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
|
|
167
|
+
pybiolib-1.2.1389.dist-info/licenses/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
|
168
|
+
pybiolib-1.2.1389.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|