rclone-api 1.2.154__py2.py3-none-any.whl → 1.3.1__py2.py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- rclone_api/__init__.py +6 -2
- rclone_api/{logging.py → log.py} +5 -0
- rclone_api/mount_read_chunker.py +1 -1
- rclone_api/s3/chunk_task.py +215 -0
- rclone_api/s3/chunk_types.py +6 -0
- rclone_api/s3/upload_file_multipart.py +1 -1
- {rclone_api-1.2.154.dist-info → rclone_api-1.3.1.dist-info}/METADATA +1 -1
- {rclone_api-1.2.154.dist-info → rclone_api-1.3.1.dist-info}/RECORD +12 -12
- rclone_api/s3/chunk_file.py +0 -146
- {rclone_api-1.2.154.dist-info → rclone_api-1.3.1.dist-info}/LICENSE +0 -0
- {rclone_api-1.2.154.dist-info → rclone_api-1.3.1.dist-info}/WHEEL +0 -0
- {rclone_api-1.2.154.dist-info → rclone_api-1.3.1.dist-info}/entry_points.txt +0 -0
- {rclone_api-1.2.154.dist-info → rclone_api-1.3.1.dist-info}/top_level.txt +0 -0
rclone_api/__init__.py
CHANGED
@@ -1,5 +1,6 @@
 # Import logging module to activate default configuration
-
+
+from rclone_api import log
 
 from .completed_process import CompletedProcess
 from .config import Config, Parsed, Section
@@ -10,7 +11,7 @@ from .file import File
 from .filelist import FileList
 
 # Import the configure_logging function to make it available at package level
-from .
+from .log import configure_logging, setup_default_logging
 from .process import Process
 from .rclone import Rclone, rclone_verbose
 from .remote import Remote
@@ -42,4 +43,7 @@ __all__ = [
     "MultiUploadResult",
     "SizeSuffix",
     "configure_logging",
+    "log",
 ]
+
+setup_default_logging()
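
Note: the practical effect of the new trailing setup_default_logging() call is that merely importing the package installs a default logging configuration. A minimal sketch (assuming the wheel is installed; the logger name is illustrative):

import logging

import rclone_api  # noqa: F401  -- the import itself now runs setup_default_logging() once

# With no prior logging setup, the package default applies. Applications that
# configure logging before this import keep their own configuration, because
# the default is only installed when the root logger has no handlers.
logging.getLogger("my_app").info("visible under the package default")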
rclone_api/{logging.py → log.py}
RENAMED
@@ -1,9 +1,14 @@
 import logging
 import sys
 
+_INITIALISED = False
+
 
 def setup_default_logging():
     """Set up default logging configuration if none exists."""
+    global _INITIALISED
+    if _INITIALISED:
+        return
     if not logging.root.handlers:
         logging.basicConfig(
             level=logging.INFO,
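
The hunk shows only the head of setup_default_logging(); the line that flips the new _INITIALISED flag presumably sits below the visible context. A self-contained sketch of the double-guard pattern this change introduces (the final flag assignment is an assumption, not shown in the hunk):

import logging

_INITIALISED = False


def setup_default_logging() -> None:
    # Guard 1: the module-level flag turns repeat calls into no-ops.
    global _INITIALISED
    if _INITIALISED:
        return
    # Guard 2: only install a default when the host application has not
    # already attached handlers to the root logger.
    if not logging.root.handlers:
        logging.basicConfig(level=logging.INFO)
    _INITIALISED = True  # assumed: set after configuring (outside the hunk)


setup_default_logging()
setup_default_logging()  # safe: the second call returns immediately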
rclone_api/mount_read_chunker.py
CHANGED
@@ -94,7 +94,7 @@ class MultiMountFileChunker:
         assert offset >= 0, f"Invalid offset: {offset}"
         assert (
             offset + size <= self.filesize
-        ), f"Invalid offset + size: {offset} + {size} ({offset+size})"
+        ), f"Invalid offset + size: {offset} + {size} ({offset+size}) > {self.filesize}"
 
         try:
             mount = self._acquire_mount()
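
Only the assertion message changes here: it now reports the bound that was violated (self.filesize), not just the offending sum. Illustrative output with made-up numbers:

filesize = 1_000
offset, size = 900, 200
if not offset + size <= filesize:
    print(f"Invalid offset + size: {offset} + {size} ({offset + size}) > {filesize}")
    # -> Invalid offset + size: 900 + 200 (1100) > 1000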
rclone_api/s3/chunk_task.py
ADDED
@@ -0,0 +1,215 @@
+import logging
+import time
+from concurrent.futures import Future
+from pathlib import Path
+from queue import Queue
+from threading import Event, Lock
+from typing import Any, Callable
+
+from rclone_api.mount_read_chunker import FilePart
+from rclone_api.s3.chunk_types import S3FileInfo, UploadState
+from rclone_api.types import EndOfStream
+
+logger = logging.getLogger(__name__)  # noqa
+
+
+# def _get_file_size(file_path: Path, timeout: int = 60) -> int:
+#     sleep_time = timeout / 60 if timeout > 0 else 1
+#     start = time.time()
+#     while True:
+#         expired = time.time() - start > timeout
+#         try:
+#             time.sleep(sleep_time)
+#             if file_path.exists():
+#                 return file_path.stat().st_size
+#         except FileNotFoundError as e:
+#             if expired:
+#                 print(f"File not found: {file_path}, exception is {e}")
+#                 raise
+#         if expired:
+#             raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
+
+
+class _ShouldStopChecker:
+    def __init__(self, max_chunks: int | None) -> None:
+        self.count = 0
+        self.max_chunks = max_chunks
+
+    def should_stop(self) -> bool:
+        if self.max_chunks is None:
+            return False
+        if self.count >= self.max_chunks:
+            logger.info(
+                f"Stopping file chunker after {self.count} chunks because it exceeded max_chunks {self.max_chunks}"
+            )
+            return True
+        # self.count += 1
+        return False
+
+    def increment(self):
+        self.count += 1
+
+
+class _PartNumberTracker:
+    def __init__(
+        self, start_part_value: int, last_part_value: int, done_parts: set[int]
+    ) -> None:
+        # self._num_parts = (last_part_value - start_part_value) + 1
+        self._start_part_value = start_part_value
+        self._last_part_value = last_part_value
+        self._done_part_numbers: set[int] = done_parts
+        self._curr_part_number = start_part_value
+        self._finished = False
+        self._lock = Lock()
+
+    def next_part_number(self) -> int | None:
+        with self._lock:
+            while self._curr_part_number in self._done_part_numbers:
+                self._curr_part_number += 1
+            if self._curr_part_number > self._last_part_value:
+                self._finished = True
+                return None
+            curr_part_number = self._curr_part_number
+            self._curr_part_number += (
+                1  # prevent a second thread from getting the same part number
+            )
+            return curr_part_number
+
+    def is_finished(self) -> bool:
+        with self._lock:
+            return self._finished
+
+    def add_finished_part_number(self, part_number: int) -> None:
+        with self._lock:
+            self._done_part_numbers.add(part_number)
+
+
+class _OnCompleteHandler:
+    def __init__(
+        self,
+        part_number_tracker: _PartNumberTracker,
+        file_path: Path,
+        queue_upload: Queue[FilePart | EndOfStream],
+    ) -> None:
+        self.part_number_tracker = part_number_tracker
+        self.file_path = file_path
+        self.queue_upload = queue_upload
+
+    def on_complete(self, fut: Future[FilePart]) -> None:
+        logger.debug("Chunk read complete")
+        fp: FilePart = fut.result()
+        extra: S3FileInfo = fp.extra
+        assert isinstance(extra, S3FileInfo)
+        part_number = extra.part_number
+        if fp.is_error():
+            logger.warning(f"Error reading file: {fp}, skipping part {part_number}")
+            return
+
+        if fp.n_bytes() == 0:
+            logger.warning(f"Empty data for part {part_number} of {self.file_path}")
+            raise ValueError(f"Empty data for part {part_number} of {self.file_path}")
+
+        if isinstance(fp.payload, Exception):
+            logger.warning(f"Error reading file because of error: {fp.payload}")
+            return
+
+        # done_part_numbers.add(part_number)
+        # queue_upload.put(fp)
+        self.part_number_tracker.add_finished_part_number(part_number)
+        self.queue_upload.put(fp)
+
+
+def file_chunker(
+    upload_state: UploadState,
+    fetcher: Callable[[int, int, Any], Future[FilePart]],
+    max_chunks: int | None,
+    cancel_signal: Event,
+    queue_upload: Queue[FilePart | EndOfStream],
+) -> None:
+    final_part_number = upload_state.upload_info.total_chunks() + 1
+    should_stop_checker = _ShouldStopChecker(max_chunks)
+
+    upload_info = upload_state.upload_info
+    file_path = upload_info.src_file_path
+    chunk_size = upload_info.chunk_size
+    # src = Path(file_path)
+
+    # for p in upload_state.parts:
+    #     if not isinstance(p, EndOfStream):
+    #         part_tracker.add_done_part_number(p.part_number)
+
+    done_part_numbers: set[int] = {
+        p.part_number for p in upload_state.parts if not isinstance(p, EndOfStream)
+    }
+
+    part_tracker = _PartNumberTracker(
+        start_part_value=1,
+        last_part_value=final_part_number,
+        done_parts=done_part_numbers,
+    )
+
+    callback = _OnCompleteHandler(part_tracker, file_path, queue_upload)
+
+    try:
+        num_parts = upload_info.total_chunks()
+
+        if cancel_signal.is_set():
+            logger.info(
+                f"Cancel signal is set for file chunker while processing {file_path}, returning"
+            )
+            return
+
+        while not should_stop_checker.should_stop():
+            should_stop_checker.increment()
+            logger.debug("Processing next chunk")
+            curr_part_number = part_tracker.next_part_number()
+            if curr_part_number is None:
+                logger.info(f"File {file_path} has completed chunking all parts")
+                break
+
+            assert curr_part_number is not None
+            offset = (curr_part_number - 1) * chunk_size
+            file_size = upload_info.file_size
+
+            assert offset < file_size, f"Offset {offset} is greater than file size"
+            fetch_size = max(0, min(chunk_size, file_size - offset))
+
+            # assert fetch_size > 0, f"Invalid fetch size: {fetch_size}"
+
+            if fetch_size == 0:
+                logger.error(
+                    f"Empty data for part {curr_part_number} of {file_path}, is this the last chunk?"
+                )
+                # assert final_part_number == curr_part_number, f"Final part number is {final_part_number} but current part number is {curr_part_number}"
+                if final_part_number != curr_part_number:
+                    raise ValueError(
+                        f"This should have been the last part, but it is not: {final_part_number} != {curr_part_number}"
+                    )
+
+            # Open the file, seek, read the chunk, and close immediately.
+            # with open(file_path, "rb") as f:
+            #     f.seek(offset)
+            #     data = f.read(chunk_size)
+
+            # data = chunk_fetcher(offset, chunk_size).result()
+
+            assert curr_part_number is not None
+            # cpn: int = curr_part_number
+
+            # offset = (curr_part_number - 1) * chunk_size
+            logger.info(
+                f"Reading chunk {curr_part_number} of {num_parts} for {file_path}"
+            )
+            fut = fetcher(
+                offset, fetch_size, S3FileInfo(upload_info.upload_id, curr_part_number)
+            )
+            fut.add_done_callback(callback.on_complete)
+            # wait until the queue_upload queue can accept the next chunk
+            while queue_upload.full():
+                time.sleep(0.1)
+    except Exception as e:
+
+        logger.error(f"Error reading file: {e}", exc_info=True)
+    finally:
+        logger.info(f"Finishing FILE CHUNKER for {file_path} and adding EndOfStream")
+        queue_upload.put(EndOfStream())
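
The core of the new module is _PartNumberTracker, which replaces the closure-based bookkeeping of the deleted chunk_file.py: parts already recorded in upload_state.parts are skipped (resume support), and a lock ensures concurrent workers never claim the same part. A standalone sketch of that handout logic (simplified; not the library's API, names are hypothetical):

from threading import Lock


class PartCounter:
    def __init__(self, last_part: int, done: set[int]) -> None:
        self._last = last_part
        self._done = done  # part numbers already uploaded (resume)
        self._next = 1
        self._lock = Lock()

    def next_part_number(self) -> int | None:
        with self._lock:
            while self._next in self._done:  # skip finished parts
                self._next += 1
            if self._next > self._last:
                return None  # all parts handed out
            n = self._next
            self._next += 1  # claim it before releasing the lock
            return n


counter = PartCounter(last_part=5, done={1, 3})
print([counter.next_part_number() for _ in range(5)])  # [2, 4, 5, None, None]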
rclone_api/s3/upload_file_multipart.py
CHANGED
@@ -11,7 +11,7 @@ from typing import Any, Callable
 from botocore.client import BaseClient
 
 from rclone_api.mount_read_chunker import FilePart
-from rclone_api.s3.
+from rclone_api.s3.chunk_task import S3FileInfo, file_chunker
 from rclone_api.s3.chunk_types import (
     FinishedPiece,
     UploadInfo,
{rclone_api-1.2.154.dist-info → rclone_api-1.3.1.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-rclone_api/__init__.py,sha256=
+rclone_api/__init__.py,sha256=96hecpVEvaLklfaL5K_uBV-gaftt9kklC0XMlutZMf8,1197
 rclone_api/cli.py,sha256=dibfAZIh0kXWsBbfp3onKLjyZXo54mTzDjUdzJlDlWo,231
 rclone_api/completed_process.py,sha256=_IZ8IWK7DM1_tsbDEkH6wPZ-bbcrgf7A7smls854pmg,1775
 rclone_api/config.py,sha256=f6jEAxVorGFr31oHfcsu5AJTtOJj2wR5tTSsbGGZuIw,2558
@@ -11,9 +11,9 @@ rclone_api/exec.py,sha256=Pd7pUBd8ib5MzqvMybG2DQISPRbDRu20VjVRL2mLAVY,1076
 rclone_api/file.py,sha256=EP5yT2dZ0H2p7CY5n0y5k5pHhIliV25pm8KOwBklUTk,1863
 rclone_api/filelist.py,sha256=xbiusvNgaB_b_kQOZoHMJJxn6TWGtPrWd2J042BI28o,767
 rclone_api/group_files.py,sha256=H92xPW9lQnbNw5KbtZCl00bD6iRh9yRbCuxku4j_3dg,8036
-rclone_api/
+rclone_api/log.py,sha256=VZHM7pNSXip2ZLBKMP7M1u-rp_F7zoafFDuR8CPUoKI,1271
 rclone_api/mount.py,sha256=TE_VIBMW7J1UkF_6HRCt8oi_jGdMov4S51bm2OgxFAM,10045
-rclone_api/mount_read_chunker.py,sha256=
+rclone_api/mount_read_chunker.py,sha256=Pz4s6AGigB_-vTAEdLK9Fv116j87yN4Qg0Uz1COwPtQ,4504
 rclone_api/process.py,sha256=rBj_S86jC6nqCYop-jq8r9eMSteKeObxUrJMgH8LZvI,5084
 rclone_api/rclone.py,sha256=WSCbIo4NtD871nvR1ZG9v7ihbLi7z1Ku4Pg4mgrNxZw,49364
 rclone_api/remote.py,sha256=O9WDUFQy9f6oT1HdUbTixK2eg0xtBBm8k4Xl6aa6K00,431
@@ -30,14 +30,14 @@ rclone_api/experimental/flags_base.py,sha256=ajU_czkTcAxXYU-SlmiCfHY7aCQGHvpCLqJ
 rclone_api/profile/mount_copy_bytes.py,sha256=okzcfpmLcQvh5IUcIwZs9jLPSxFMv2igt2-kHoEmlfE,8571
 rclone_api/s3/api.py,sha256=PafsIEyWDpLWAXsZAjFm9CY14vJpsDr9lOsn0kGRLZ0,4009
 rclone_api/s3/basic_ops.py,sha256=hK3366xhVEzEcjz9Gk_8lFx6MRceAk72cax6mUrr6ko,2104
-rclone_api/s3/
-rclone_api/s3/chunk_types.py,sha256=
+rclone_api/s3/chunk_task.py,sha256=qJLoacUIucGh7h4Puo7wfqSYYoz7dxG-5cJ3TopgzwM,7735
+rclone_api/s3/chunk_types.py,sha256=oSWv8No9V3BeM7IcGnowyR2a7YrszdAXzEJlxaeZcp0,8852
 rclone_api/s3/create.py,sha256=wgfkapv_j904CfKuWyiBIWJVxfAx_ftemFSUV14aT68,3149
 rclone_api/s3/types.py,sha256=Elmh__gvZJyJyElYwMmvYZIBIunDJiTRAbEg21GmsRU,1604
-rclone_api/s3/upload_file_multipart.py,sha256=
-rclone_api-1.
-rclone_api-1.
-rclone_api-1.
-rclone_api-1.
-rclone_api-1.
-rclone_api-1.
+rclone_api/s3/upload_file_multipart.py,sha256=UVMTSeP98fSQdOYcCdi9tV5ZjOxRDuhZbiBbVaf-rCM,11385
+rclone_api-1.3.1.dist-info/LICENSE,sha256=b6pOoifSXiUaz_lDS84vWlG3fr4yUKwB8fzkrH9R8bQ,1064
+rclone_api-1.3.1.dist-info/METADATA,sha256=ZjOE584_hiG9__K0BqmA3OYmTb076iT18dznebi5D98,4536
+rclone_api-1.3.1.dist-info/WHEEL,sha256=rF4EZyR2XVS6irmOHQIJx2SUqXLZKRMUrjsg8UwN-XQ,109
+rclone_api-1.3.1.dist-info/entry_points.txt,sha256=TV8kwP3FRzYwUEr0RLC7aJh0W03SAefIJNXTJ-FdMIQ,200
+rclone_api-1.3.1.dist-info/top_level.txt,sha256=EvZ7uuruUpe9RiUyEp25d1Keq7PWYNT0O_-mr8FCG5g,11
+rclone_api-1.3.1.dist-info/RECORD,,
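
RECORD lists every file in the wheel as path,hash,size, so any file whose bytes changed gets a new entry, and the dist-info rename rewrites the remaining paths. Per the wheel spec, the hash field is the urlsafe-base64 SHA-256 digest with padding stripped; a short sketch for checking an entry against a file on disk (the commented usage reuses the log.py entry above):

import base64
import hashlib


def record_hash(data: bytes) -> str:
    # RECORD hash format: sha256=<urlsafe base64 of the digest, '=' padding removed>
    digest = hashlib.sha256(data).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# with open("rclone_api/log.py", "rb") as f:
#     assert record_hash(f.read()) == "sha256=VZHM7pNSXip2ZLBKMP7M1u-rp_F7zoafFDuR8CPUoKI"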
rclone_api/s3/chunk_file.py
DELETED
@@ -1,146 +0,0 @@
-import logging
-import time
-from concurrent.futures import Future
-from dataclasses import dataclass
-from pathlib import Path
-from queue import Queue
-from threading import Event
-from typing import Any, Callable
-
-from rclone_api.mount_read_chunker import FilePart
-from rclone_api.s3.chunk_types import UploadState
-from rclone_api.types import EndOfStream
-
-logger = logging.getLogger(__name__)  # noqa
-
-
-def _get_file_size(file_path: Path, timeout: int = 60) -> int:
-    sleep_time = timeout / 60 if timeout > 0 else 1
-    start = time.time()
-    while True:
-        expired = time.time() - start > timeout
-        try:
-            time.sleep(sleep_time)
-            if file_path.exists():
-                return file_path.stat().st_size
-        except FileNotFoundError as e:
-            if expired:
-                print(f"File not found: {file_path}, exception is {e}")
-                raise
-        if expired:
-            raise TimeoutError(f"File {file_path} not found after {timeout} seconds")
-
-
-@dataclass
-class S3FileInfo:
-    upload_id: str
-    part_number: int
-
-
-def file_chunker(
-    upload_state: UploadState,
-    fetcher: Callable[[int, int, Any], Future[FilePart]],
-    max_chunks: int | None,
-    cancel_signal: Event,
-    queue_upload: Queue[FilePart | EndOfStream],
-) -> None:
-    count = 0
-
-    def should_stop() -> bool:
-        nonlocal count
-
-        if max_chunks is None:
-            return False
-        if count >= max_chunks:
-            logger.info(
-                f"Stopping file chunker after {count} chunks because it exceeded max_chunks {max_chunks}"
-            )
-            return True
-        count += 1
-        return False
-
-    upload_info = upload_state.upload_info
-    file_path = upload_info.src_file_path
-    chunk_size = upload_info.chunk_size
-    # src = Path(file_path)
-
-    try:
-        part_number = 1
-        done_part_numbers: set[int] = {
-            p.part_number for p in upload_state.parts if not isinstance(p, EndOfStream)
-        }
-        num_parts = upload_info.total_chunks()
-
-        def next_part_number() -> int | None:
-            nonlocal part_number
-            while part_number in done_part_numbers:
-                part_number += 1
-            if part_number > num_parts:
-                return None
-            return part_number
-
-        if cancel_signal.is_set():
-            logger.info(
-                f"Cancel signal is set for file chunker while processing {file_path}, returning"
-            )
-            return
-
-        while not should_stop():
-            logger.debug("Processing next chunk")
-            curr_part_number = next_part_number()
-            if curr_part_number is None:
-                logger.info(f"File {file_path} has completed chunking all parts")
-                break
-            assert curr_part_number is not None
-            offset = (curr_part_number - 1) * chunk_size
-            file_size = upload_info.file_size
-
-            assert offset < file_size, f"Offset {offset} is greater than file size"
-
-            # Open the file, seek, read the chunk, and close immediately.
-            # with open(file_path, "rb") as f:
-            #     f.seek(offset)
-            #     data = f.read(chunk_size)
-
-            # data = chunk_fetcher(offset, chunk_size).result()
-
-            assert curr_part_number is not None
-            cpn: int = curr_part_number
-
-            def on_complete(fut: Future[FilePart]) -> None:
-                logger.debug("Chunk read complete")
-                fp: FilePart = fut.result()
-                if fp.is_error():
-                    logger.warning(
-                        f"Error reading file: {fp}, skipping part {part_number}"
-                    )
-                    return
-
-                if fp.n_bytes() == 0:
-                    logger.warning(f"Empty data for part {part_number} of {file_path}")
-                    raise ValueError(
-                        f"Empty data for part {part_number} of {file_path}"
-                    )
-
-                if isinstance(fp.payload, Exception):
-                    logger.warning(f"Error reading file because of error: {fp.payload}")
-                    return
-
-                done_part_numbers.add(part_number)
-                queue_upload.put(fp)
-
-            offset = (curr_part_number - 1) * chunk_size
-            logger.info(
-                f"Reading chunk {curr_part_number} of {num_parts} for {file_path}"
-            )
-            fut = fetcher(offset, file_size, S3FileInfo(upload_info.upload_id, cpn))
-            fut.add_done_callback(on_complete)
-            # wait until the queue_upload queue can accept the next chunk
-            while queue_upload.full():
-                time.sleep(0.1)
-    except Exception as e:
-
-        logger.error(f"Error reading file: {e}", exc_info=True)
-    finally:
-        logger.info(f"Finishing FILE CHUNKER for {file_path} and adding EndOfStream")
-        queue_upload.put(EndOfStream())
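
Set against its replacement chunk_task.py, this deletion reads as a bug-fix refactor: the nested on_complete closure captured the outer part_number variable, which keeps mutating as the loop advances, so a callback firing late could record the wrong part; chunk_task.py instead reads the part number from the S3FileInfo attached to each FilePart. The old fetcher(offset, file_size, ...) call also passed the whole file size where the new code passes a per-chunk fetch_size. A minimal sketch of the closure hazard (illustrative only, not library code):

callbacks = []
for part_number in range(1, 4):

    def on_complete() -> None:
        # Late binding: this reads part_number when called, not when defined.
        print(f"finishing part {part_number}")

    callbacks.append(on_complete)

for cb in callbacks:
    cb()  # prints "finishing part 3" three times, not parts 1, 2, 3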