futurehouse-client 0.3.19.dev129__py3-none-any.whl → 0.3.19.dev133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- futurehouse_client/clients/rest_client.py +103 -135
- futurehouse_client/py.typed +0 -0
- {futurehouse_client-0.3.19.dev129.dist-info → futurehouse_client-0.3.19.dev133.dist-info}/METADATA +1 -1
- {futurehouse_client-0.3.19.dev129.dist-info → futurehouse_client-0.3.19.dev133.dist-info}/RECORD +6 -5
- {futurehouse_client-0.3.19.dev129.dist-info → futurehouse_client-0.3.19.dev133.dist-info}/WHEEL +0 -0
- {futurehouse_client-0.3.19.dev129.dist-info → futurehouse_client-0.3.19.dev133.dist-info}/top_level.txt +0 -0
@@ -1053,24 +1053,11 @@ class RestClient:
|
|
1053
1053
|
status_url = None
|
1054
1054
|
|
1055
1055
|
try:
|
1056
|
-
|
1057
|
-
if total_chunks > 1:
|
1058
|
-
self._upload_chunks_parallel(
|
1059
|
-
job_name,
|
1060
|
-
file_path,
|
1061
|
-
file_name,
|
1062
|
-
upload_id,
|
1063
|
-
total_chunks - 1,
|
1064
|
-
total_chunks,
|
1065
|
-
)
|
1066
|
-
|
1067
|
-
# Upload the last chunk separately (handles assembly)
|
1068
|
-
status_url = self._upload_final_chunk(
|
1056
|
+
status_url = self._upload_chunks_parallel(
|
1069
1057
|
job_name,
|
1070
1058
|
file_path,
|
1071
1059
|
file_name,
|
1072
1060
|
upload_id,
|
1073
|
-
total_chunks - 1,
|
1074
1061
|
total_chunks,
|
1075
1062
|
)
|
1076
1063
|
|
@@ -1086,149 +1073,74 @@ class RestClient:
|
|
1086
1073
|
file_path: Path,
|
1087
1074
|
file_name: str,
|
1088
1075
|
upload_id: str,
|
1089
|
-
num_regular_chunks: int,
|
1090
1076
|
total_chunks: int,
|
1091
|
-
) -> None:
|
1092
|
-
"""Upload chunks in parallel batches.
|
1077
|
+
) -> str | None:
|
1078
|
+
"""Upload all chunks in parallel batches, including the final chunk.
|
1093
1079
|
|
1094
1080
|
Args:
|
1095
1081
|
job_name: The key of the crow to upload to.
|
1096
1082
|
file_path: The path to the file to upload.
|
1097
1083
|
file_name: The name to use for the file.
|
1098
1084
|
upload_id: The upload ID to use.
|
1099
|
-
num_regular_chunks: Number of regular chunks (excluding final chunk).
|
1100
1085
|
total_chunks: Total number of chunks.
|
1101
1086
|
|
1102
|
-
|
1103
|
-
|
1104
|
-
"""
|
1105
|
-
if num_regular_chunks <= 0:
|
1106
|
-
return
|
1107
|
-
|
1108
|
-
# Process chunks in batches
|
1109
|
-
for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
|
1110
|
-
batch_end = min(
|
1111
|
-
batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
|
1112
|
-
)
|
1113
|
-
|
1114
|
-
# Upload chunks in this batch concurrently
|
1115
|
-
with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_CHUNKS) as executor:
|
1116
|
-
futures = {
|
1117
|
-
executor.submit(
|
1118
|
-
self._upload_single_chunk,
|
1119
|
-
job_name,
|
1120
|
-
file_path,
|
1121
|
-
file_name,
|
1122
|
-
upload_id,
|
1123
|
-
chunk_index,
|
1124
|
-
total_chunks,
|
1125
|
-
): chunk_index
|
1126
|
-
for chunk_index in range(batch_start, batch_end)
|
1127
|
-
}
|
1128
|
-
|
1129
|
-
for future in as_completed(futures):
|
1130
|
-
chunk_index = futures[future]
|
1131
|
-
try:
|
1132
|
-
future.result()
|
1133
|
-
logger.debug(
|
1134
|
-
f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
|
1135
|
-
)
|
1136
|
-
except Exception as e:
|
1137
|
-
logger.error(f"Error uploading chunk {chunk_index}: {e}")
|
1138
|
-
raise FileUploadError(
|
1139
|
-
f"Error uploading chunk {chunk_index} of {file_name}: {e}"
|
1140
|
-
) from e
|
1141
|
-
|
1142
|
-
def _upload_single_chunk(
|
1143
|
-
self,
|
1144
|
-
job_name: str,
|
1145
|
-
file_path: Path,
|
1146
|
-
file_name: str,
|
1147
|
-
upload_id: str,
|
1148
|
-
chunk_index: int,
|
1149
|
-
total_chunks: int,
|
1150
|
-
) -> None:
|
1151
|
-
"""Upload a single chunk.
|
1152
|
-
|
1153
|
-
Args:
|
1154
|
-
job_name: The key of the crow to upload to.
|
1155
|
-
file_path: The path to the file to upload.
|
1156
|
-
file_name: The name to use for the file.
|
1157
|
-
upload_id: The upload ID to use.
|
1158
|
-
chunk_index: The index of this chunk.
|
1159
|
-
total_chunks: Total number of chunks.
|
1087
|
+
Returns:
|
1088
|
+
The status URL from the final chunk response, or None if no chunks.
|
1160
1089
|
|
1161
1090
|
Raises:
|
1162
|
-
|
1091
|
+
FileUploadError: If there's an error uploading any chunk.
|
1163
1092
|
"""
|
1164
|
-
|
1165
|
-
|
1166
|
-
f.seek(chunk_index * self.CHUNK_SIZE)
|
1167
|
-
chunk_data = f.read(self.CHUNK_SIZE)
|
1093
|
+
if total_chunks <= 0:
|
1094
|
+
return None
|
1168
1095
|
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1096
|
+
if total_chunks > 1:
|
1097
|
+
num_regular_chunks = total_chunks - 1
|
1098
|
+
for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
|
1099
|
+
batch_end = min(
|
1100
|
+
batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
|
1101
|
+
)
|
1173
1102
|
|
1174
|
-
#
|
1175
|
-
with
|
1176
|
-
|
1177
|
-
|
1103
|
+
# Upload chunks in this batch concurrently
|
1104
|
+
with ThreadPoolExecutor(
|
1105
|
+
max_workers=self.MAX_CONCURRENT_CHUNKS
|
1106
|
+
) as executor:
|
1107
|
+
futures = {
|
1108
|
+
executor.submit(
|
1109
|
+
self._upload_single_chunk,
|
1110
|
+
job_name,
|
1111
|
+
file_path,
|
1178
1112
|
file_name,
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1182
|
-
|
1183
|
-
|
1184
|
-
"file_name": file_name,
|
1185
|
-
"chunk_index": chunk_index,
|
1186
|
-
"total_chunks": total_chunks,
|
1187
|
-
"upload_id": upload_id,
|
1113
|
+
upload_id,
|
1114
|
+
chunk_index,
|
1115
|
+
total_chunks,
|
1116
|
+
): chunk_index
|
1117
|
+
for chunk_index in range(batch_start, batch_end)
|
1188
1118
|
}
|
1189
1119
|
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1200
|
-
|
1201
|
-
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1205
|
-
total_chunks: int,
|
1206
|
-
) -> str | None:
|
1207
|
-
"""Upload the final chunk with retry logic for missing chunks.
|
1208
|
-
|
1209
|
-
Args:
|
1210
|
-
job_name: The key of the crow to upload to.
|
1211
|
-
file_path: The path to the file to upload.
|
1212
|
-
file_name: The name to use for the file.
|
1213
|
-
upload_id: The upload ID to use.
|
1214
|
-
chunk_index: The index of the final chunk.
|
1215
|
-
total_chunks: Total number of chunks.
|
1216
|
-
|
1217
|
-
Returns:
|
1218
|
-
The status URL from the response.
|
1219
|
-
|
1220
|
-
Raises:
|
1221
|
-
FileUploadError: If there's an error uploading the final chunk.
|
1222
|
-
"""
|
1120
|
+
for future in as_completed(futures):
|
1121
|
+
chunk_index = futures[future]
|
1122
|
+
try:
|
1123
|
+
future.result()
|
1124
|
+
logger.debug(
|
1125
|
+
f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
|
1126
|
+
)
|
1127
|
+
except Exception as e:
|
1128
|
+
logger.error(f"Error uploading chunk {chunk_index}: {e}")
|
1129
|
+
raise FileUploadError(
|
1130
|
+
f"Error uploading chunk {chunk_index} of {file_name}: {e}"
|
1131
|
+
) from e
|
1132
|
+
|
1133
|
+
# Upload the final chunk with retry logic
|
1134
|
+
final_chunk_index = total_chunks - 1
|
1223
1135
|
retries = 0
|
1224
1136
|
max_retries = 3
|
1225
|
-
retry_delay = 2.0
|
1137
|
+
retry_delay = 2.0
|
1226
1138
|
|
1227
1139
|
while retries < max_retries:
|
1228
1140
|
try:
|
1229
1141
|
with open(file_path, "rb") as f:
|
1230
1142
|
# Read the final chunk from the file
|
1231
|
-
f.seek(
|
1143
|
+
f.seek(final_chunk_index * self.CHUNK_SIZE)
|
1232
1144
|
chunk_data = f.read(self.CHUNK_SIZE)
|
1233
1145
|
|
1234
1146
|
# Prepare and send the chunk
|
@@ -1247,7 +1159,7 @@ class RestClient:
|
|
1247
1159
|
}
|
1248
1160
|
data = {
|
1249
1161
|
"file_name": file_name,
|
1250
|
-
"chunk_index":
|
1162
|
+
"chunk_index": final_chunk_index,
|
1251
1163
|
"total_chunks": total_chunks,
|
1252
1164
|
"upload_id": upload_id,
|
1253
1165
|
}
|
@@ -1274,7 +1186,7 @@ class RestClient:
|
|
1274
1186
|
status_url = response_data.get("status_url")
|
1275
1187
|
|
1276
1188
|
logger.debug(
|
1277
|
-
f"Uploaded final chunk {
|
1189
|
+
f"Uploaded final chunk {final_chunk_index + 1}/{total_chunks} of {file_name}"
|
1278
1190
|
)
|
1279
1191
|
return status_url
|
1280
1192
|
|
@@ -1293,6 +1205,62 @@ class RestClient:
|
|
1293
1205
|
f"Failed to upload final chunk of {file_name} after {max_retries} retries"
|
1294
1206
|
)
|
1295
1207
|
|
1208
|
+
def _upload_single_chunk(
|
1209
|
+
self,
|
1210
|
+
job_name: str,
|
1211
|
+
file_path: Path,
|
1212
|
+
file_name: str,
|
1213
|
+
upload_id: str,
|
1214
|
+
chunk_index: int,
|
1215
|
+
total_chunks: int,
|
1216
|
+
) -> None:
|
1217
|
+
"""Upload a single chunk.
|
1218
|
+
|
1219
|
+
Args:
|
1220
|
+
job_name: The key of the crow to upload to.
|
1221
|
+
file_path: The path to the file to upload.
|
1222
|
+
file_name: The name to use for the file.
|
1223
|
+
upload_id: The upload ID to use.
|
1224
|
+
chunk_index: The index of this chunk.
|
1225
|
+
total_chunks: Total number of chunks.
|
1226
|
+
|
1227
|
+
Raises:
|
1228
|
+
Exception: If there's an error uploading the chunk.
|
1229
|
+
"""
|
1230
|
+
with open(file_path, "rb") as f:
|
1231
|
+
# Read the chunk from the file
|
1232
|
+
f.seek(chunk_index * self.CHUNK_SIZE)
|
1233
|
+
chunk_data = f.read(self.CHUNK_SIZE)
|
1234
|
+
|
1235
|
+
# Prepare and send the chunk
|
1236
|
+
with tempfile.NamedTemporaryFile() as temp_file:
|
1237
|
+
temp_file.write(chunk_data)
|
1238
|
+
temp_file.flush()
|
1239
|
+
|
1240
|
+
# Create form data
|
1241
|
+
with open(temp_file.name, "rb") as chunk_file_obj:
|
1242
|
+
files = {
|
1243
|
+
"chunk": (
|
1244
|
+
file_name,
|
1245
|
+
chunk_file_obj,
|
1246
|
+
"application/octet-stream",
|
1247
|
+
)
|
1248
|
+
}
|
1249
|
+
data = {
|
1250
|
+
"file_name": file_name,
|
1251
|
+
"chunk_index": chunk_index,
|
1252
|
+
"total_chunks": total_chunks,
|
1253
|
+
"upload_id": upload_id,
|
1254
|
+
}
|
1255
|
+
|
1256
|
+
# Send the chunk
|
1257
|
+
response = self.multipart_client.post(
|
1258
|
+
f"/v0.1/crows/{job_name}/upload-chunk",
|
1259
|
+
files=files,
|
1260
|
+
data=data,
|
1261
|
+
)
|
1262
|
+
response.raise_for_status()
|
1263
|
+
|
1296
1264
|
@retry(
|
1297
1265
|
stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
|
1298
1266
|
wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
|
File without changes
|
{futurehouse_client-0.3.19.dev129.dist-info → futurehouse_client-0.3.19.dev133.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: futurehouse-client
|
3
|
-
Version: 0.3.19.dev129
|
3
|
+
Version: 0.3.19.dev133
|
4
4
|
Summary: A client for interacting with endpoints of the FutureHouse service.
|
5
5
|
Author-email: FutureHouse technical staff <hello@futurehouse.org>
|
6
6
|
Classifier: Operating System :: OS Independent
|
{futurehouse_client-0.3.19.dev129.dist-info → futurehouse_client-0.3.19.dev133.dist-info}/RECORD
RENAMED
@@ -1,7 +1,8 @@
|
|
1
1
|
futurehouse_client/__init__.py,sha256=OzGDkVm5UTUzd4n8yOmRjMF73YrK0FaIQX5gS3Dk8Zo,304
|
2
|
+
futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
3
|
futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
|
3
4
|
futurehouse_client/clients/job_client.py,sha256=JgB5IUAyCmnhGRsYc3bgKldA-lkM1JLwHRwwUeOCdus,11944
|
4
|
-
futurehouse_client/clients/rest_client.py,sha256=
|
5
|
+
futurehouse_client/clients/rest_client.py,sha256=3wfVz6d2KuRQUr_nms7P25yVR6aTjsRrSkqmVs55soA,54552
|
5
6
|
futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
|
6
7
|
futurehouse_client/models/app.py,sha256=VCtg0ygd-TSrR6DtfljTBt9jnl1eBNal8UXHFdkDg88,28587
|
7
8
|
futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
|
@@ -11,7 +12,7 @@ futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHL
|
|
11
12
|
futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
|
12
13
|
futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
|
13
14
|
futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
|
14
|
-
futurehouse_client-0.3.19.
|
15
|
-
futurehouse_client-0.3.19.
|
16
|
-
futurehouse_client-0.3.19.
|
17
|
-
futurehouse_client-0.3.19.
|
15
|
+
futurehouse_client-0.3.19.dev133.dist-info/METADATA,sha256=fthzi_rIC5z8zWuUHtLeLOpS_hRR6Hn_6TmGlq0FOEY,12767
|
16
|
+
futurehouse_client-0.3.19.dev133.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
17
|
+
futurehouse_client-0.3.19.dev133.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
|
18
|
+
futurehouse_client-0.3.19.dev133.dist-info/RECORD,,
|
{futurehouse_client-0.3.19.dev129.dist-info → futurehouse_client-0.3.19.dev133.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|