futurehouse-client 0.3.19.dev129__py3-none-any.whl → 0.3.19.dev133__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
futurehouse_client/clients/rest_client.py

@@ -1053,24 +1053,11 @@ class RestClient:
         status_url = None
 
         try:
-            # Upload all chunks except the last one in parallel
-            if total_chunks > 1:
-                self._upload_chunks_parallel(
-                    job_name,
-                    file_path,
-                    file_name,
-                    upload_id,
-                    total_chunks - 1,
-                    total_chunks,
-                )
-
-            # Upload the last chunk separately (handles assembly)
-            status_url = self._upload_final_chunk(
+            status_url = self._upload_chunks_parallel(
                 job_name,
                 file_path,
                 file_name,
                 upload_id,
-                total_chunks - 1,
                 total_chunks,
             )
 
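This hunk collapses the old two-step flow (parallel upload of the regular chunks, then a separate `_upload_final_chunk` call for assembly) into a single `_upload_chunks_parallel` call that now returns the status URL itself. For orientation, a minimal caller-side sketch of where `total_chunks` comes from; `CHUNK_SIZE` here is a hypothetical stand-in for the client's own constant, which this diff does not show:

    import math
    from pathlib import Path

    CHUNK_SIZE = 16 * 1024 * 1024  # assumed value; RestClient defines its own CHUNK_SIZE

    def total_chunks_for(file_path: Path) -> int:
        # Ceil-divide the file size so a trailing partial chunk still gets
        # its own index; this is the count passed to _upload_chunks_parallel.
        return max(1, math.ceil(file_path.stat().st_size / CHUNK_SIZE))
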
@@ -1086,149 +1073,74 @@ class RestClient:
         file_path: Path,
         file_name: str,
         upload_id: str,
-        num_regular_chunks: int,
         total_chunks: int,
-    ) -> None:
-        """Upload chunks in parallel batches.
+    ) -> str | None:
+        """Upload all chunks in parallel batches, including the final chunk.
 
         Args:
             job_name: The key of the crow to upload to.
             file_path: The path to the file to upload.
             file_name: The name to use for the file.
             upload_id: The upload ID to use.
-            num_regular_chunks: Number of regular chunks (excluding final chunk).
             total_chunks: Total number of chunks.
 
-        Raises:
-            FileUploadError: If there's an error uploading any chunk.
-        """
-        if num_regular_chunks <= 0:
-            return
-
-        # Process chunks in batches
-        for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
-            batch_end = min(
-                batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
-            )
-
-            # Upload chunks in this batch concurrently
-            with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_CHUNKS) as executor:
-                futures = {
-                    executor.submit(
-                        self._upload_single_chunk,
-                        job_name,
-                        file_path,
-                        file_name,
-                        upload_id,
-                        chunk_index,
-                        total_chunks,
-                    ): chunk_index
-                    for chunk_index in range(batch_start, batch_end)
-                }
-
-                for future in as_completed(futures):
-                    chunk_index = futures[future]
-                    try:
-                        future.result()
-                        logger.debug(
-                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
-                        )
-                    except Exception as e:
-                        logger.error(f"Error uploading chunk {chunk_index}: {e}")
-                        raise FileUploadError(
-                            f"Error uploading chunk {chunk_index} of {file_name}: {e}"
-                        ) from e
-
-    def _upload_single_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> None:
-        """Upload a single chunk.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of this chunk.
-            total_chunks: Total number of chunks.
+        Returns:
+            The status URL from the final chunk response, or None if no chunks.
 
         Raises:
-            Exception: If there's an error uploading the chunk.
+            FileUploadError: If there's an error uploading any chunk.
         """
-        with open(file_path, "rb") as f:
-            # Read the chunk from the file
-            f.seek(chunk_index * self.CHUNK_SIZE)
-            chunk_data = f.read(self.CHUNK_SIZE)
+        if total_chunks <= 0:
+            return None
 
-            # Prepare and send the chunk
-            with tempfile.NamedTemporaryFile() as temp_file:
-                temp_file.write(chunk_data)
-                temp_file.flush()
+        if total_chunks > 1:
+            num_regular_chunks = total_chunks - 1
+            for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+                batch_end = min(
+                    batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+                )
 
-                # Create form data
-                with open(temp_file.name, "rb") as chunk_file_obj:
-                    files = {
-                        "chunk": (
+                # Upload chunks in this batch concurrently
+                with ThreadPoolExecutor(
+                    max_workers=self.MAX_CONCURRENT_CHUNKS
+                ) as executor:
+                    futures = {
+                        executor.submit(
+                            self._upload_single_chunk,
+                            job_name,
+                            file_path,
                             file_name,
-                            chunk_file_obj,
-                            "application/octet-stream",
-                        )
-                    }
-                    data = {
-                        "file_name": file_name,
-                        "chunk_index": chunk_index,
-                        "total_chunks": total_chunks,
-                        "upload_id": upload_id,
+                            upload_id,
+                            chunk_index,
+                            total_chunks,
+                        ): chunk_index
+                        for chunk_index in range(batch_start, batch_end)
                     }
 
-                    # Send the chunk
-                    response = self.multipart_client.post(
-                        f"/v0.1/crows/{job_name}/upload-chunk",
-                        files=files,
-                        data=data,
-                    )
-                    response.raise_for_status()
-
-    def _upload_final_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> str | None:
-        """Upload the final chunk with retry logic for missing chunks.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of the final chunk.
-            total_chunks: Total number of chunks.
-
-        Returns:
-            The status URL from the response.
-
-        Raises:
-            FileUploadError: If there's an error uploading the final chunk.
-        """
+                    for future in as_completed(futures):
+                        chunk_index = futures[future]
+                        try:
+                            future.result()
+                            logger.debug(
+                                f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                            )
+                        except Exception as e:
+                            logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                            raise FileUploadError(
+                                f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                            ) from e
+
+        # Upload the final chunk with retry logic
+        final_chunk_index = total_chunks - 1
         retries = 0
         max_retries = 3
-        retry_delay = 2.0  # seconds
+        retry_delay = 2.0
 
         while retries < max_retries:
             try:
                 with open(file_path, "rb") as f:
                     # Read the final chunk from the file
-                    f.seek(chunk_index * self.CHUNK_SIZE)
+                    f.seek(final_chunk_index * self.CHUNK_SIZE)
                     chunk_data = f.read(self.CHUNK_SIZE)
 
                     # Prepare and send the chunk
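The merged method keeps the same batching pattern: chunk indices are processed in groups of at most `MAX_CONCURRENT_CHUNKS`, each group runs on its own thread pool, and the first failure is re-raised as `FileUploadError`. A standalone sketch of that pattern, with `worker` standing in for `_upload_single_chunk` and a stand-in constant:

    from concurrent.futures import ThreadPoolExecutor, as_completed
    from typing import Callable

    MAX_CONCURRENT_CHUNKS = 4  # stand-in for the RestClient class constant

    def run_in_batches(num_items: int, worker: Callable[[int], None]) -> None:
        for start in range(0, num_items, MAX_CONCURRENT_CHUNKS):
            end = min(start + MAX_CONCURRENT_CHUNKS, num_items)
            # One pool per batch: the with-block joins all threads before the
            # next batch starts, capping in-flight uploads at the batch size.
            with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_CHUNKS) as executor:
                futures = {executor.submit(worker, i): i for i in range(start, end)}
                for future in as_completed(futures):
                    future.result()  # re-raises the worker's exception, failing fast

Because each pool is joined before the loop advances, every regular chunk has completed before the final chunk is attempted, which appears to be what the final chunk's assembly step relies on.
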
@@ -1247,7 +1159,7 @@ class RestClient:
                             }
                             data = {
                                 "file_name": file_name,
-                                "chunk_index": chunk_index,
+                                "chunk_index": final_chunk_index,
                                 "total_chunks": total_chunks,
                                 "upload_id": upload_id,
                             }
@@ -1274,7 +1186,7 @@ class RestClient:
                             status_url = response_data.get("status_url")
 
                             logger.debug(
-                                f"Uploaded final chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                                f"Uploaded final chunk {final_chunk_index + 1}/{total_chunks} of {file_name}"
                             )
                             return status_url
 
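The final chunk keeps its bounded retry loop (`max_retries = 3`, `retry_delay = 2.0`), which the removed `_upload_final_chunk` docstring attributed to handling chunks the server has not yet seen. The generic shape of that loop as a sketch; the sleep between attempts is an assumption, since the code that consumes `retry_delay` falls outside these hunks:

    import time
    from typing import Callable

    def call_with_retries(attempt: Callable[[], object], max_retries: int = 3,
                          retry_delay: float = 2.0) -> object:
        for attempt_number in range(max_retries):
            try:
                return attempt()
            except Exception:
                if attempt_number == max_retries - 1:
                    raise  # out of retries: let the last failure propagate
                time.sleep(retry_delay)
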
@@ -1293,6 +1205,62 @@ class RestClient:
             f"Failed to upload final chunk of {file_name} after {max_retries} retries"
         )
 
+    def _upload_single_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload a single chunk.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of this chunk.
+            total_chunks: Total number of chunks.
+
+        Raises:
+            Exception: If there's an error uploading the chunk.
+        """
+        with open(file_path, "rb") as f:
+            # Read the chunk from the file
+            f.seek(chunk_index * self.CHUNK_SIZE)
+            chunk_data = f.read(self.CHUNK_SIZE)
+
+            # Prepare and send the chunk
+            with tempfile.NamedTemporaryFile() as temp_file:
+                temp_file.write(chunk_data)
+                temp_file.flush()
+
+                # Create form data
+                with open(temp_file.name, "rb") as chunk_file_obj:
+                    files = {
+                        "chunk": (
+                            file_name,
+                            chunk_file_obj,
+                            "application/octet-stream",
+                        )
+                    }
+                    data = {
+                        "file_name": file_name,
+                        "chunk_index": chunk_index,
+                        "total_chunks": total_chunks,
+                        "upload_id": upload_id,
+                    }
+
+                    # Send the chunk
+                    response = self.multipart_client.post(
+                        f"/v0.1/crows/{job_name}/upload-chunk",
+                        files=files,
+                        data=data,
+                    )
+                    response.raise_for_status()
+
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
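`_upload_single_chunk` is moved below the final-chunk logic, otherwise unchanged: it stages each chunk through a named temporary file before posting it as multipart form data. A condensed sketch of the same request, assuming `multipart_client` is an `httpx.Client` with the service base URL configured (an assumption; the diff does not show its construction), and wrapping the bytes in `io.BytesIO` instead of the temp-file detour:

    import io
    import httpx

    def post_chunk(
        client: httpx.Client,  # assumed type of multipart_client
        job_name: str,
        chunk_data: bytes,
        file_name: str,
        chunk_index: int,
        total_chunks: int,
        upload_id: str,
    ) -> None:
        # Same endpoint and form fields as _upload_single_chunk above, but the
        # chunk bytes are wrapped in BytesIO rather than written to a temp file.
        files = {"chunk": (file_name, io.BytesIO(chunk_data), "application/octet-stream")}
        data = {
            "file_name": file_name,
            "chunk_index": str(chunk_index),
            "total_chunks": str(total_chunks),
            "upload_id": upload_id,
        }
        response = client.post(
            f"/v0.1/crows/{job_name}/upload-chunk", files=files, data=data
        )
        response.raise_for_status()

If `multipart_client` is indeed an httpx-style client, the `BytesIO` wrapper should be functionally equivalent to the temporary file while skipping one disk write per chunk.
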
futurehouse_client-0.3.19.dev129.dist-info/METADATA → futurehouse_client-0.3.19.dev133.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.19.dev129
+Version: 0.3.19.dev133
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
futurehouse_client-0.3.19.dev129.dist-info/RECORD → futurehouse_client-0.3.19.dev133.dist-info/RECORD

@@ -1,7 +1,8 @@
 futurehouse_client/__init__.py,sha256=OzGDkVm5UTUzd4n8yOmRjMF73YrK0FaIQX5gS3Dk8Zo,304
+futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
 futurehouse_client/clients/job_client.py,sha256=JgB5IUAyCmnhGRsYc3bgKldA-lkM1JLwHRwwUeOCdus,11944
-futurehouse_client/clients/rest_client.py,sha256=CiHUYJFZrfRr5mrkt41hxJMesZ2zkCBxqOFJb0t0LGo,55465
+futurehouse_client/clients/rest_client.py,sha256=3wfVz6d2KuRQUr_nms7P25yVR6aTjsRrSkqmVs55soA,54552
 futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
 futurehouse_client/models/app.py,sha256=VCtg0ygd-TSrR6DtfljTBt9jnl1eBNal8UXHFdkDg88,28587
 futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
@@ -11,7 +12,7 @@ futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHL
 futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
 futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
 futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
-futurehouse_client-0.3.19.dev129.dist-info/METADATA,sha256=Nd9KxyuyzbrgLFESjeo8-DwXkiK2-xtN-rfoyqaX-ys,12767
-futurehouse_client-0.3.19.dev129.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-futurehouse_client-0.3.19.dev129.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
-futurehouse_client-0.3.19.dev129.dist-info/RECORD,,
+futurehouse_client-0.3.19.dev133.dist-info/METADATA,sha256=fthzi_rIC5z8zWuUHtLeLOpS_hRR6Hn_6TmGlq0FOEY,12767
+futurehouse_client-0.3.19.dev133.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+futurehouse_client-0.3.19.dev133.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.3.19.dev133.dist-info/RECORD,,