futurehouse-client 0.3.19.dev129__tar.gz → 0.3.19.dev133__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (29)
  1. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/PKG-INFO +1 -1
  2. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/rest_client.py +103 -135
  3. futurehouse_client-0.3.19.dev133/futurehouse_client/utils/__init__.py +0 -0
  4. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/PKG-INFO +1 -1
  5. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/SOURCES.txt +1 -0
  6. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/tests/test_rest.py +41 -37
  7. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/LICENSE +0 -0
  8. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/README.md +0 -0
  9. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/docs/__init__.py +0 -0
  10. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/docs/client_notebook.ipynb +0 -0
  11. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/__init__.py +0 -0
  12. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/__init__.py +0 -0
  13. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/clients/job_client.py +0 -0
  14. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/__init__.py +0 -0
  15. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/app.py +0 -0
  16. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/client.py +0 -0
  17. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/models/rest.py +0 -0
  18. /futurehouse_client-0.3.19.dev129/futurehouse_client/utils/__init__.py → /futurehouse_client-0.3.19.dev133/futurehouse_client/py.typed +0 -0
  19. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/auth.py +0 -0
  20. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/general.py +0 -0
  21. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/module_utils.py +0 -0
  22. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client/utils/monitoring.py +0 -0
  23. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/dependency_links.txt +0 -0
  24. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/requires.txt +0 -0
  25. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/futurehouse_client.egg-info/top_level.txt +0 -0
  26. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/pyproject.toml +0 -0
  27. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/setup.cfg +0 -0
  28. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/tests/test_client.py +0 -0
  29. {futurehouse_client-0.3.19.dev129 → futurehouse_client-0.3.19.dev133}/uv.lock +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.19.dev129
+Version: 0.3.19.dev133
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
@@ -1053,24 +1053,11 @@ class RestClient:
         status_url = None
 
         try:
-            # Upload all chunks except the last one in parallel
-            if total_chunks > 1:
-                self._upload_chunks_parallel(
-                    job_name,
-                    file_path,
-                    file_name,
-                    upload_id,
-                    total_chunks - 1,
-                    total_chunks,
-                )
-
-            # Upload the last chunk separately (handles assembly)
-            status_url = self._upload_final_chunk(
+            status_url = self._upload_chunks_parallel(
                 job_name,
                 file_path,
                 file_name,
                 upload_id,
-                total_chunks - 1,
                 total_chunks,
             )
 
@@ -1086,149 +1073,74 @@ class RestClient:
         file_path: Path,
         file_name: str,
         upload_id: str,
-        num_regular_chunks: int,
         total_chunks: int,
-    ) -> None:
-        """Upload chunks in parallel batches.
+    ) -> str | None:
+        """Upload all chunks in parallel batches, including the final chunk.
 
         Args:
             job_name: The key of the crow to upload to.
             file_path: The path to the file to upload.
             file_name: The name to use for the file.
             upload_id: The upload ID to use.
-            num_regular_chunks: Number of regular chunks (excluding final chunk).
             total_chunks: Total number of chunks.
 
-        Raises:
-            FileUploadError: If there's an error uploading any chunk.
-        """
-        if num_regular_chunks <= 0:
-            return
-
-        # Process chunks in batches
-        for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
-            batch_end = min(
-                batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
-            )
-
-            # Upload chunks in this batch concurrently
-            with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_CHUNKS) as executor:
-                futures = {
-                    executor.submit(
-                        self._upload_single_chunk,
-                        job_name,
-                        file_path,
-                        file_name,
-                        upload_id,
-                        chunk_index,
-                        total_chunks,
-                    ): chunk_index
-                    for chunk_index in range(batch_start, batch_end)
-                }
-
-                for future in as_completed(futures):
-                    chunk_index = futures[future]
-                    try:
-                        future.result()
-                        logger.debug(
-                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
-                        )
-                    except Exception as e:
-                        logger.error(f"Error uploading chunk {chunk_index}: {e}")
-                        raise FileUploadError(
-                            f"Error uploading chunk {chunk_index} of {file_name}: {e}"
-                        ) from e
-
-    def _upload_single_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> None:
-        """Upload a single chunk.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of this chunk.
-            total_chunks: Total number of chunks.
+        Returns:
+            The status URL from the final chunk response, or None if no chunks.
 
         Raises:
-            Exception: If there's an error uploading the chunk.
+            FileUploadError: If there's an error uploading any chunk.
         """
-        with open(file_path, "rb") as f:
-            # Read the chunk from the file
-            f.seek(chunk_index * self.CHUNK_SIZE)
-            chunk_data = f.read(self.CHUNK_SIZE)
+        if total_chunks <= 0:
+            return None
 
-            # Prepare and send the chunk
-            with tempfile.NamedTemporaryFile() as temp_file:
-                temp_file.write(chunk_data)
-                temp_file.flush()
+        if total_chunks > 1:
+            num_regular_chunks = total_chunks - 1
+            for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+                batch_end = min(
+                    batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+                )
 
-                # Create form data
-                with open(temp_file.name, "rb") as chunk_file_obj:
-                    files = {
-                        "chunk": (
+                # Upload chunks in this batch concurrently
+                with ThreadPoolExecutor(
+                    max_workers=self.MAX_CONCURRENT_CHUNKS
+                ) as executor:
+                    futures = {
+                        executor.submit(
+                            self._upload_single_chunk,
+                            job_name,
+                            file_path,
                             file_name,
-                            chunk_file_obj,
-                            "application/octet-stream",
-                        )
-                    }
-                    data = {
-                        "file_name": file_name,
-                        "chunk_index": chunk_index,
-                        "total_chunks": total_chunks,
-                        "upload_id": upload_id,
+                            upload_id,
+                            chunk_index,
+                            total_chunks,
+                        ): chunk_index
+                        for chunk_index in range(batch_start, batch_end)
                     }
 
-                    # Send the chunk
-                    response = self.multipart_client.post(
-                        f"/v0.1/crows/{job_name}/upload-chunk",
-                        files=files,
-                        data=data,
-                    )
-                    response.raise_for_status()
-
-    def _upload_final_chunk(
-        self,
-        job_name: str,
-        file_path: Path,
-        file_name: str,
-        upload_id: str,
-        chunk_index: int,
-        total_chunks: int,
-    ) -> str | None:
-        """Upload the final chunk with retry logic for missing chunks.
-
-        Args:
-            job_name: The key of the crow to upload to.
-            file_path: The path to the file to upload.
-            file_name: The name to use for the file.
-            upload_id: The upload ID to use.
-            chunk_index: The index of the final chunk.
-            total_chunks: Total number of chunks.
-
-        Returns:
-            The status URL from the response.
-
-        Raises:
-            FileUploadError: If there's an error uploading the final chunk.
-        """
+                    for future in as_completed(futures):
+                        chunk_index = futures[future]
+                        try:
+                            future.result()
+                            logger.debug(
+                                f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                            )
+                        except Exception as e:
+                            logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                            raise FileUploadError(
+                                f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                            ) from e
+
+        # Upload the final chunk with retry logic
+        final_chunk_index = total_chunks - 1
         retries = 0
         max_retries = 3
-        retry_delay = 2.0  # seconds
+        retry_delay = 2.0
 
         while retries < max_retries:
             try:
                 with open(file_path, "rb") as f:
                     # Read the final chunk from the file
-                    f.seek(chunk_index * self.CHUNK_SIZE)
+                    f.seek(final_chunk_index * self.CHUNK_SIZE)
                     chunk_data = f.read(self.CHUNK_SIZE)
 
                     # Prepare and send the chunk
@@ -1247,7 +1159,7 @@ class RestClient:
                             }
                             data = {
                                 "file_name": file_name,
-                                "chunk_index": chunk_index,
+                                "chunk_index": final_chunk_index,
                                 "total_chunks": total_chunks,
                                 "upload_id": upload_id,
                             }
@@ -1274,7 +1186,7 @@ class RestClient:
                             status_url = response_data.get("status_url")
 
                             logger.debug(
-                                f"Uploaded final chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                                f"Uploaded final chunk {final_chunk_index + 1}/{total_chunks} of {file_name}"
                             )
                             return status_url
 
@@ -1293,6 +1205,62 @@ class RestClient:
             f"Failed to upload final chunk of {file_name} after {max_retries} retries"
         )
 
+    def _upload_single_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload a single chunk.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of this chunk.
+            total_chunks: Total number of chunks.
+
+        Raises:
+            Exception: If there's an error uploading the chunk.
+        """
+        with open(file_path, "rb") as f:
+            # Read the chunk from the file
+            f.seek(chunk_index * self.CHUNK_SIZE)
+            chunk_data = f.read(self.CHUNK_SIZE)
+
+            # Prepare and send the chunk
+            with tempfile.NamedTemporaryFile() as temp_file:
+                temp_file.write(chunk_data)
+                temp_file.flush()
+
+                # Create form data
+                with open(temp_file.name, "rb") as chunk_file_obj:
+                    files = {
+                        "chunk": (
+                            file_name,
+                            chunk_file_obj,
+                            "application/octet-stream",
+                        )
+                    }
+                    data = {
+                        "file_name": file_name,
+                        "chunk_index": chunk_index,
+                        "total_chunks": total_chunks,
+                        "upload_id": upload_id,
+                    }
+
+                    # Send the chunk
+                    response = self.multipart_client.post(
+                        f"/v0.1/crows/{job_name}/upload-chunk",
+                        files=files,
+                        data=data,
+                    )
+                    response.raise_for_status()
+
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
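In short, this refactor folds the old _upload_final_chunk into _upload_chunks_parallel: regular chunks go up in bounded parallel batches, then the final chunk (which triggers server-side assembly) is sent last with retry logic, and its status URL is returned. A minimal standalone sketch of the same pattern; the upload_chunk callable here is a hypothetical stand-in for the client's HTTP call, not part of this package:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    MAX_CONCURRENT_CHUNKS = 4

    def upload_all_chunks(upload_chunk, total_chunks: int) -> str | None:
        """Upload chunks 0..n-2 in bounded parallel batches, then the final chunk."""
        if total_chunks <= 0:
            return None
        num_regular = total_chunks - 1
        for start in range(0, num_regular, MAX_CONCURRENT_CHUNKS):
            end = min(start + MAX_CONCURRENT_CHUNKS, num_regular)
            with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_CHUNKS) as pool:
                futures = {pool.submit(upload_chunk, i): i for i in range(start, end)}
                for future in as_completed(futures):
                    future.result()  # re-raises any chunk failure
        # The final chunk goes last: the server assembles the file once it
        # arrives, and its response carries the status URL.
        return upload_chunk(total_chunks - 1)

    # Example: upload_all_chunks(lambda i: f"status-{i}", 5) returns "status-4".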
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.19.dev129
+Version: 0.3.19.dev133
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
@@ -5,6 +5,7 @@ uv.lock
 docs/__init__.py
 docs/client_notebook.ipynb
 futurehouse_client/__init__.py
+futurehouse_client/py.typed
 futurehouse_client.egg-info/PKG-INFO
 futurehouse_client.egg-info/SOURCES.txt
 futurehouse_client.egg-info/dependency_links.txt
@@ -264,9 +264,6 @@ class TestParallelChunking:
         client._upload_single_chunk = types.MethodType(
             RestClient._upload_single_chunk, client
         )
-        client._upload_final_chunk = types.MethodType(
-            RestClient._upload_final_chunk, client
-        )
         client._upload_single_file = types.MethodType(
             RestClient._upload_single_file, client
         )
@@ -372,19 +369,34 @@ class TestParallelChunking:
         num_regular_chunks = 5  # Smaller number for easier testing
         total_chunks = 6
 
-        # Use patch to mock the _upload_single_chunk method
+        # Mock file content for final chunk
+        chunk_content = b"A" * 1000
+
+        # Mock final chunk response
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.raise_for_status.return_value = None
+        mock_response.json.return_value = {"status_url": "http://test.com/status"}
+        mock_client.multipart_client.post.return_value = mock_response
+
+        # Use patch to mock the _upload_single_chunk method and file operations
         with patch.object(mock_client, "_upload_single_chunk") as mock_upload_chunk:
-            # Call the method - it should use ThreadPoolExecutor internally
-            mock_client._upload_chunks_parallel(
-                job_name,
-                file_path,
-                file_name,
-                upload_id,
-                num_regular_chunks,
-                total_chunks,
-            )
-
-            # Verify all chunks were processed by checking the call count
+            with patch("builtins.open", mock_open(read_data=chunk_content)):
+                with patch("tempfile.NamedTemporaryFile") as mock_temp_file:
+                    mock_temp_file.return_value.__enter__.return_value.name = (
+                        "temp_chunk"
+                    )
+
+                    # Call the method - it should use ThreadPoolExecutor internally
+                    mock_client._upload_chunks_parallel(
+                        job_name,
+                        file_path,
+                        file_name,
+                        upload_id,
+                        total_chunks,
+                    )
+
+            # Verify all regular chunks were processed by checking the call count
             assert mock_upload_chunk.call_count == num_regular_chunks
 
         # Verify the calls were made with correct parameters
@@ -437,14 +449,13 @@ class TestParallelChunking:
         assert data["total_chunks"] == total_chunks
         assert data["upload_id"] == upload_id
 
-    def test_upload_final_chunk_with_retry_on_conflict(self, mock_client):
+    def test_upload_chunks_parallel_retry_on_conflict(self, mock_client):
         """Test final chunk upload with retry logic for missing chunks (409 conflict)."""
         job_name = "test-job"
         file_path = Path("test_file.txt")
         file_name = "test_file.txt"
         upload_id = "test-upload-id"
-        chunk_index = 2
-        total_chunks = 3
+        total_chunks = 1
 
         # Mock file content
         chunk_content = b"A" * 1000
@@ -473,12 +484,11 @@ class TestParallelChunking:
                     "temp_chunk"
                 )
 
-                status_url = mock_client._upload_final_chunk(
+                status_url = mock_client._upload_chunks_parallel(
                     job_name,
                     file_path,
                     file_name,
                     upload_id,
-                    chunk_index,
                     total_chunks,
                 )
 
@@ -487,29 +497,24 @@ class TestParallelChunking:
             assert status_url == "http://test.com/status"
             mock_sleep.assert_called_once()  # Verify sleep was called for retry
 
-    def test_upload_final_chunk_max_retries_exceeded(self, mock_client):
+    def test_upload_chunks_parallel_final_chunk_max_retries_exceeded(self, mock_client):
         """Test final chunk upload fails after max retries."""
         job_name = "test-job"
         file_path = Path("test_file.txt")
         file_name = "test_file.txt"
         upload_id = "test-upload-id"
-        chunk_index = 2
-        total_chunks = 3
+        total_chunks = 1
 
         # Mock file content
         chunk_content = b"A" * 1000
 
-        # Mock response that always returns 409 (conflict) and raises an exception on raise_for_status
-        mock_response = MagicMock()
-        mock_response.status_code = 409
-        # Make raise_for_status raise an exception after the retries are exhausted
-        from httpx import HTTPStatusError, Request, codes
+        # Create a side effect that simulates an exception on every attempt
+        def side_effect(*args, **kwargs):
+            raise Exception("Simulated upload failure")  # noqa: TRY002
 
-        mock_request = MagicMock(spec=Request)
-        mock_response.raise_for_status.side_effect = HTTPStatusError(
-            "409 Conflict", request=mock_request, response=mock_response
-        )
-        mock_client.multipart_client.post.return_value = mock_response
+        mock_client.multipart_client.post.side_effect = side_effect
+
+        from httpx import codes
 
         with patch("builtins.open", mock_open(read_data=chunk_content)):
             with patch("tempfile.NamedTemporaryFile") as mock_temp_file:
@@ -523,17 +528,16 @@ class TestParallelChunking:
                 with pytest.raises(
                     FileUploadError, match="Error uploading final chunk"
                 ):
-                    mock_client._upload_final_chunk(
+                    mock_client._upload_chunks_parallel(
                        job_name,
                        file_path,
                        file_name,
                        upload_id,
-                        chunk_index,
                        total_chunks,
                     )
 
-                # Verify that retries were attempted (should be 3 attempts total)
-                assert mock_client.multipart_client.post.call_count == 3
+            # Verify that retries were attempted (should be 3 attempts total)
+            assert mock_client.multipart_client.post.call_count == 3
 
     def test_upload_directory_recursive(self, mock_client):
         """Test uploading a directory with nested files."""