futurehouse-client 0.3.18.dev195__py3-none-any.whl → 0.3.19.dev111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- futurehouse_client/__init__.py +1 -2
- futurehouse_client/clients/__init__.py +1 -2
- futurehouse_client/clients/job_client.py +27 -1
- futurehouse_client/clients/rest_client.py +362 -34
- futurehouse_client/models/app.py +70 -2
- futurehouse_client/utils/auth.py +6 -3
- {futurehouse_client-0.3.18.dev195.dist-info → futurehouse_client-0.3.19.dev111.dist-info}/METADATA +1 -1
- futurehouse_client-0.3.19.dev111.dist-info/RECORD +17 -0
- {futurehouse_client-0.3.18.dev195.dist-info → futurehouse_client-0.3.19.dev111.dist-info}/WHEEL +1 -1
- futurehouse_client-0.3.18.dev195.dist-info/RECORD +0 -17
- {futurehouse_client-0.3.18.dev195.dist-info → futurehouse_client-0.3.19.dev111.dist-info}/top_level.txt +0 -0
futurehouse_client/__init__.py
CHANGED
@@ -1,12 +1,11 @@
 from .clients.job_client import JobClient, JobNames
-from .clients.rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
 from .clients.rest_client import RestClient as FutureHouseClient
+from .clients.rest_client import TaskResponse, TaskResponseVerbose
 
 __all__ = [
     "FutureHouseClient",
     "JobClient",
     "JobNames",
-    "PQATaskResponse",
     "TaskResponse",
     "TaskResponseVerbose",
 ]
futurehouse_client/clients/__init__.py
CHANGED
@@ -1,12 +1,11 @@
 from .job_client import JobClient, JobNames
-from .rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
 from .rest_client import RestClient as FutureHouseClient
+from .rest_client import TaskResponse, TaskResponseVerbose
 
 __all__ = [
     "FutureHouseClient",
     "JobClient",
     "JobNames",
-    "PQATaskResponse",
     "TaskResponse",
     "TaskResponseVerbose",
 ]
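Note that `PQATaskResponse` is no longer re-exported from the package root or the clients package. A minimal sketch of the updated import path, matching the import that `job_client.py` itself uses below:

```python
# PQATaskResponse now lives only in the models module; the top-level
# re-export was removed in this release.
from futurehouse_client.models.app import PQATaskResponse
```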
futurehouse_client/clients/job_client.py
CHANGED
@@ -8,7 +8,13 @@ from aviary.env import Frame
 from pydantic import BaseModel
 from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
 
-from futurehouse_client.models.app import …
+from futurehouse_client.models.app import (
+    FinchTaskResponse,
+    PhoenixTaskResponse,
+    PQATaskResponse,
+    Stage,
+    TaskResponse,
+)
 from futurehouse_client.models.rest import (
     FinalEnvironmentRequest,
     StoreAgentStatePostRequest,
@@ -31,6 +37,19 @@ class JobNames(StrEnum):
     DUMMY = "job-futurehouse-dummy-env"
     PHOENIX = "job-futurehouse-phoenix"
     FINCH = "job-futurehouse-data-analysis-crow-high"
+    CHIMP = "job-futurehouse-chimp"
+
+    @classmethod
+    def _get_response_mapping(cls) -> dict[str, type[TaskResponse]]:
+        return {
+            cls.CROW: PQATaskResponse,
+            cls.FALCON: PQATaskResponse,
+            cls.OWL: PQATaskResponse,
+            cls.CHIMP: PQATaskResponse,
+            cls.PHOENIX: PhoenixTaskResponse,
+            cls.FINCH: FinchTaskResponse,
+            cls.DUMMY: TaskResponse,
+        }
 
     @classmethod
     def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
@@ -52,6 +71,13 @@ class JobNames(StrEnum):
                 f"Invalid job name: {job_name}. \nOptions are: {', '.join([name.name for name in cls])}"
             ) from e
 
+    @staticmethod
+    def get_response_object_from_job(job_name: str) -> type[TaskResponse]:
+        return JobNames._get_response_mapping()[job_name]
+
+    def get_response_object(self) -> type[TaskResponse]:
+        return self._get_response_mapping()[self.name]
+
 
 class JobClient:
     REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
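The new mapping lets callers resolve the response model for a job before fetching results. A minimal usage sketch (the assertions follow from the mapping above; since `JobNames` is a `StrEnum`, the raw job-name string works as a dictionary key too):

```python
from futurehouse_client.clients.job_client import JobNames
from futurehouse_client.models.app import FinchTaskResponse, PQATaskResponse

# Enum members are the mapping keys...
assert JobNames.get_response_object_from_job(JobNames.FINCH) is FinchTaskResponse

# ...and a StrEnum member compares and hashes like its string value,
# so the full job-name string resolves as well.
assert (
    JobNames.get_response_object_from_job("job-futurehouse-chimp")
    is PQATaskResponse
)
```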
futurehouse_client/clients/rest_client.py
CHANGED
@@ -13,6 +13,7 @@ import tempfile
 import time
 import uuid
 from collections.abc import Collection
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from types import ModuleType
 from typing import Any, ClassVar, cast
@@ -31,6 +32,7 @@ from httpx import (
     ReadError,
     ReadTimeout,
     RemoteProtocolError,
+    codes,
 )
 from ldp.agent import AgentConfig
 from requests.exceptions import RequestException, Timeout
@@ -47,7 +49,6 @@ from futurehouse_client.clients import JobNames
 from futurehouse_client.models.app import (
     AuthType,
     JobDeploymentConfig,
-    PQATaskResponse,
     Stage,
     TaskRequest,
     TaskResponse,
@@ -133,6 +134,9 @@ class RestClient:
     MAX_RETRY_WAIT: ClassVar[int] = 10
     DEFAULT_POLLING_TIME: ClassVar[int] = 5  # seconds
     CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks
+    ASSEMBLY_POLLING_INTERVAL: ClassVar[int] = 10  # seconds
+    MAX_ASSEMBLY_WAIT_TIME: ClassVar[int] = 1800  # 30 minutes
+    MAX_CONCURRENT_CHUNKS: ClassVar[int] = 12  # Maximum concurrent chunk uploads
 
     def __init__(
         self,
@@ -174,7 +178,7 @@ class RestClient:
 
     @property
     def unauthenticated_client(self) -> Client:
-        """Unauthenticated HTTP client for auth operations"""
+        """Unauthenticated HTTP client for auth operations."""
        return cast(Client, self.get_client("application/json", authenticated=False))
 
     @property
@@ -219,6 +223,8 @@ class RestClient:
         if content_type:
             headers["Content-Type"] = content_type
 
+        headers["x-client"] = "sdk"
+
         self._clients[key] = (
             AsyncClient(
                 base_url=self.base_url,
@@ -280,6 +286,104 @@ class RestClient:
         orgs = response.json()
         return [org["name"] for org in orgs]
 
+    def _check_assembly_status(
+        self, job_name: str, upload_id: str, file_name: str
+    ) -> dict[str, Any]:
+        """Check the assembly status of an uploaded file.
+
+        Args:
+            job_name: The name of the futurehouse job
+            upload_id: The upload ID
+            file_name: The name of the file
+
+        Returns:
+            Dict containing status information
+
+        Raises:
+            RestClientError: If there's an error checking status
+        """
+        try:
+            url = f"/v0.1/crows/{job_name}/assembly-status/{upload_id}/{file_name}"
+            response = self.client.get(url)
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            raise RestClientError(f"Error checking assembly status: {e}") from e
+
+    def _wait_for_all_assemblies_completion(
+        self,
+        job_name: str,
+        upload_id: str,
+        file_names: list[str],
+        timeout: int = MAX_ASSEMBLY_WAIT_TIME,
+    ) -> bool:
+        """Wait for all file assemblies to complete.
+
+        Args:
+            job_name: The name of the futurehouse job
+            upload_id: The upload ID
+            file_names: List of file names to wait for
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            True if all assemblies succeeded, False if any failed or timed out
+
+        Raises:
+            RestClientError: If any assembly fails
+        """
+        if not file_names:
+            return True
+
+        start_time = time.time()
+        logger.info(f"Waiting for assembly of {len(file_names)} file(s) to complete...")
+
+        completed_files: set[str] = set()
+
+        while (time.time() - start_time) < timeout and len(completed_files) < len(
+            file_names
+        ):
+            for file_name in file_names:
+                if file_name in completed_files:
+                    continue
+
+                try:
+                    status_data = self._check_assembly_status(
+                        job_name, upload_id, file_name
+                    )
+                    status = status_data.get("status")
+
+                    if status == ExecutionStatus.SUCCESS.value:
+                        logger.info(f"Assembly completed for {file_name}")
+                        completed_files.add(file_name)
+                    elif status == ExecutionStatus.FAIL.value:
+                        error_msg = status_data.get("error", "Unknown assembly error")
+                        raise RestClientError(
+                            f"Assembly failed for {file_name}: {error_msg}"
+                        )
+                    elif status == ExecutionStatus.IN_PROGRESS.value:
+                        logger.debug(f"Assembly in progress for {file_name}...")
+
+                except RestClientError:
+                    raise  # Re-raise assembly errors
+                except Exception as e:
+                    logger.warning(
+                        f"Error checking assembly status for {file_name}: {e}"
+                    )
+
+            # Don't sleep if all files are complete
+            if len(completed_files) < len(file_names):
+                time.sleep(self.ASSEMBLY_POLLING_INTERVAL)
+
+        if len(completed_files) < len(file_names):
+            remaining_files = set(file_names) - completed_files
+            logger.warning(
+                f"Assembly timeout for files: {remaining_files} after {timeout} seconds"
+            )
+            return False
+
+        logger.info(f"All {len(file_names)} file assemblies completed successfully")
+        return True
+
     @staticmethod
     def _validate_module_path(path: Path) -> None:
         """Validates that the given path exists and is a directory.
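Stripped of client specifics, the waiting logic above is a poll-until-done loop over per-file status checks. A standalone sketch of that pattern under stated assumptions (the function and the literal status strings here are illustrative; the shipped code compares against `ExecutionStatus` values):

```python
import time
from collections.abc import Callable

def wait_until_assembled(
    check_status: Callable[[str], dict],  # returns e.g. {"status": ..., "error": ...}
    file_names: list[str],
    interval: float = 10.0,
    timeout: float = 1800.0,
) -> bool:
    """Poll each file until every assembly succeeds, one fails, or we time out."""
    done: set[str] = set()
    start = time.time()
    while time.time() - start < timeout and len(done) < len(file_names):
        for name in file_names:
            if name in done:
                continue
            payload = check_status(name)
            if payload.get("status") == "success":  # assumed literal
                done.add(name)
            elif payload.get("status") == "fail":   # assumed literal
                raise RuntimeError(payload.get("error", "Unknown assembly error"))
        if len(done) < len(file_names):
            time.sleep(interval)  # only sleep while work remains
    return len(done) == len(file_names)
```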
@@ -366,12 +470,9 @@ class RestClient:
 
             if verbose:
                 return verbose_response
-            …
-            ):
-                return PQATaskResponse(**data)
-            return TaskResponse(**data)
+            return JobNames.get_response_object_from_job(verbose_response.job_name)(
+                **data
+            )
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e
 
@@ -412,12 +513,9 @@ class RestClient:
 
             if verbose:
                 return verbose_response
-            …
-            ):
-                return PQATaskResponse(**data)
-            return TaskResponse(**data)
+            return JobNames.get_response_object_from_job(verbose_response.job_name)(
+                **data
+            )
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e
 
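With this dispatch in place, task fetches return the job-specific model instead of the old hard-coded `PQATaskResponse`/`TaskResponse` split. A hedged usage sketch (the `get_task` method name and client construction are assumptions inferred from the error messages above, not confirmed by this diff):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="...")  # construction details assumed
task = client.get_task("your-task-id")     # placeholder task id

# A Finch job now comes back as FinchTaskResponse (with .answer/.notebook),
# Phoenix as PhoenixTaskResponse, PQA-style jobs as PQATaskResponse.
print(type(task).__name__, getattr(task, "answer", None))
```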
@@ -820,6 +918,8 @@ class RestClient:
             raise JobCreationError(f"Error generating docker image: {e!s}") from e
         return build_context
 
+    # TODO: we should have an async upload_file, check_assembly_status,
+    # wait_for_assembly_completion, upload_directory, upload_single_file
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
@@ -830,6 +930,8 @@ class RestClient:
         job_name: str,
         file_path: str | os.PathLike,
         upload_id: str | None = None,
+        wait_for_assembly: bool = True,
+        assembly_timeout: int = MAX_ASSEMBLY_WAIT_TIME,
     ) -> str:
         """Upload a file or directory to a futurehouse job bucket.
 
@@ -837,29 +939,47 @@ class RestClient:
             job_name: The name of the futurehouse job to upload to.
             file_path: The local path to the file or directory to upload.
             upload_id: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+            wait_for_assembly: After file chunking, wait for the assembly to be processed.
+            assembly_timeout: Maximum time to wait for assembly in seconds.
 
         Returns:
             The upload ID used for the upload.
 
         Raises:
             FileUploadError: If there's an error uploading the file.
+            RestClientError: If assembly fails or times out.
         """
         file_path = Path(file_path)
         if not file_path.exists():
             raise FileNotFoundError(f"File or directory not found: {file_path}")
 
         upload_id = upload_id or str(uuid.uuid4())
+        uploaded_files: list[str] = []
 
         if file_path.is_dir():
             # Process directory recursively
-            self._upload_directory(job_name, file_path, upload_id)
+            uploaded_files = self._upload_directory(job_name, file_path, upload_id)
         else:
             # Process single file
             self._upload_single_file(job_name, file_path, upload_id)
+            uploaded_files = [file_path.name]
+
+        # Wait for all assemblies if requested and we have files
+        if wait_for_assembly and uploaded_files:
+            success = self._wait_for_all_assemblies_completion(
+                job_name, upload_id, uploaded_files, assembly_timeout
+            )
+            if not success:
+                raise RestClientError(
+                    f"Assembly failed or timed out for one or more files: {uploaded_files}"
+                )
 
         logger.info(f"Successfully uploaded {file_path} to {upload_id}")
         return upload_id
 
-    def _upload_directory(…
+    def _upload_directory(
+        self, job_name: str, dir_path: Path, upload_id: str
+    ) -> list[str]:
         """Upload all files in a directory recursively.
 
         Args:
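Putting the new parameters together, a hedged example of the caller-facing behavior (the `upload_file` signature and keywords come straight from the hunk above; client construction and paths are placeholders):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="...")  # construction details assumed

# Default: block until server-side assembly of every chunked file reports
# success; raises RestClientError on failure or after assembly_timeout seconds.
upload_id = client.upload_file(
    "job-futurehouse-data-analysis-crow-high",
    "data/experiment.csv",  # placeholder path
    assembly_timeout=600,
)

# Opt out to keep the previous fire-and-forget behavior.
upload_id = client.upload_file(
    "job-futurehouse-data-analysis-crow-high",
    "data/",
    wait_for_assembly=False,
)
```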
@@ -867,12 +987,17 @@ class RestClient:
             dir_path: The path to the directory to upload.
             upload_id: The upload ID to use.
 
+        Returns:
+            List of uploaded file names.
+
         Raises:
             FileUploadError: If there's an error uploading any file.
         """
         # Skip common directories that shouldn't be uploaded
         if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
-            return
+            return []
+
+        uploaded_files: list[str] = []
 
         try:
             # Upload all files in the directory recursively
@@ -882,23 +1007,27 @@ class RestClient:
             ):
                 # Use path relative to the original directory as file name
                 rel_path = path.relative_to(dir_path)
+                file_name = str(rel_path)
                 self._upload_single_file(
                     job_name,
                     path,
                     upload_id,
-                    file_name=…
+                    file_name=file_name,
                 )
+                uploaded_files.append(file_name)
         except Exception as e:
             raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
 
+        return uploaded_files
+
     def _upload_single_file(
         self,
         job_name: str,
         file_path: Path,
         upload_id: str,
         file_name: str | None = None,
-    ) -> None:
-        """Upload a single file in chunks.
+    ) -> str | None:
+        """Upload a single file in chunks using parallel uploads.
 
         Args:
             job_name: The key of the crow to upload to.
|
|
906
1035
|
upload_id: The upload ID to use.
|
907
1036
|
file_name: Optional name to use for the file. If not provided, the file's name will be used.
|
908
1037
|
|
1038
|
+
Returns:
|
1039
|
+
The status URL if this was the last chunk, None otherwise.
|
1040
|
+
|
909
1041
|
Raises:
|
910
1042
|
FileUploadError: If there's an error uploading the file.
|
911
1043
|
"""
|
@@ -915,16 +1047,190 @@ class RestClient:
         # Skip empty files
         if file_size == 0:
             logger.warning(f"Skipping upload of empty file: {file_path}")
-            return
+            return None
 
         total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
 
         logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
 
+        status_url = None
+
         try:
-            …
+            # Upload all chunks except the last one in parallel
+            if total_chunks > 1:
+                self._upload_chunks_parallel(
+                    job_name,
+                    file_path,
+                    file_name,
+                    upload_id,
+                    total_chunks - 1,
+                    total_chunks,
+                )
+
+            # Upload the last chunk separately (handles assembly)
+            status_url = self._upload_final_chunk(
+                job_name,
+                file_path,
+                file_name,
+                upload_id,
+                total_chunks - 1,
+                total_chunks,
+            )
+
+            logger.info(f"Successfully uploaded {file_name}")
+        except Exception as e:
+            logger.exception(f"Error uploading file {file_path}")
+            raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+        return status_url
+
+    def _upload_chunks_parallel(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        num_regular_chunks: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload chunks in parallel batches.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            num_regular_chunks: Number of regular chunks (excluding final chunk).
+            total_chunks: Total number of chunks.
+
+        Raises:
+            FileUploadError: If there's an error uploading any chunk.
+        """
+        if num_regular_chunks <= 0:
+            return
+
+        # Process chunks in batches
+        for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+            batch_end = min(
+                batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+            )
+
+            # Upload chunks in this batch concurrently
+            with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_CHUNKS) as executor:
+                futures = {
+                    executor.submit(
+                        self._upload_single_chunk,
+                        job_name,
+                        file_path,
+                        file_name,
+                        upload_id,
+                        chunk_index,
+                        total_chunks,
+                    ): chunk_index
+                    for chunk_index in range(batch_start, batch_end)
+                }
+
+                for future in as_completed(futures):
+                    chunk_index = futures[future]
+                    try:
+                        future.result()
+                        logger.debug(
+                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                        )
+                    except Exception as e:
+                        logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                        raise FileUploadError(
+                            f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                        ) from e
+
+    def _upload_single_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload a single chunk.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of this chunk.
+            total_chunks: Total number of chunks.
+
+        Raises:
+            Exception: If there's an error uploading the chunk.
+        """
+        with open(file_path, "rb") as f:
+            # Read the chunk from the file
+            f.seek(chunk_index * self.CHUNK_SIZE)
+            chunk_data = f.read(self.CHUNK_SIZE)
+
+        # Prepare and send the chunk
+        with tempfile.NamedTemporaryFile() as temp_file:
+            temp_file.write(chunk_data)
+            temp_file.flush()
+
+            # Create form data
+            with open(temp_file.name, "rb") as chunk_file_obj:
+                files = {
+                    "chunk": (
+                        file_name,
+                        chunk_file_obj,
+                        "application/octet-stream",
+                    )
+                }
+                data = {
+                    "file_name": file_name,
+                    "chunk_index": chunk_index,
+                    "total_chunks": total_chunks,
+                    "upload_id": upload_id,
+                }
+
+                # Send the chunk
+                response = self.multipart_client.post(
+                    f"/v0.1/crows/{job_name}/upload-chunk",
+                    files=files,
+                    data=data,
+                )
+                response.raise_for_status()
+
+    def _upload_final_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> str | None:
+        """Upload the final chunk with retry logic for missing chunks.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of the final chunk.
+            total_chunks: Total number of chunks.
+
+        Returns:
+            The status URL from the response.
+
+        Raises:
+            FileUploadError: If there's an error uploading the final chunk.
+        """
+        retries = 0
+        max_retries = 3
+        retry_delay = 2.0  # seconds
+
+        while retries < max_retries:
+            try:
+                with open(file_path, "rb") as f:
+                    # Read the final chunk from the file
                     f.seek(chunk_index * self.CHUNK_SIZE)
                     chunk_data = f.read(self.CHUNK_SIZE)
 
@@ -949,24 +1255,46 @@ class RestClient:
                             "upload_id": upload_id,
                         }
 
-                        # Send the chunk
+                        # Send the final chunk
                         response = self.multipart_client.post(
                             f"/v0.1/crows/{job_name}/upload-chunk",
                             files=files,
                             data=data,
                         )
-                        response.raise_for_status()
 
-                        …
+                        # Handle missing chunks (status 409)
+                        if response.status_code == codes.CONFLICT:
+                            retries += 1
+                            if retries < max_retries:
+                                logger.warning(
+                                    f"Missing chunks detected for {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries})"
+                                )
+                                time.sleep(retry_delay)
+                                continue
 
-                        …
+                        response.raise_for_status()
+                        response_data = response.json()
+                        status_url = response_data.get("status_url")
 
-                        …
+                        logger.debug(
+                            f"Uploaded final chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                        )
+                        return status_url
+
+            except Exception as e:
+                if retries >= max_retries - 1:
+                    raise FileUploadError(
+                        f"Error uploading final chunk of {file_name}: {e}"
+                    ) from e
+                retries += 1
+                logger.warning(
+                    f"Error uploading final chunk of {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries}): {e}"
+                )
+                time.sleep(retry_delay)
+
+        raise FileUploadError(
+            f"Failed to upload final chunk of {file_name} after {max_retries} retries"
+        )
 
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
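The core of `_upload_chunks_parallel` is a bounded fan-out: chunks go out in batches of at most `MAX_CONCURRENT_CHUNKS`, and any chunk failure aborts the whole upload. The same pattern in isolation (illustrative names, not the shipped API):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

MAX_CONCURRENT = 12  # mirrors MAX_CONCURRENT_CHUNKS above

def send_chunk(index: int) -> None:
    """Placeholder for the per-chunk POST; raises on failure."""

def upload_in_batches(num_chunks: int) -> None:
    for batch_start in range(0, num_chunks, MAX_CONCURRENT):
        batch_end = min(batch_start + MAX_CONCURRENT, num_chunks)
        # At most MAX_CONCURRENT requests are in flight per batch.
        with ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
            futures = {
                executor.submit(send_chunk, i): i
                for i in range(batch_start, batch_end)
            }
            for future in as_completed(futures):
                future.result()  # re-raises the first per-chunk exception
```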
futurehouse_client/models/app.py
CHANGED
@@ -1,3 +1,4 @@
+import copy
 import json
 import os
 import re
@@ -675,7 +676,8 @@ class TaskResponse(BaseModel):
 
     @model_validator(mode="before")
     @classmethod
-    def validate_fields(cls, …
+    def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
         # Extract fields from environment frame state
         if not isinstance(data, dict):
             return data
@@ -690,7 +692,72 @@ class TaskResponse(BaseModel):
         return data
 
 
+class PhoenixTaskResponse(TaskResponse):
+    """
+    Response scheme for tasks executed with Phoenix.
+
+    Additional fields:
+        answer: Final answer from Phoenix
+    """
+
+    model_config = ConfigDict(extra="ignore")
+    answer: str | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_phoenix_fields(
+        cls, original_data: Mapping[str, Any]
+    ) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)
+        if not isinstance(data, dict):
+            return data
+        if not (env_frame := data.get("environment_frame", {})):
+            return data
+        state = env_frame.get("state", {}).get("state", {})
+        data["answer"] = state.get("answer")
+        return data
+
+
+class FinchTaskResponse(TaskResponse):
+    """
+    Response scheme for tasks executed with Finch.
+
+    Additional fields:
+        answer: Final answer from Finch
+        notebook: a dictionary with `cells` and `metadata` regarding the notebook content
+    """
+
+    model_config = ConfigDict(extra="ignore")
+    answer: str | None = None
+    notebook: dict[str, Any] | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_finch_fields(
+        cls, original_data: Mapping[str, Any]
+    ) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)
+        if not isinstance(data, dict):
+            return data
+        if not (env_frame := data.get("environment_frame", {})):
+            return data
+        state = env_frame.get("state", {}).get("state", {})
+        data["answer"] = state.get("answer")
+        data["notebook"] = state.get("nb_state")
+        return data
+
+
 class PQATaskResponse(TaskResponse):
+    """
+    Response scheme for tasks executed with PQA.
+
+    Additional fields:
+        answer: Final answer from PQA
+        formatted_answer: Formatted answer from PQA
+        answer_reasoning: Reasoning used to generate the final answer, if available
+        has_successful_answer: Whether the answer is successful
+    """
+
     model_config = ConfigDict(extra="ignore")
 
     answer: str | None = None
@@ -702,7 +769,8 @@ class PQATaskResponse(TaskResponse):
 
     @model_validator(mode="before")
     @classmethod
-    def validate_pqa_fields(cls, …
+    def validate_pqa_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
         if not isinstance(data, dict):
             return data
         if not (env_frame := data.get("environment_frame", {})):
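All three validators read from the same nested spot, `environment_frame.state.state`. The extraction step in isolation, run against an invented payload shaped like the frames these validators expect:

```python
# Invented payload for illustration; only the nesting mirrors the code above.
payload = {
    "environment_frame": {
        "state": {
            "state": {
                "answer": "42",
                "nb_state": {"cells": [], "metadata": {}},
            }
        }
    }
}

env_frame = payload.get("environment_frame", {})
state = env_frame.get("state", {}).get("state", {})
print(state.get("answer"))    # -> "42" (the PQA/Phoenix/Finch `answer` field)
print(state.get("nb_state"))  # -> source of the Finch `notebook` field
```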
futurehouse_client/utils/auth.py
CHANGED
@@ -1,4 +1,5 @@
 import logging
+from collections.abc import Collection, Generator
 from typing import ClassVar, Final
 
 import httpx
@@ -42,7 +43,7 @@ def _run_auth(
 class RefreshingJWT(httpx.Auth):
     """Automatically (re-)inject a JWT and transparently retry exactly once when we hit a 401/403."""
 
-    RETRY_STATUSES: ClassVar[…
+    RETRY_STATUSES: ClassVar[Collection[httpx.codes]] = {
         httpx.codes.UNAUTHORIZED,
         httpx.codes.FORBIDDEN,
     }
@@ -64,7 +65,7 @@ class RefreshingJWT(httpx.Auth):
             api_key=api_key,
         )
 
-    def refresh_token(self):
+    def refresh_token(self) -> None:
         if self.auth_type == AuthType.JWT:
             logger.error(INVALID_REFRESH_TYPE_MSG)
             raise ValueError(INVALID_REFRESH_TYPE_MSG)
@@ -74,7 +75,9 @@ class RefreshingJWT(httpx.Auth):
             api_key=self.api_key,
        )
 
-    def auth_flow(…
+    def auth_flow(
+        self, request: httpx.Request
+    ) -> Generator[httpx.Request, httpx.Response, None]:
         request.headers["Authorization"] = f"Bearer {self._jwt}"
         response = yield request
 
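The `auth_flow` generator above implements httpx's custom-auth protocol: send the request once, and if the response is 401/403, refresh the token and resend exactly once. A self-contained sketch of that protocol (this class is illustrative, not the shipped `RefreshingJWT`, whose constructor and refresh logic differ):

```python
from collections.abc import Generator
import httpx

class RetryOnceJWT(httpx.Auth):
    RETRY_STATUSES = {httpx.codes.UNAUTHORIZED, httpx.codes.FORBIDDEN}

    def __init__(self, jwt: str):
        self._jwt = jwt

    def refresh_token(self) -> None:
        self._jwt = "new-token"  # stand-in for a real re-auth call

    def auth_flow(
        self, request: httpx.Request
    ) -> Generator[httpx.Request, httpx.Response, None]:
        request.headers["Authorization"] = f"Bearer {self._jwt}"
        response = yield request
        if response.status_code in self.RETRY_STATUSES:
            self.refresh_token()
            request.headers["Authorization"] = f"Bearer {self._jwt}"
            yield request  # transparently retried exactly once

# Usage: httpx.Client(auth=RetryOnceJWT(jwt="initial-token"))
```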
{futurehouse_client-0.3.18.dev195.dist-info → futurehouse_client-0.3.19.dev111.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.18.dev195
+Version: 0.3.19.dev111
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
futurehouse_client-0.3.19.dev111.dist-info/RECORD
ADDED
@@ -0,0 +1,17 @@
+futurehouse_client/__init__.py,sha256=OzGDkVm5UTUzd4n8yOmRjMF73YrK0FaIQX5gS3Dk8Zo,304
+futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
+futurehouse_client/clients/job_client.py,sha256=JgB5IUAyCmnhGRsYc3bgKldA-lkM1JLwHRwwUeOCdus,11944
+futurehouse_client/clients/rest_client.py,sha256=_XgkzA9OhUKjL9vpkU6ixh2lUW9StgqfGgLk2qHjGgI,55518
+futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
+futurehouse_client/models/app.py,sha256=VCtg0ygd-TSrR6DtfljTBt9jnl1eBNal8UXHFdkDg88,28587
+futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
+futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
+futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
+futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
+futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
+futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
+futurehouse_client-0.3.19.dev111.dist-info/METADATA,sha256=N4Msi8W4mMBXFs_-Pl8Ii12RcLRm2eBl9NiIFCy5--E,12767
+futurehouse_client-0.3.19.dev111.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+futurehouse_client-0.3.19.dev111.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.3.19.dev111.dist-info/RECORD,,
futurehouse_client-0.3.18.dev195.dist-info/RECORD
REMOVED
@@ -1,17 +0,0 @@
-futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
-futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
-futurehouse_client/clients/job_client.py,sha256=uNkqQbeZw7wbA0qDWcIOwOykrosza-jev58paJZ_mbA,11150
-futurehouse_client/clients/rest_client.py,sha256=6HQF3YXDnSdGxAoXpB_wU6Vhcqhp5OB5SNuGQJ6Hseo,43454
-futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
-futurehouse_client/models/app.py,sha256=w_1e4F0IiC-BKeOLqYkABYo4U-Nka1S-F64S_eHB2KM,26421
-futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
-futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
-futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-futurehouse_client/utils/auth.py,sha256=0V161S9jW4vbTCoJJrOtNzWXQkAVyzdGM3yefGgJ578,2808
-futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
-futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
-futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
-futurehouse_client-0.3.18.dev195.dist-info/METADATA,sha256=yM1NbN2au3MmkfIkkuT85eYahKYTmnBuaWCQ1OvQ97A,12767
-futurehouse_client-0.3.18.dev195.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-futurehouse_client-0.3.18.dev195.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
-futurehouse_client-0.3.18.dev195.dist-info/RECORD,,
{futurehouse_client-0.3.18.dev195.dist-info → futurehouse_client-0.3.19.dev111.dist-info}/top_level.txt
RENAMED
File without changes