futurehouse-client 0.3.18.dev195__py3-none-any.whl → 0.3.19.dev111__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
--- a/futurehouse_client/__init__.py
+++ b/futurehouse_client/__init__.py
@@ -1,12 +1,11 @@
 from .clients.job_client import JobClient, JobNames
-from .clients.rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
 from .clients.rest_client import RestClient as FutureHouseClient
+from .clients.rest_client import TaskResponse, TaskResponseVerbose
 
 __all__ = [
     "FutureHouseClient",
     "JobClient",
     "JobNames",
-    "PQATaskResponse",
     "TaskResponse",
     "TaskResponseVerbose",
 ]
--- a/futurehouse_client/clients/__init__.py
+++ b/futurehouse_client/clients/__init__.py
@@ -1,12 +1,11 @@
 from .job_client import JobClient, JobNames
-from .rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
 from .rest_client import RestClient as FutureHouseClient
+from .rest_client import TaskResponse, TaskResponseVerbose
 
 __all__ = [
     "FutureHouseClient",
     "JobClient",
     "JobNames",
-    "PQATaskResponse",
    "TaskResponse",
    "TaskResponseVerbose",
 ]
--- a/futurehouse_client/clients/job_client.py
+++ b/futurehouse_client/clients/job_client.py
@@ -8,7 +8,13 @@ from aviary.env import Frame
 from pydantic import BaseModel
 from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
 
-from futurehouse_client.models.app import Stage
+from futurehouse_client.models.app import (
+    FinchTaskResponse,
+    PhoenixTaskResponse,
+    PQATaskResponse,
+    Stage,
+    TaskResponse,
+)
 from futurehouse_client.models.rest import (
     FinalEnvironmentRequest,
     StoreAgentStatePostRequest,
@@ -31,6 +37,19 @@ class JobNames(StrEnum):
     DUMMY = "job-futurehouse-dummy-env"
     PHOENIX = "job-futurehouse-phoenix"
     FINCH = "job-futurehouse-data-analysis-crow-high"
+    CHIMP = "job-futurehouse-chimp"
+
+    @classmethod
+    def _get_response_mapping(cls) -> dict[str, type[TaskResponse]]:
+        return {
+            cls.CROW: PQATaskResponse,
+            cls.FALCON: PQATaskResponse,
+            cls.OWL: PQATaskResponse,
+            cls.CHIMP: PQATaskResponse,
+            cls.PHOENIX: PhoenixTaskResponse,
+            cls.FINCH: FinchTaskResponse,
+            cls.DUMMY: TaskResponse,
+        }
 
     @classmethod
     def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
@@ -52,6 +71,13 @@ class JobNames(StrEnum):
             f"Invalid job name: {job_name}. \nOptions are: {', '.join([name.name for name in cls])}"
         ) from e
 
+    @staticmethod
+    def get_response_object_from_job(job_name: str) -> type[TaskResponse]:
+        return JobNames._get_response_mapping()[job_name]
+
+    def get_response_object(self) -> type[TaskResponse]:
+        return self._get_response_mapping()[self]
+
 
 class JobClient:
     REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
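The new `_get_response_mapping` centralizes which `TaskResponse` subclass each job produces, replacing the hard-coded job-name checks in `rest_client.py` (see the hunks below). A minimal usage sketch, assuming the package is installed and that job keys are passed as the full StrEnum value strings (the mapping is keyed by the members themselves, so an exact-value lookup is required):

```python
from futurehouse_client.clients.job_client import JobNames

# Resolve the response model for a job key; the lookup expects the exact
# enum value string, e.g. "job-futurehouse-phoenix" for JobNames.PHOENIX.
response_cls = JobNames.get_response_object_from_job(JobNames.PHOENIX)
print(response_cls.__name__)  # PhoenixTaskResponse

# Or resolve it from a member directly:
print(JobNames.FINCH.get_response_object().__name__)  # FinchTaskResponse
```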
--- a/futurehouse_client/clients/rest_client.py
+++ b/futurehouse_client/clients/rest_client.py
@@ -13,6 +13,7 @@ import tempfile
 import time
 import uuid
 from collections.abc import Collection
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from types import ModuleType
 from typing import Any, ClassVar, cast
@@ -31,6 +32,7 @@ from httpx import (
     ReadError,
     ReadTimeout,
     RemoteProtocolError,
+    codes,
 )
 from ldp.agent import AgentConfig
 from requests.exceptions import RequestException, Timeout
@@ -47,7 +49,6 @@ from futurehouse_client.clients import JobNames
 from futurehouse_client.models.app import (
     AuthType,
     JobDeploymentConfig,
-    PQATaskResponse,
     Stage,
     TaskRequest,
     TaskResponse,
@@ -133,6 +134,9 @@ class RestClient:
     MAX_RETRY_WAIT: ClassVar[int] = 10
     DEFAULT_POLLING_TIME: ClassVar[int] = 5  # seconds
     CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks
+    ASSEMBLY_POLLING_INTERVAL: ClassVar[int] = 10  # seconds
+    MAX_ASSEMBLY_WAIT_TIME: ClassVar[int] = 1800  # 30 minutes
+    MAX_CONCURRENT_CHUNKS: ClassVar[int] = 12  # Maximum concurrent chunk uploads
 
     def __init__(
         self,
@@ -174,7 +178,7 @@ class RestClient:
 
     @property
     def unauthenticated_client(self) -> Client:
-        """Unauthenticated HTTP client for auth operations to avoid recursion."""
+        """Unauthenticated HTTP client for auth operations."""
         return cast(Client, self.get_client("application/json", authenticated=False))
 
     @property
@@ -219,6 +223,8 @@ class RestClient:
         if content_type:
             headers["Content-Type"] = content_type
 
+        headers["x-client"] = "sdk"
+
         self._clients[key] = (
             AsyncClient(
                 base_url=self.base_url,
@@ -280,6 +286,104 @@ class RestClient:
         orgs = response.json()
         return [org["name"] for org in orgs]
 
+    def _check_assembly_status(
+        self, job_name: str, upload_id: str, file_name: str
+    ) -> dict[str, Any]:
+        """Check the assembly status of an uploaded file.
+
+        Args:
+            job_name: The name of the futurehouse job
+            upload_id: The upload ID
+            file_name: The name of the file
+
+        Returns:
+            Dict containing status information
+
+        Raises:
+            RestClientError: If there's an error checking status
+        """
+        try:
+            url = f"/v0.1/crows/{job_name}/assembly-status/{upload_id}/{file_name}"
+            response = self.client.get(url)
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            raise RestClientError(f"Error checking assembly status: {e}") from e
+
+    def _wait_for_all_assemblies_completion(
+        self,
+        job_name: str,
+        upload_id: str,
+        file_names: list[str],
+        timeout: int = MAX_ASSEMBLY_WAIT_TIME,
+    ) -> bool:
+        """Wait for all file assemblies to complete.
+
+        Args:
+            job_name: The name of the futurehouse job
+            upload_id: The upload ID
+            file_names: List of file names to wait for
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            True if all assemblies succeeded, False if any failed or timed out
+
+        Raises:
+            RestClientError: If any assembly fails
+        """
+        if not file_names:
+            return True
+
+        start_time = time.time()
+        logger.info(f"Waiting for assembly of {len(file_names)} file(s) to complete...")
+
+        completed_files: set[str] = set()
+
+        while (time.time() - start_time) < timeout and len(completed_files) < len(
+            file_names
+        ):
+            for file_name in file_names:
+                if file_name in completed_files:
+                    continue
+
+                try:
+                    status_data = self._check_assembly_status(
+                        job_name, upload_id, file_name
+                    )
+                    status = status_data.get("status")
+
+                    if status == ExecutionStatus.SUCCESS.value:
+                        logger.info(f"Assembly completed for {file_name}")
+                        completed_files.add(file_name)
+                    elif status == ExecutionStatus.FAIL.value:
+                        error_msg = status_data.get("error", "Unknown assembly error")
+                        raise RestClientError(
+                            f"Assembly failed for {file_name}: {error_msg}"
+                        )
+                    elif status == ExecutionStatus.IN_PROGRESS.value:
+                        logger.debug(f"Assembly in progress for {file_name}...")
+
+                except RestClientError:
+                    raise  # Re-raise assembly errors
+                except Exception as e:
+                    logger.warning(
+                        f"Error checking assembly status for {file_name}: {e}"
+                    )
+
+            # Don't sleep if all files are complete
+            if len(completed_files) < len(file_names):
+                time.sleep(self.ASSEMBLY_POLLING_INTERVAL)
+
+        if len(completed_files) < len(file_names):
+            remaining_files = set(file_names) - completed_files
+            logger.warning(
+                f"Assembly timeout for files: {remaining_files} after {timeout} seconds"
+            )
+            return False
+
+        logger.info(f"All {len(file_names)} file assemblies completed successfully")
+        return True
+
     @staticmethod
     def _validate_module_path(path: Path) -> None:
         """Validates that the given path exists and is a directory.
@@ -366,12 +470,9 @@ class RestClient:
 
             if verbose:
                 return verbose_response
-            if any(
-                JobNames.from_string(job_name) in verbose_response.job_name
-                for job_name in ["crow", "falcon", "owl", "dummy"]
-            ):
-                return PQATaskResponse(**data)
-            return TaskResponse(**data)
+            return JobNames.get_response_object_from_job(verbose_response.job_name)(
+                **data
+            )
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e
 
@@ -412,12 +513,9 @@ class RestClient:
 
             if verbose:
                 return verbose_response
-            if any(
-                JobNames.from_string(job_name) in verbose_response.job_name
-                for job_name in ["crow", "falcon", "owl", "dummy"]
-            ):
-                return PQATaskResponse(**data)
-            return TaskResponse(**data)
+            return JobNames.get_response_object_from_job(verbose_response.job_name)(
+                **data
+            )
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e
 
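Both `get_task` variants now delegate to that mapping instead of string-matching PQA job names, so Phoenix and Finch tasks deserialize into their own models. A hedged sketch of the resulting behavior (the API key and task ID are placeholders, and the `api_key` constructor argument is an assumption based on the auth module):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="YOUR_API_KEY")  # placeholder credentials
task = client.get_task("00000000-0000-0000-0000-000000000000")  # placeholder ID

# The concrete type depends on the job that produced the task:
# PQATaskResponse (crow/falcon/owl/chimp), PhoenixTaskResponse (phoenix),
# FinchTaskResponse (finch), or plain TaskResponse (dummy).
print(type(task).__name__, getattr(task, "answer", None))
```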
@@ -820,6 +918,8 @@ class RestClient:
             raise JobCreationError(f"Error generating docker image: {e!s}") from e
         return build_context
 
+    # TODO: we should have an async upload_file, check_assembly_status,
+    # wait_for_assembly_completion, upload_directory, upload_single_file
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
@@ -830,6 +930,8 @@ class RestClient:
         job_name: str,
         file_path: str | os.PathLike,
         upload_id: str | None = None,
+        wait_for_assembly: bool = True,
+        assembly_timeout: int = MAX_ASSEMBLY_WAIT_TIME,
     ) -> str:
         """Upload a file or directory to a futurehouse job bucket.
 
@@ -837,29 +939,47 @@ class RestClient:
             job_name: The name of the futurehouse job to upload to.
             file_path: The local path to the file or directory to upload.
             upload_id: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+            wait_for_assembly: After file chunking, wait for the assembly to be processed.
+            assembly_timeout: Maximum time to wait for assembly in seconds.
 
         Returns:
             The upload ID used for the upload.
 
         Raises:
             FileUploadError: If there's an error uploading the file.
+            RestClientError: If assembly fails or times out.
         """
         file_path = Path(file_path)
         if not file_path.exists():
             raise FileNotFoundError(f"File or directory not found: {file_path}")
 
         upload_id = upload_id or str(uuid.uuid4())
+        uploaded_files: list[str] = []
 
         if file_path.is_dir():
             # Process directory recursively
-            self._upload_directory(job_name, file_path, upload_id)
+            uploaded_files = self._upload_directory(job_name, file_path, upload_id)
         else:
             # Process single file
             self._upload_single_file(job_name, file_path, upload_id)
+            uploaded_files = [file_path.name]
+
+        # Wait for all assemblies if requested and we have files
+        if wait_for_assembly and uploaded_files:
+            success = self._wait_for_all_assemblies_completion(
+                job_name, upload_id, uploaded_files, assembly_timeout
+            )
+            if not success:
+                raise RestClientError(
+                    f"Assembly failed or timed out for one or more files: {uploaded_files}"
+                )
+
         logger.info(f"Successfully uploaded {file_path} to {upload_id}")
         return upload_id
 
-    def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
+    def _upload_directory(
+        self, job_name: str, dir_path: Path, upload_id: str
+    ) -> list[str]:
         """Upload all files in a directory recursively.
 
         Args:
@@ -867,12 +987,17 @@ class RestClient:
             dir_path: The path to the directory to upload.
             upload_id: The upload ID to use.
 
+        Returns:
+            List of uploaded file names.
+
         Raises:
             FileUploadError: If there's an error uploading any file.
         """
         # Skip common directories that shouldn't be uploaded
         if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
-            return
+            return []
+
+        uploaded_files: list[str] = []
 
         try:
             # Upload all files in the directory recursively
@@ -882,23 +1007,27 @@ class RestClient:
             ):
                 # Use path relative to the original directory as file name
                 rel_path = path.relative_to(dir_path)
+                file_name = str(rel_path)
                 self._upload_single_file(
                     job_name,
                     path,
                     upload_id,
-                    file_name=str(rel_path),
+                    file_name=file_name,
                 )
+                uploaded_files.append(file_name)
         except Exception as e:
             raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
 
+        return uploaded_files
+
     def _upload_single_file(
         self,
         job_name: str,
         file_path: Path,
         upload_id: str,
         file_name: str | None = None,
-    ) -> None:
-        """Upload a single file in chunks.
+    ) -> str | None:
+        """Upload a single file in chunks using parallel uploads.
 
         Args:
             job_name: The key of the crow to upload to.
@@ -906,6 +1035,9 @@ class RestClient:
             upload_id: The upload ID to use.
             file_name: Optional name to use for the file. If not provided, the file's name will be used.
 
+        Returns:
+            The status URL if this was the last chunk, None otherwise.
+
         Raises:
             FileUploadError: If there's an error uploading the file.
         """
@@ -915,16 +1047,190 @@ class RestClient:
         # Skip empty files
         if file_size == 0:
             logger.warning(f"Skipping upload of empty file: {file_path}")
-            return
+            return None
 
         total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
 
         logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
 
+        status_url = None
+
         try:
-            with open(file_path, "rb") as f:
-                for chunk_index in range(total_chunks):
-                    # Read the chunk from the file
+            # Upload all chunks except the last one in parallel
+            if total_chunks > 1:
+                self._upload_chunks_parallel(
+                    job_name,
+                    file_path,
+                    file_name,
+                    upload_id,
+                    total_chunks - 1,
+                    total_chunks,
+                )
+
+            # Upload the last chunk separately (handles assembly)
+            status_url = self._upload_final_chunk(
+                job_name,
+                file_path,
+                file_name,
+                upload_id,
+                total_chunks - 1,
+                total_chunks,
+            )
+
+            logger.info(f"Successfully uploaded {file_name}")
+        except Exception as e:
+            logger.exception(f"Error uploading file {file_path}")
+            raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+        return status_url
+
+    def _upload_chunks_parallel(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        num_regular_chunks: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload chunks in parallel batches.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            num_regular_chunks: Number of regular chunks (excluding final chunk).
+            total_chunks: Total number of chunks.
+
+        Raises:
+            FileUploadError: If there's an error uploading any chunk.
+        """
+        if num_regular_chunks <= 0:
+            return
+
+        # Process chunks in batches
+        for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+            batch_end = min(
+                batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+            )
+
+            # Upload chunks in this batch concurrently
+            with ThreadPoolExecutor(max_workers=self.MAX_CONCURRENT_CHUNKS) as executor:
+                futures = {
+                    executor.submit(
+                        self._upload_single_chunk,
+                        job_name,
+                        file_path,
+                        file_name,
+                        upload_id,
+                        chunk_index,
+                        total_chunks,
+                    ): chunk_index
+                    for chunk_index in range(batch_start, batch_end)
+                }
+
+                for future in as_completed(futures):
+                    chunk_index = futures[future]
+                    try:
+                        future.result()
+                        logger.debug(
+                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                        )
+                    except Exception as e:
+                        logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                        raise FileUploadError(
+                            f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                        ) from e
+
+    def _upload_single_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> None:
+        """Upload a single chunk.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of this chunk.
+            total_chunks: Total number of chunks.
+
+        Raises:
+            Exception: If there's an error uploading the chunk.
+        """
+        with open(file_path, "rb") as f:
+            # Read the chunk from the file
+            f.seek(chunk_index * self.CHUNK_SIZE)
+            chunk_data = f.read(self.CHUNK_SIZE)
+
+        # Prepare and send the chunk
+        with tempfile.NamedTemporaryFile() as temp_file:
+            temp_file.write(chunk_data)
+            temp_file.flush()
+
+            # Create form data
+            with open(temp_file.name, "rb") as chunk_file_obj:
+                files = {
+                    "chunk": (
+                        file_name,
+                        chunk_file_obj,
+                        "application/octet-stream",
+                    )
+                }
+                data = {
+                    "file_name": file_name,
+                    "chunk_index": chunk_index,
+                    "total_chunks": total_chunks,
+                    "upload_id": upload_id,
+                }
+
+                # Send the chunk
+                response = self.multipart_client.post(
+                    f"/v0.1/crows/{job_name}/upload-chunk",
+                    files=files,
+                    data=data,
+                )
+                response.raise_for_status()
+
+    def _upload_final_chunk(
+        self,
+        job_name: str,
+        file_path: Path,
+        file_name: str,
+        upload_id: str,
+        chunk_index: int,
+        total_chunks: int,
+    ) -> str | None:
+        """Upload the final chunk with retry logic for missing chunks.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            file_name: The name to use for the file.
+            upload_id: The upload ID to use.
+            chunk_index: The index of the final chunk.
+            total_chunks: Total number of chunks.
+
+        Returns:
+            The status URL from the response.
+
+        Raises:
+            FileUploadError: If there's an error uploading the final chunk.
+        """
+        retries = 0
+        max_retries = 3
+        retry_delay = 2.0  # seconds
+
+        while retries < max_retries:
+            try:
+                with open(file_path, "rb") as f:
+                    # Read the final chunk from the file
                     f.seek(chunk_index * self.CHUNK_SIZE)
                     chunk_data = f.read(self.CHUNK_SIZE)
 
@@ -949,24 +1255,46 @@ class RestClient:
                             "upload_id": upload_id,
                         }
 
-                        # Send the chunk
+                        # Send the final chunk
                         response = self.multipart_client.post(
                             f"/v0.1/crows/{job_name}/upload-chunk",
                             files=files,
                             data=data,
                         )
-                        response.raise_for_status()
 
-                        # Call progress callback if provided
+                        # Handle missing chunks (status 409)
+                        if response.status_code == codes.CONFLICT:
+                            retries += 1
+                            if retries < max_retries:
+                                logger.warning(
+                                    f"Missing chunks detected for {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries})"
+                                )
+                                time.sleep(retry_delay)
+                                continue
 
-                        logger.debug(
-                            f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
-                        )
+                        response.raise_for_status()
+                        response_data = response.json()
+                        status_url = response_data.get("status_url")
 
-            logger.info(f"Successfully uploaded {file_name}")
-        except Exception as e:
-            logger.exception(f"Error uploading file {file_path}")
-            raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+                        logger.debug(
+                            f"Uploaded final chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                        )
+                        return status_url
+
+            except Exception as e:
+                if retries >= max_retries - 1:
+                    raise FileUploadError(
+                        f"Error uploading final chunk of {file_name}: {e}"
+                    ) from e
+                retries += 1
+                logger.warning(
+                    f"Error uploading final chunk of {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries}): {e}"
+                )
+                time.sleep(retry_delay)
+
+        raise FileUploadError(
+            f"Failed to upload final chunk of {file_name} after {max_retries} retries"
+        )
 
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
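Taken together, `upload_file` now splits a file into 16 MB chunks, uploads all but the last chunk in parallel batches of up to `MAX_CONCURRENT_CHUNKS`, sends the final chunk to trigger server-side assembly, and by default polls the assembly status before returning. A sketch of the new knobs (client construction and paths are placeholders, and the `api_key` argument is assumed from the auth module):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="YOUR_API_KEY")  # placeholder credentials

# Upload a directory and block until every file is assembled server-side,
# raising RestClientError after 10 minutes instead of the 30-minute default.
upload_id = client.upload_file(
    "job-futurehouse-data-analysis-crow-high",
    file_path="./my_dataset",  # placeholder path
    wait_for_assembly=True,    # default; False returns once the chunks land
    assembly_timeout=600,      # seconds
)
print(f"Uploaded under {upload_id}")
```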
@@ -1,3 +1,4 @@
1
+ import copy
1
2
  import json
2
3
  import os
3
4
  import re
@@ -675,7 +676,8 @@ class TaskResponse(BaseModel):
 
     @model_validator(mode="before")
     @classmethod
-    def validate_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
+    def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
         # Extract fields from environment frame state
         if not isinstance(data, dict):
             return data
@@ -690,7 +692,72 @@ class TaskResponse(BaseModel):
         return data
 
 
+class PhoenixTaskResponse(TaskResponse):
+    """
+    Response schema for tasks executed with Phoenix.
+
+    Additional fields:
+        answer: Final answer from Phoenix
+    """
+
+    model_config = ConfigDict(extra="ignore")
+    answer: str | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_phoenix_fields(
+        cls, original_data: Mapping[str, Any]
+    ) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)
+        if not isinstance(data, dict):
+            return data
+        if not (env_frame := data.get("environment_frame", {})):
+            return data
+        state = env_frame.get("state", {}).get("state", {})
+        data["answer"] = state.get("answer")
+        return data
+
+
+class FinchTaskResponse(TaskResponse):
+    """
+    Response schema for tasks executed with Finch.
+
+    Additional fields:
+        answer: Final answer from Finch
+        notebook: a dictionary with `cells` and `metadata` describing the notebook content
+    """
+
+    model_config = ConfigDict(extra="ignore")
+    answer: str | None = None
+    notebook: dict[str, Any] | None = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_finch_fields(
+        cls, original_data: Mapping[str, Any]
+    ) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)
+        if not isinstance(data, dict):
+            return data
+        if not (env_frame := data.get("environment_frame", {})):
+            return data
+        state = env_frame.get("state", {}).get("state", {})
+        data["answer"] = state.get("answer")
+        data["notebook"] = state.get("nb_state")
+        return data
+
+
 class PQATaskResponse(TaskResponse):
+    """
+    Response schema for tasks executed with PQA.
+
+    Additional fields:
+        answer: Final answer from PQA
+        formatted_answer: Formatted answer from PQA
+        answer_reasoning: Reasoning used to generate the final answer, if available
+        has_successful_answer: Whether the answer is successful
+    """
+
     model_config = ConfigDict(extra="ignore")
 
     answer: str | None = None
@@ -702,7 +769,8 @@ class PQATaskResponse(TaskResponse):
 
     @model_validator(mode="before")
     @classmethod
-    def validate_pqa_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
+    def validate_pqa_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+        data = copy.deepcopy(original_data)  # Avoid mutating the original data
         if not isinstance(data, dict):
             return data
         if not (env_frame := data.get("environment_frame", {})):
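The `mode="before"` validators above all follow the same shape: deep-copy the raw payload, dig into `environment_frame.state.state`, and promote selected keys to top-level fields, leaving the caller's dict untouched. A self-contained sketch of that pattern (a stand-in model, not the package's own):

```python
import copy
from collections.abc import Mapping
from typing import Any

from pydantic import BaseModel, ConfigDict, model_validator


class DemoTaskResponse(BaseModel):
    """Stand-in mirroring the validators in models/app.py."""

    model_config = ConfigDict(extra="ignore")
    answer: str | None = None

    @model_validator(mode="before")
    @classmethod
    def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
        data = copy.deepcopy(original_data)  # leave the caller's payload untouched
        if isinstance(data, dict) and (env_frame := data.get("environment_frame", {})):
            state = env_frame.get("state", {}).get("state", {})
            data["answer"] = state.get("answer")
        return data


raw = {"environment_frame": {"state": {"state": {"answer": "42"}}}}
print(DemoTaskResponse.model_validate(raw).answer)  # 42
print("answer" in raw)  # False: deepcopy kept the original payload clean
```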
--- a/futurehouse_client/utils/auth.py
+++ b/futurehouse_client/utils/auth.py
@@ -1,4 +1,5 @@
 import logging
+from collections.abc import Collection, Generator
 from typing import ClassVar, Final
 
 import httpx
@@ -42,7 +43,7 @@ def _run_auth(
 class RefreshingJWT(httpx.Auth):
     """Automatically (re-)inject a JWT and transparently retry exactly once when we hit a 401/403."""
 
-    RETRY_STATUSES: ClassVar[set[int]] = {
+    RETRY_STATUSES: ClassVar[Collection[httpx.codes]] = {
         httpx.codes.UNAUTHORIZED,
         httpx.codes.FORBIDDEN,
     }
@@ -64,7 +65,7 @@ class RefreshingJWT(httpx.Auth):
             api_key=api_key,
         )
 
-    def refresh_token(self):
+    def refresh_token(self) -> None:
         if self.auth_type == AuthType.JWT:
             logger.error(INVALID_REFRESH_TYPE_MSG)
             raise ValueError(INVALID_REFRESH_TYPE_MSG)
@@ -74,7 +75,9 @@ class RefreshingJWT(httpx.Auth):
             api_key=self.api_key,
         )
 
-    def auth_flow(self, request):
+    def auth_flow(
+        self, request: httpx.Request
+    ) -> Generator[httpx.Request, httpx.Response, None]:
         request.headers["Authorization"] = f"Bearer {self._jwt}"
         response = yield request
 
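The new annotation spells out httpx's generator-based auth contract: `auth_flow` yields a request, receives the response back from the transport, and may yield again to retry. A standalone sketch of that retry-once shape (token handling reduced to a stand-in string):

```python
from collections.abc import Generator

import httpx


class DemoRefreshingAuth(httpx.Auth):
    """Stand-in mirroring RefreshingJWT's retry-once flow."""

    def __init__(self) -> None:
        self._token = "stale-token"  # placeholder JWT

    def auth_flow(
        self, request: httpx.Request
    ) -> Generator[httpx.Request, httpx.Response, None]:
        request.headers["Authorization"] = f"Bearer {self._token}"
        response = yield request  # httpx sends the request, hands back the response

        if response.status_code in {httpx.codes.UNAUTHORIZED, httpx.codes.FORBIDDEN}:
            self._token = "fresh-token"  # stand-in for refresh_token()
            request.headers["Authorization"] = f"Bearer {self._token}"
            yield request  # transparent single retry
```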
--- a/futurehouse_client-0.3.18.dev195.dist-info/METADATA
+++ b/futurehouse_client-0.3.19.dev111.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.18.dev195
+Version: 0.3.19.dev111
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
--- /dev/null
+++ b/futurehouse_client-0.3.19.dev111.dist-info/RECORD
@@ -0,0 +1,17 @@
+futurehouse_client/__init__.py,sha256=OzGDkVm5UTUzd4n8yOmRjMF73YrK0FaIQX5gS3Dk8Zo,304
+futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
+futurehouse_client/clients/job_client.py,sha256=JgB5IUAyCmnhGRsYc3bgKldA-lkM1JLwHRwwUeOCdus,11944
+futurehouse_client/clients/rest_client.py,sha256=_XgkzA9OhUKjL9vpkU6ixh2lUW9StgqfGgLk2qHjGgI,55518
+futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
+futurehouse_client/models/app.py,sha256=VCtg0ygd-TSrR6DtfljTBt9jnl1eBNal8UXHFdkDg88,28587
+futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
+futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
+futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
+futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
+futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
+futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
+futurehouse_client-0.3.19.dev111.dist-info/METADATA,sha256=N4Msi8W4mMBXFs_-Pl8Ii12RcLRm2eBl9NiIFCy5--E,12767
+futurehouse_client-0.3.19.dev111.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+futurehouse_client-0.3.19.dev111.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.3.19.dev111.dist-info/RECORD,,
--- a/futurehouse_client-0.3.18.dev195.dist-info/WHEEL
+++ b/futurehouse_client-0.3.19.dev111.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.7.1)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -1,17 +0,0 @@
1
- futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
2
- futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
3
- futurehouse_client/clients/job_client.py,sha256=uNkqQbeZw7wbA0qDWcIOwOykrosza-jev58paJZ_mbA,11150
4
- futurehouse_client/clients/rest_client.py,sha256=6HQF3YXDnSdGxAoXpB_wU6Vhcqhp5OB5SNuGQJ6Hseo,43454
5
- futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
6
- futurehouse_client/models/app.py,sha256=w_1e4F0IiC-BKeOLqYkABYo4U-Nka1S-F64S_eHB2KM,26421
7
- futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
8
- futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
9
- futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- futurehouse_client/utils/auth.py,sha256=0V161S9jW4vbTCoJJrOtNzWXQkAVyzdGM3yefGgJ578,2808
11
- futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
12
- futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
13
- futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
14
- futurehouse_client-0.3.18.dev195.dist-info/METADATA,sha256=yM1NbN2au3MmkfIkkuT85eYahKYTmnBuaWCQ1OvQ97A,12767
15
- futurehouse_client-0.3.18.dev195.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
16
- futurehouse_client-0.3.18.dev195.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
17
- futurehouse_client-0.3.18.dev195.dist-info/RECORD,,