futurehouse-client 0.3.18.dev195 → 0.3.19 (py3-none-any.whl)

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
futurehouse_client/__init__.py
@@ -1,12 +1,22 @@
  from .clients.job_client import JobClient, JobNames
- from .clients.rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
  from .clients.rest_client import RestClient as FutureHouseClient
+ from .models.app import (
+     FinchTaskResponse,
+     PhoenixTaskResponse,
+     PQATaskResponse,
+     TaskRequest,
+     TaskResponse,
+     TaskResponseVerbose,
+ )
 
  __all__ = [
+     "FinchTaskResponse",
      "FutureHouseClient",
      "JobClient",
      "JobNames",
      "PQATaskResponse",
+     "PhoenixTaskResponse",
+     "TaskRequest",
      "TaskResponse",
      "TaskResponseVerbose",
  ]
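With this change the task-response models and `TaskRequest` are re-exported from the package root, so downstream code no longer needs to reach into `clients.rest_client`. A minimal sketch of the new import surface (names taken from the hunk above):

```python
# Sketch: the 0.3.19 top-level import surface.
from futurehouse_client import (
    FinchTaskResponse,
    FutureHouseClient,
    JobClient,
    JobNames,
    PhoenixTaskResponse,
    PQATaskResponse,
    TaskRequest,
    TaskResponse,
    TaskResponseVerbose,
)
```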
futurehouse_client/clients/__init__.py
@@ -1,12 +1,11 @@
  from .job_client import JobClient, JobNames
- from .rest_client import PQATaskResponse, TaskResponse, TaskResponseVerbose
  from .rest_client import RestClient as FutureHouseClient
+ from .rest_client import TaskResponse, TaskResponseVerbose
 
  __all__ = [
      "FutureHouseClient",
      "JobClient",
      "JobNames",
-     "PQATaskResponse",
      "TaskResponse",
      "TaskResponseVerbose",
  ]
futurehouse_client/clients/job_client.py
@@ -8,7 +8,13 @@ from aviary.env import Frame
  from pydantic import BaseModel
  from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential
 
- from futurehouse_client.models.app import Stage
+ from futurehouse_client.models.app import (
+     FinchTaskResponse,
+     PhoenixTaskResponse,
+     PQATaskResponse,
+     Stage,
+     TaskResponse,
+ )
  from futurehouse_client.models.rest import (
      FinalEnvironmentRequest,
      StoreAgentStatePostRequest,
@@ -31,6 +37,19 @@ class JobNames(StrEnum):
      DUMMY = "job-futurehouse-dummy-env"
      PHOENIX = "job-futurehouse-phoenix"
      FINCH = "job-futurehouse-data-analysis-crow-high"
+     CHIMP = "job-futurehouse-chimp"
+
+     @classmethod
+     def _get_response_mapping(cls) -> dict[str, type[TaskResponse]]:
+         return {
+             cls.CROW: PQATaskResponse,
+             cls.FALCON: PQATaskResponse,
+             cls.OWL: PQATaskResponse,
+             cls.CHIMP: PQATaskResponse,
+             cls.PHOENIX: PhoenixTaskResponse,
+             cls.FINCH: FinchTaskResponse,
+             cls.DUMMY: TaskResponse,
+         }
 
      @classmethod
      def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
@@ -52,6 +71,13 @@ class JobNames(StrEnum):
              f"Invalid job name: {job_name}. \nOptions are: {', '.join([name.name for name in cls])}"
          ) from e
 
+     @staticmethod
+     def get_response_object_from_job(job_name: str) -> type[TaskResponse]:
+         return JobNames._get_response_mapping()[job_name]
+
+     def get_response_object(self) -> type[TaskResponse]:
+         return self._get_response_mapping()[self.name]
+
 
  class JobClient:
      REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
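The new mapping keys `type[TaskResponse]` classes by `JobNames` members. Because `JobNames` is a `StrEnum`, members hash and compare like their string values, so `get_response_object_from_job` can be called with the raw job-name string the API returns. A small sketch using the `PHOENIX` value shown above:

```python
from futurehouse_client.clients import JobNames

# StrEnum members hash as their string values, so the raw job-name string
# returned by the API resolves against the enum-keyed mapping.
response_cls = JobNames.get_response_object_from_job("job-futurehouse-phoenix")
print(response_cls.__name__)  # PhoenixTaskResponse
```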
futurehouse_client/clients/rest_client.py
@@ -13,6 +13,7 @@ import tempfile
  import time
  import uuid
  from collections.abc import Collection
+ from concurrent.futures import ThreadPoolExecutor, as_completed
  from pathlib import Path
  from types import ModuleType
  from typing import Any, ClassVar, cast
@@ -31,6 +32,7 @@ from httpx import (
      ReadError,
      ReadTimeout,
      RemoteProtocolError,
+     codes,
  )
  from ldp.agent import AgentConfig
  from requests.exceptions import RequestException, Timeout
@@ -47,7 +49,6 @@ from futurehouse_client.clients import JobNames
  from futurehouse_client.models.app import (
      AuthType,
      JobDeploymentConfig,
-     PQATaskResponse,
      Stage,
      TaskRequest,
      TaskResponse,
@@ -133,6 +134,9 @@ class RestClient:
      MAX_RETRY_WAIT: ClassVar[int] = 10
      DEFAULT_POLLING_TIME: ClassVar[int] = 5  # seconds
      CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks
+     ASSEMBLY_POLLING_INTERVAL: ClassVar[int] = 10  # seconds
+     MAX_ASSEMBLY_WAIT_TIME: ClassVar[int] = 1800  # 30 minutes
+     MAX_CONCURRENT_CHUNKS: ClassVar[int] = 12  # Maximum concurrent chunk uploads
 
      def __init__(
          self,
@@ -174,7 +178,7 @@ class RestClient:
 
      @property
      def unauthenticated_client(self) -> Client:
-         """Unauthenticated HTTP client for auth operations to avoid recursion."""
+         """Unauthenticated HTTP client for auth operations."""
          return cast(Client, self.get_client("application/json", authenticated=False))
 
      @property
@@ -219,6 +223,8 @@ class RestClient:
          if content_type:
              headers["Content-Type"] = content_type
 
+         headers["x-client"] = "sdk"
+
          self._clients[key] = (
              AsyncClient(
                  base_url=self.base_url,
@@ -280,6 +286,104 @@
          orgs = response.json()
          return [org["name"] for org in orgs]
 
+     def _check_assembly_status(
+         self, job_name: str, upload_id: str, file_name: str
+     ) -> dict[str, Any]:
+         """Check the assembly status of an uploaded file.
+
+         Args:
+             job_name: The name of the futurehouse job
+             upload_id: The upload ID
+             file_name: The name of the file
+
+         Returns:
+             Dict containing status information
+
+         Raises:
+             RestClientError: If there's an error checking status
+         """
+         try:
+             url = f"/v0.1/crows/{job_name}/assembly-status/{upload_id}/{file_name}"
+             response = self.client.get(url)
+             response.raise_for_status()
+             return response.json()
+         except Exception as e:
+             raise RestClientError(f"Error checking assembly status: {e}") from e
+
+     def _wait_for_all_assemblies_completion(
+         self,
+         job_name: str,
+         upload_id: str,
+         file_names: list[str],
+         timeout: int = MAX_ASSEMBLY_WAIT_TIME,
+     ) -> bool:
+         """Wait for all file assemblies to complete.
+
+         Args:
+             job_name: The name of the futurehouse job
+             upload_id: The upload ID
+             file_names: List of file names to wait for
+             timeout: Maximum time to wait in seconds
+
+         Returns:
+             True if all assemblies succeeded, False if any failed or timed out
+
+         Raises:
+             RestClientError: If any assembly fails
+         """
+         if not file_names:
+             return True
+
+         start_time = time.time()
+         logger.info(f"Waiting for assembly of {len(file_names)} file(s) to complete...")
+
+         completed_files: set[str] = set()
+
+         while (time.time() - start_time) < timeout and len(completed_files) < len(
+             file_names
+         ):
+             for file_name in file_names:
+                 if file_name in completed_files:
+                     continue
+
+                 try:
+                     status_data = self._check_assembly_status(
+                         job_name, upload_id, file_name
+                     )
+                     status = status_data.get("status")
+
+                     if status == ExecutionStatus.SUCCESS.value:
+                         logger.info(f"Assembly completed for {file_name}")
+                         completed_files.add(file_name)
+                     elif status == ExecutionStatus.FAIL.value:
+                         error_msg = status_data.get("error", "Unknown assembly error")
+                         raise RestClientError(
+                             f"Assembly failed for {file_name}: {error_msg}"
+                         )
+                     elif status == ExecutionStatus.IN_PROGRESS.value:
+                         logger.debug(f"Assembly in progress for {file_name}...")
+
+                 except RestClientError:
+                     raise  # Re-raise assembly errors
+                 except Exception as e:
+                     logger.warning(
+                         f"Error checking assembly status for {file_name}: {e}"
+                     )
+
+             # Don't sleep if all files are complete
+             if len(completed_files) < len(file_names):
+                 time.sleep(self.ASSEMBLY_POLLING_INTERVAL)
+
+         if len(completed_files) < len(file_names):
+             remaining_files = set(file_names) - completed_files
+             logger.warning(
+                 f"Assembly timeout for files: {remaining_files} after {timeout} seconds"
+             )
+             return False
+
+         logger.info(f"All {len(file_names)} file assemblies completed successfully")
+         return True
+
      @staticmethod
      def _validate_module_path(path: Path) -> None:
          """Validates that the given path exists and is a directory.
@@ -340,40 +444,36 @@
          self, task_id: str | None = None, history: bool = False, verbose: bool = False
      ) -> "TaskResponse":
          """Get details for a specific task."""
-         try:
-             task_id = task_id or self.trajectory_id
-             url = f"/v0.1/trajectories/{task_id}"
-             full_url = f"{self.base_url}{url}"
-
-             with (
-                 external_trace(
-                     url=full_url,
-                     method="GET",
-                     library="httpx",
-                     custom_params={
-                         "operation": "get_job",
-                         "job_id": task_id,
-                     },
-                 ),
-                 self.client.stream("GET", url, params={"history": history}) as response,
-             ):
-                 response.raise_for_status()
-                 json_data = "".join(response.iter_text(chunk_size=1024))
-                 data = json.loads(json_data)
-                 if "id" not in data:
-                     data["id"] = task_id
-                 verbose_response = TaskResponseVerbose(**data)
+         task_id = task_id or self.trajectory_id
+         url = f"/v0.1/trajectories/{task_id}"
+         full_url = f"{self.base_url}{url}"
 
-             if verbose:
-                 return verbose_response
-             if any(
-                 JobNames.from_string(job_name) in verbose_response.job_name
-                 for job_name in ["crow", "falcon", "owl", "dummy"]
-             ):
-                 return PQATaskResponse(**data)
-             return TaskResponse(**data)
-         except Exception as e:
-             raise TaskFetchError(f"Error getting task: {e!s}") from e
+         with (
+             external_trace(
+                 url=full_url,
+                 method="GET",
+                 library="httpx",
+                 custom_params={
+                     "operation": "get_job",
+                     "job_id": task_id,
+                 },
+             ),
+             self.client.stream("GET", url, params={"history": history}) as response,
+         ):
+             if response.status_code in {401, 403}:
+                 raise PermissionError(
+                     f"Error getting task: Permission denied for task {task_id}"
+                 )
+             response.raise_for_status()
+             json_data = "".join(response.iter_text(chunk_size=1024))
+             data = json.loads(json_data)
+             if "id" not in data:
+                 data["id"] = task_id
+             verbose_response = TaskResponseVerbose(**data)
+
+         if verbose:
+             return verbose_response
+         return JobNames.get_response_object_from_job(verbose_response.job_name)(**data)
 
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
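Two behavioral changes fall out of this rewrite: 401/403 responses now raise `PermissionError` directly instead of being wrapped in `TaskFetchError`, and the response class is resolved through `JobNames.get_response_object_from_job` rather than the hard-coded `["crow", "falcon", "owl", "dummy"]` list. A hedged usage sketch (the API key and task ID are placeholders, and keyword-argument client construction is assumed from the package README):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="YOUR_API_KEY")  # placeholder credentials

try:
    task = client.get_task("00000000-example-task-id")  # placeholder ID
except PermissionError:
    # New in 0.3.19: permission failures surface directly,
    # instead of arriving wrapped in TaskFetchError.
    print("No access to this task")
else:
    # The concrete type depends on the job that produced the task:
    # PQATaskResponse, PhoenixTaskResponse, FinchTaskResponse, or TaskResponse.
    print(type(task).__name__)
```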
@@ -384,42 +484,36 @@
          self, task_id: str | None = None, history: bool = False, verbose: bool = False
      ) -> "TaskResponse":
          """Get details for a specific task asynchronously."""
-         try:
-             task_id = task_id or self.trajectory_id
-             url = f"/v0.1/trajectories/{task_id}"
-             full_url = f"{self.base_url}{url}"
+         task_id = task_id or self.trajectory_id
+         url = f"/v0.1/trajectories/{task_id}"
+         full_url = f"{self.base_url}{url}"
+
+         with external_trace(
+             url=full_url,
+             method="GET",
+             library="httpx",
+             custom_params={
+                 "operation": "get_job",
+                 "job_id": task_id,
+             },
+         ):
+             async with self.async_client.stream(
+                 "GET", url, params={"history": history}
+             ) as response:
+                 if response.status_code in {401, 403}:
+                     raise PermissionError(
+                         f"Error getting task: Permission denied for task {task_id}"
+                     )
+                 response.raise_for_status()
+                 json_data = "".join([chunk async for chunk in response.aiter_text()])
+                 data = json.loads(json_data)
+                 if "id" not in data:
+                     data["id"] = task_id
+                 verbose_response = TaskResponseVerbose(**data)
 
-             with external_trace(
-                 url=full_url,
-                 method="GET",
-                 library="httpx",
-                 custom_params={
-                     "operation": "get_job",
-                     "job_id": task_id,
-                 },
-             ):
-                 async with self.async_client.stream(
-                     "GET", url, params={"history": history}
-                 ) as response:
-                     response.raise_for_status()
-                     json_data = "".join([
-                         chunk async for chunk in response.aiter_text()
-                     ])
-                     data = json.loads(json_data)
-                     if "id" not in data:
-                         data["id"] = task_id
-                     verbose_response = TaskResponseVerbose(**data)
-
-             if verbose:
-                 return verbose_response
-             if any(
-                 JobNames.from_string(job_name) in verbose_response.job_name
-                 for job_name in ["crow", "falcon", "owl", "dummy"]
-             ):
-                 return PQATaskResponse(**data)
-             return TaskResponse(**data)
-         except Exception as e:
-             raise TaskFetchError(f"Error getting task: {e!s}") from e
+         if verbose:
+             return verbose_response
+         return JobNames.get_response_object_from_job(verbose_response.job_name)(**data)
 
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
@@ -437,15 +531,16 @@
              self.stage,
          )
 
-         try:
-             response = self.client.post(
-                 "/v0.1/crows", json=task_data.model_dump(mode="json")
+         response = self.client.post(
+             "/v0.1/crows", json=task_data.model_dump(mode="json")
+         )
+         if response.status_code in {401, 403}:
+             raise PermissionError(
+                 f"Error creating task: Permission denied for task {task_data.name}"
              )
-             response.raise_for_status()
-             trajectory_id = response.json()["trajectory_id"]
-             self.trajectory_id = trajectory_id
-         except Exception as e:
-             raise TaskFetchError(f"Error creating task: {e!s}") from e
+         response.raise_for_status()
+         trajectory_id = response.json()["trajectory_id"]
+         self.trajectory_id = trajectory_id
          return trajectory_id
 
      @retry(
@@ -463,16 +558,16 @@
              task_data.name.name,
              self.stage,
          )
-
-         try:
-             response = await self.async_client.post(
-                 "/v0.1/crows", json=task_data.model_dump(mode="json")
+         response = await self.async_client.post(
+             "/v0.1/crows", json=task_data.model_dump(mode="json")
+         )
+         if response.status_code in {401, 403}:
+             raise PermissionError(
+                 f"Error creating task: Permission denied for task {task_data.name}"
              )
-             response.raise_for_status()
-             trajectory_id = response.json()["trajectory_id"]
-             self.trajectory_id = trajectory_id
-         except Exception as e:
-             raise TaskFetchError(f"Error creating task: {e!s}") from e
+         response.raise_for_status()
+         trajectory_id = response.json()["trajectory_id"]
+         self.trajectory_id = trajectory_id
          return trajectory_id
 
      async def arun_tasks_until_done(
@@ -820,6 +915,8 @@
              raise JobCreationError(f"Error generating docker image: {e!s}") from e
          return build_context
 
+     # TODO: we should have an async upload_file, check_assembly_status,
+     # wait_for_assembly_completion, upload_directory, upload_single_file
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
          wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
@@ -830,6 +927,8 @@
          job_name: str,
          file_path: str | os.PathLike,
          upload_id: str | None = None,
+         wait_for_assembly: bool = True,
+         assembly_timeout: int = MAX_ASSEMBLY_WAIT_TIME,
      ) -> str:
          """Upload a file or directory to a futurehouse job bucket.
 
@@ -837,29 +936,47 @@
              job_name: The name of the futurehouse job to upload to.
              file_path: The local path to the file or directory to upload.
              upload_id: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+             wait_for_assembly: After file chunking, wait for the assembly to be processed.
+             assembly_timeout: Maximum time to wait for assembly in seconds.
 
          Returns:
              The upload ID used for the upload.
 
          Raises:
              FileUploadError: If there's an error uploading the file.
+             RestClientError: If assembly fails or times out.
          """
          file_path = Path(file_path)
          if not file_path.exists():
              raise FileNotFoundError(f"File or directory not found: {file_path}")
 
          upload_id = upload_id or str(uuid.uuid4())
+         uploaded_files: list[str] = []
 
          if file_path.is_dir():
              # Process directory recursively
-             self._upload_directory(job_name, file_path, upload_id)
+             uploaded_files = self._upload_directory(job_name, file_path, upload_id)
          else:
              # Process single file
              self._upload_single_file(job_name, file_path, upload_id)
+             uploaded_files = [file_path.name]
+
+         # Wait for all assemblies if requested and we have files
+         if wait_for_assembly and uploaded_files:
+             success = self._wait_for_all_assemblies_completion(
+                 job_name, upload_id, uploaded_files, assembly_timeout
+             )
+             if not success:
+                 raise RestClientError(
+                     f"Assembly failed or timed out for one or more files: {uploaded_files}"
+                 )
+
          logger.info(f"Successfully uploaded {file_path} to {upload_id}")
          return upload_id
 
-     def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
+     def _upload_directory(
+         self, job_name: str, dir_path: Path, upload_id: str
+     ) -> list[str]:
          """Upload all files in a directory recursively.
 
          Args:
@@ -867,12 +984,17 @@
              dir_path: The path to the directory to upload.
              upload_id: The upload ID to use.
 
+         Returns:
+             List of uploaded file names.
+
          Raises:
              FileUploadError: If there's an error uploading any file.
          """
          # Skip common directories that shouldn't be uploaded
          if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
-             return
+             return []
+
+         uploaded_files: list[str] = []
 
          try:
              # Upload all files in the directory recursively
@@ -882,23 +1004,27 @@
              ):
                  # Use path relative to the original directory as file name
                  rel_path = path.relative_to(dir_path)
+                 file_name = str(rel_path)
                  self._upload_single_file(
                      job_name,
                      path,
                      upload_id,
-                     file_name=str(rel_path),
+                     file_name=file_name,
                  )
+                 uploaded_files.append(file_name)
          except Exception as e:
              raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
 
+         return uploaded_files
+
      def _upload_single_file(
          self,
          job_name: str,
          file_path: Path,
          upload_id: str,
          file_name: str | None = None,
-     ) -> None:
-         """Upload a single file in chunks.
+     ) -> str | None:
+         """Upload a single file in chunks using parallel uploads.
 
          Args:
              job_name: The key of the crow to upload to.
@@ -906,6 +1032,9 @@
              file_path: The path to the file to upload.
              upload_id: The upload ID to use.
              file_name: Optional name to use for the file. If not provided, the file's name will be used.
+
+         Returns:
+             The status URL if this was the last chunk, None otherwise.
 
          Raises:
              FileUploadError: If there's an error uploading the file.
@@ -915,17 +1044,103 @@
          # Skip empty files
          if file_size == 0:
              logger.warning(f"Skipping upload of empty file: {file_path}")
-             return
+             return None
 
          total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
 
          logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
 
+         status_url = None
+
          try:
-             with open(file_path, "rb") as f:
-                 for chunk_index in range(total_chunks):
-                     # Read the chunk from the file
-                     f.seek(chunk_index * self.CHUNK_SIZE)
+             status_url = self._upload_chunks_parallel(
+                 job_name,
+                 file_path,
+                 file_name,
+                 upload_id,
+                 total_chunks,
+             )
+
+             logger.info(f"Successfully uploaded {file_name}")
+         except Exception as e:
+             logger.exception(f"Error uploading file {file_path}")
+             raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+         return status_url
+
+     def _upload_chunks_parallel(
+         self,
+         job_name: str,
+         file_path: Path,
+         file_name: str,
+         upload_id: str,
+         total_chunks: int,
+     ) -> str | None:
+         """Upload all chunks in parallel batches, including the final chunk.
+
+         Args:
+             job_name: The key of the crow to upload to.
+             file_path: The path to the file to upload.
+             file_name: The name to use for the file.
+             upload_id: The upload ID to use.
+             total_chunks: Total number of chunks.
+
+         Returns:
+             The status URL from the final chunk response, or None if no chunks.
+
+         Raises:
+             FileUploadError: If there's an error uploading any chunk.
+         """
+         if total_chunks <= 0:
+             return None
+
+         if total_chunks > 1:
+             num_regular_chunks = total_chunks - 1
+             for batch_start in range(0, num_regular_chunks, self.MAX_CONCURRENT_CHUNKS):
+                 batch_end = min(
+                     batch_start + self.MAX_CONCURRENT_CHUNKS, num_regular_chunks
+                 )
+
+                 # Upload chunks in this batch concurrently
+                 with ThreadPoolExecutor(
+                     max_workers=self.MAX_CONCURRENT_CHUNKS
+                 ) as executor:
+                     futures = {
+                         executor.submit(
+                             self._upload_single_chunk,
+                             job_name,
+                             file_path,
+                             file_name,
+                             upload_id,
+                             chunk_index,
+                             total_chunks,
+                         ): chunk_index
+                         for chunk_index in range(batch_start, batch_end)
+                     }
+
+                     for future in as_completed(futures):
+                         chunk_index = futures[future]
+                         try:
+                             future.result()
+                             logger.debug(
+                                 f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                             )
+                         except Exception as e:
+                             logger.error(f"Error uploading chunk {chunk_index}: {e}")
+                             raise FileUploadError(
+                                 f"Error uploading chunk {chunk_index} of {file_name}: {e}"
+                             ) from e
+
+         # Upload the final chunk with retry logic
+         final_chunk_index = total_chunks - 1
+         retries = 0
+         max_retries = 3
+         retry_delay = 2.0
+
+         while retries < max_retries:
+             try:
+                 with open(file_path, "rb") as f:
+                     # Read the final chunk from the file
+                     f.seek(final_chunk_index * self.CHUNK_SIZE)
                      chunk_data = f.read(self.CHUNK_SIZE)
 
                      # Prepare and send the chunk
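An aside before the final-chunk hunk below: with `CHUNK_SIZE` of 16 MiB and `MAX_CONCURRENT_CHUNKS` of 12, every chunk except the last goes out in parallel batches, and the final chunk is held back so the server can trigger assembly. A standalone sketch of the same arithmetic:

```python
CHUNK_SIZE = 16 * 1024 * 1024  # 16 MiB, mirroring RestClient.CHUNK_SIZE
MAX_CONCURRENT_CHUNKS = 12     # mirroring RestClient.MAX_CONCURRENT_CHUNKS


def plan_upload(file_size: int) -> tuple[int, int]:
    """Return (total_chunks, parallel_batches) for a file of file_size bytes."""
    total_chunks = (file_size + CHUNK_SIZE - 1) // CHUNK_SIZE  # ceiling division
    # All chunks except the last go out in parallel batches; the final
    # chunk is uploaded separately so the server can assemble the file.
    regular_chunks = max(total_chunks - 1, 0)
    batches = -(-regular_chunks // MAX_CONCURRENT_CHUNKS)  # ceiling division
    return total_chunks, batches


# A 500 MiB file splits into 32 chunks: 31 regular chunks across
# 3 parallel batches, then the final chunk with its own retry loop.
print(plan_upload(500 * 1024 * 1024))  # (32, 3)
```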
@@ -944,29 +1159,107 @@
                              }
                              data = {
                                  "file_name": file_name,
-                                 "chunk_index": chunk_index,
+                                 "chunk_index": final_chunk_index,
                                  "total_chunks": total_chunks,
                                  "upload_id": upload_id,
                              }
 
-                             # Send the chunk
+                             # Send the final chunk
                              response = self.multipart_client.post(
                                  f"/v0.1/crows/{job_name}/upload-chunk",
                                  files=files,
                                  data=data,
                              )
+
+                             # Handle missing chunks (status 409)
+                             if response.status_code == codes.CONFLICT:
+                                 retries += 1
+                                 if retries < max_retries:
+                                     logger.warning(
+                                         f"Missing chunks detected for {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries})"
+                                     )
+                                     time.sleep(retry_delay)
+                                     continue
+
                              response.raise_for_status()
+                             response_data = response.json()
+                             status_url = response_data.get("status_url")
 
-                     # Call progress callback if provided
+                             logger.debug(
+                                 f"Uploaded final chunk {final_chunk_index + 1}/{total_chunks} of {file_name}"
+                             )
+                             return status_url
+
+             except Exception as e:
+                 if retries >= max_retries - 1:
+                     raise FileUploadError(
+                         f"Error uploading final chunk of {file_name}: {e}"
+                     ) from e
+                 retries += 1
+                 logger.warning(
+                     f"Error uploading final chunk of {file_name}, retrying in {retry_delay}s... (attempt {retries}/{max_retries}): {e}"
+                 )
+                 time.sleep(retry_delay)
 
-                     logger.debug(
-                         f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
-                     )
+         raise FileUploadError(
+             f"Failed to upload final chunk of {file_name} after {max_retries} retries"
+         )
 
-             logger.info(f"Successfully uploaded {file_name}")
-         except Exception as e:
-             logger.exception(f"Error uploading file {file_path}")
-             raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+     def _upload_single_chunk(
+         self,
+         job_name: str,
+         file_path: Path,
+         file_name: str,
+         upload_id: str,
+         chunk_index: int,
+         total_chunks: int,
+     ) -> None:
+         """Upload a single chunk.
+
+         Args:
+             job_name: The key of the crow to upload to.
+             file_path: The path to the file to upload.
+             file_name: The name to use for the file.
+             upload_id: The upload ID to use.
+             chunk_index: The index of this chunk.
+             total_chunks: Total number of chunks.
+
+         Raises:
+             Exception: If there's an error uploading the chunk.
+         """
+         with open(file_path, "rb") as f:
+             # Read the chunk from the file
+             f.seek(chunk_index * self.CHUNK_SIZE)
+             chunk_data = f.read(self.CHUNK_SIZE)
+
+             # Prepare and send the chunk
+             with tempfile.NamedTemporaryFile() as temp_file:
+                 temp_file.write(chunk_data)
+                 temp_file.flush()
+
+                 # Create form data
+                 with open(temp_file.name, "rb") as chunk_file_obj:
+                     files = {
+                         "chunk": (
+                             file_name,
+                             chunk_file_obj,
+                             "application/octet-stream",
+                         )
+                     }
+                     data = {
+                         "file_name": file_name,
+                         "chunk_index": chunk_index,
+                         "total_chunks": total_chunks,
+                         "upload_id": upload_id,
+                     }
+
+                     # Send the chunk
+                     response = self.multipart_client.post(
+                         f"/v0.1/crows/{job_name}/upload-chunk",
+                         files=files,
+                         data=data,
+                     )
+                     response.raise_for_status()
 
      @retry(
          stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
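Taken together, `upload_file` now blocks by default until server-side assembly completes, raising `RestClientError` on failure or timeout; `wait_for_assembly=False` restores the previous fire-and-forget behavior. A usage sketch (client construction and paths are placeholders):

```python
from futurehouse_client import FutureHouseClient

client = FutureHouseClient(api_key="YOUR_API_KEY")  # placeholder credentials

# Default: block until every uploaded file is assembled, or raise
# RestClientError after assembly_timeout seconds (default 1800).
upload_id = client.upload_file(
    job_name="job-futurehouse-phoenix",  # job-name value from the diff above
    file_path="data/inputs",             # placeholder path
    assembly_timeout=600,
)

# Fire-and-forget, matching the pre-0.3.19 behavior:
upload_id = client.upload_file(
    job_name="job-futurehouse-phoenix",
    file_path="data/inputs",
    wait_for_assembly=False,
)
```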
futurehouse_client/models/app.py
@@ -1,3 +1,4 @@
+ import copy
  import json
  import os
  import re
@@ -675,7 +676,8 @@ class TaskResponse(BaseModel):
 
      @model_validator(mode="before")
      @classmethod
-     def validate_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
+     def validate_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)  # Avoid mutating the original data
          # Extract fields from environment frame state
          if not isinstance(data, dict):
              return data
@@ -690,7 +692,72 @@
          return data
 
 
+ class PhoenixTaskResponse(TaskResponse):
+     """
+     Response schema for tasks executed with Phoenix.
+
+     Additional fields:
+         answer: Final answer from Phoenix
+     """
+
+     model_config = ConfigDict(extra="ignore")
+     answer: str | None = None
+
+     @model_validator(mode="before")
+     @classmethod
+     def validate_phoenix_fields(
+         cls, original_data: Mapping[str, Any]
+     ) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)
+         if not isinstance(data, dict):
+             return data
+         if not (env_frame := data.get("environment_frame", {})):
+             return data
+         state = env_frame.get("state", {}).get("state", {})
+         data["answer"] = state.get("answer")
+         return data
+
+
+ class FinchTaskResponse(TaskResponse):
+     """
+     Response schema for tasks executed with Finch.
+
+     Additional fields:
+         answer: Final answer from Finch
+         notebook: a dictionary with `cells` and `metadata` describing the notebook content
+     """
+
+     model_config = ConfigDict(extra="ignore")
+     answer: str | None = None
+     notebook: dict[str, Any] | None = None
+
+     @model_validator(mode="before")
+     @classmethod
+     def validate_finch_fields(
+         cls, original_data: Mapping[str, Any]
+     ) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)
+         if not isinstance(data, dict):
+             return data
+         if not (env_frame := data.get("environment_frame", {})):
+             return data
+         state = env_frame.get("state", {}).get("state", {})
+         data["answer"] = state.get("answer")
+         data["notebook"] = state.get("nb_state")
+         return data
+
+
  class PQATaskResponse(TaskResponse):
+     """
+     Response schema for tasks executed with PQA.
+
+     Additional fields:
+         answer: Final answer from PQA
+         formatted_answer: Formatted answer from PQA
+         answer_reasoning: Reasoning used to generate the final answer, if available
+         has_successful_answer: Whether the answer is successful
+     """
+
      model_config = ConfigDict(extra="ignore")
 
      answer: str | None = None
@@ -702,7 +769,8 @@ class PQATaskResponse(TaskResponse):
 
      @model_validator(mode="before")
      @classmethod
-     def validate_pqa_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
+     def validate_pqa_fields(cls, original_data: Mapping[str, Any]) -> Mapping[str, Any]:
+         data = copy.deepcopy(original_data)  # Avoid mutating the original data
          if not isinstance(data, dict):
              return data
          if not (env_frame := data.get("environment_frame", {})):
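All three validators share a pattern: before standard validation, they lift fields out of the nested `environment_frame["state"]["state"]` dict, operating on a deep copy so the caller's payload is never mutated. A sketch with a hypothetical payload; any required fields on the `TaskResponse` base, which this diff does not show, would also need to be supplied:

```python
from futurehouse_client.models.app import FinchTaskResponse

payload = {  # hypothetical, trimmed-down trajectory payload
    "environment_frame": {
        "state": {
            "state": {"answer": "42", "nb_state": {"cells": [], "metadata": {}}}
        }
    },
}
task = FinchTaskResponse(**payload)  # plus any required TaskResponse base fields
assert task.answer == "42"
assert task.notebook == {"cells": [], "metadata": {}}
assert "answer" not in payload  # the deepcopy left the input dict untouched
```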
File without changes
futurehouse_client/utils/auth.py
@@ -1,4 +1,5 @@
  import logging
+ from collections.abc import Collection, Generator
  from typing import ClassVar, Final
 
  import httpx
@@ -42,7 +43,7 @@ def _run_auth(
  class RefreshingJWT(httpx.Auth):
      """Automatically (re-)inject a JWT and transparently retry exactly once when we hit a 401/403."""
 
-     RETRY_STATUSES: ClassVar[set[int]] = {
+     RETRY_STATUSES: ClassVar[Collection[httpx.codes]] = {
          httpx.codes.UNAUTHORIZED,
          httpx.codes.FORBIDDEN,
      }
@@ -64,7 +65,7 @@ class RefreshingJWT(httpx.Auth):
              api_key=api_key,
          )
 
-     def refresh_token(self):
+     def refresh_token(self) -> None:
          if self.auth_type == AuthType.JWT:
              logger.error(INVALID_REFRESH_TYPE_MSG)
              raise ValueError(INVALID_REFRESH_TYPE_MSG)
@@ -74,7 +75,9 @@ class RefreshingJWT(httpx.Auth):
              api_key=self.api_key,
          )
 
-     def auth_flow(self, request):
+     def auth_flow(
+         self, request: httpx.Request
+     ) -> Generator[httpx.Request, httpx.Response, None]:
          request.headers["Authorization"] = f"Bearer {self._jwt}"
          response = yield request
 
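The new annotations document the contract httpx expects: `auth_flow` is a generator that yields each request and receives the response back, which is what lets an `Auth` implementation refresh a token and retry exactly once. A generic sketch of the pattern, independent of this package:

```python
import httpx


class OneRetryAuth(httpx.Auth):
    """Sketch of the generator-based retry pattern used by RefreshingJWT."""

    def __init__(self, token: str):
        self._token = token

    def _refresh(self) -> None:
        self._token = "refreshed-token"  # placeholder refresh logic

    def auth_flow(self, request: httpx.Request):
        request.headers["Authorization"] = f"Bearer {self._token}"
        response = yield request  # httpx sends the request, hands back the response
        if response.status_code in {401, 403}:
            self._refresh()  # mint a new token once
            request.headers["Authorization"] = f"Bearer {self._token}"
            yield request  # transparently retry exactly once
```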
futurehouse_client-0.3.19.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: futurehouse-client
- Version: 0.3.18.dev195
+ Version: 0.3.19
  Summary: A client for interacting with endpoints of the FutureHouse service.
  Author-email: FutureHouse technical staff <hello@futurehouse.org>
  Classifier: Operating System :: OS Independent
@@ -8,10 +8,9 @@ Classifier: Programming Language :: Python :: 3 :: Only
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python
- Requires-Python: <3.13,>=3.11
+ Requires-Python: <3.14,>=3.11
  Description-Content-Type: text/markdown
  Requires-Dist: cloudpickle
- Requires-Dist: dm-tree<0.1.9
  Requires-Dist: fhaviary
  Requires-Dist: httpx
  Requires-Dist: ldp>=0.22.0
futurehouse_client-0.3.19.dist-info/RECORD
@@ -0,0 +1,18 @@
+ futurehouse_client/__init__.py,sha256=BztM_ntbgmIEjzvnBWcvPhvLjM8xGDFCK0Upf3-nIn8,488
+ futurehouse_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ futurehouse_client/clients/__init__.py,sha256=-HXNj-XJ3LRO5XM6MZ709iPs29YpApss0Q2YYg1qMZw,280
+ futurehouse_client/clients/job_client.py,sha256=JgB5IUAyCmnhGRsYc3bgKldA-lkM1JLwHRwwUeOCdus,11944
+ futurehouse_client/clients/rest_client.py,sha256=3wfVz6d2KuRQUr_nms7P25yVR6aTjsRrSkqmVs55soA,54552
+ futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
+ futurehouse_client/models/app.py,sha256=VCtg0ygd-TSrR6DtfljTBt9jnl1eBNal8UXHFdkDg88,28587
+ futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
+ futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
+ futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ futurehouse_client/utils/auth.py,sha256=tgWELjKfg8eWme_qdcRmc8TjQN9DVZuHHaVXZNHLchk,2960
+ futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
+ futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
+ futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
+ futurehouse_client-0.3.19.dist-info/METADATA,sha256=FbtQGStv4salVccxR5wtpdlGbufSqxoiCtM44qDOHJs,12731
+ futurehouse_client-0.3.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ futurehouse_client-0.3.19.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+ futurehouse_client-0.3.19.dist-info/RECORD,,
futurehouse_client-0.3.19.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.7.1)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
futurehouse_client-0.3.18.dev195.dist-info/RECORD
@@ -1,17 +0,0 @@
- futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
- futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
- futurehouse_client/clients/job_client.py,sha256=uNkqQbeZw7wbA0qDWcIOwOykrosza-jev58paJZ_mbA,11150
- futurehouse_client/clients/rest_client.py,sha256=6HQF3YXDnSdGxAoXpB_wU6Vhcqhp5OB5SNuGQJ6Hseo,43454
- futurehouse_client/models/__init__.py,sha256=5x-f9AoM1hGzJBEHcHAXSt7tPeImST5oZLuMdwp0mXc,554
- futurehouse_client/models/app.py,sha256=w_1e4F0IiC-BKeOLqYkABYo4U-Nka1S-F64S_eHB2KM,26421
- futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
- futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
- futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- futurehouse_client/utils/auth.py,sha256=0V161S9jW4vbTCoJJrOtNzWXQkAVyzdGM3yefGgJ578,2808
- futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
- futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
- futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
- futurehouse_client-0.3.18.dev195.dist-info/METADATA,sha256=yM1NbN2au3MmkfIkkuT85eYahKYTmnBuaWCQ1OvQ97A,12767
- futurehouse_client-0.3.18.dev195.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
- futurehouse_client-0.3.18.dev195.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
- futurehouse_client-0.3.18.dev195.dist-info/RECORD,,