futurehouse-client 0.3.17.dev94__py3-none-any.whl → 0.3.18.dev80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ import inspect
8
8
  import json
9
9
  import logging
10
10
  import os
11
+ import sys
11
12
  import tempfile
12
13
  import time
13
14
  import uuid
@@ -63,24 +64,14 @@ from futurehouse_client.utils.monitoring import (
63
64
  )
64
65
 
65
66
  logger = logging.getLogger(__name__)
66
-
67
+ logging.basicConfig(
68
+ level=logging.INFO,
69
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
70
+ stream=sys.stdout,
71
+ )
72
+ logging.getLogger("httpx").setLevel(logging.WARNING)
67
73
  TaskRequest.model_rebuild()
68
74
 
69
- retry_if_connection_error = retry_if_exception_type((
70
- # From requests
71
- Timeout,
72
- ConnectionError,
73
- RequestException,
74
- # From httpx
75
- ConnectError,
76
- ConnectTimeout,
77
- ReadTimeout,
78
- ReadError,
79
- NetworkError,
80
- RemoteProtocolError,
81
- CloseError,
82
- ))
83
-
84
75
  FILE_UPLOAD_IGNORE_PARTS = {
85
76
  ".ruff_cache",
86
77
  "__pycache__",
@@ -111,6 +102,27 @@ class InvalidTaskDescriptionError(Exception):
111
102
  """Raised when the task description is invalid or empty."""
112
103
 
113
104
 
105
+ class FileUploadError(RestClientError):
106
+ """Raised when there's an error uploading a file."""
107
+
108
+
109
+ retry_if_connection_error = retry_if_exception_type((
110
+ # From requests
111
+ Timeout,
112
+ ConnectionError,
113
+ RequestException,
114
+ # From httpx
115
+ ConnectError,
116
+ ConnectTimeout,
117
+ ReadTimeout,
118
+ ReadError,
119
+ NetworkError,
120
+ RemoteProtocolError,
121
+ CloseError,
122
+ FileUploadError,
123
+ ))
124
+
125
+
114
126
  class SimpleOrganization(BaseModel):
115
127
  id: int
116
128
  name: str
@@ -207,10 +219,6 @@ class TaskResponseVerbose(TaskResponse):
207
219
  shared_with: list[SimpleOrganization] | None = None
208
220
 
209
221
 
210
- class FileUploadError(RestClientError):
211
- """Raised when there's an error uploading a file."""
212
-
213
-
214
222
  class RestClient:
215
223
  REQUEST_TIMEOUT: ClassVar[float] = 30.0 # sec
216
224
  MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
@@ -917,14 +925,14 @@ class RestClient:
917
925
  self,
918
926
  job_name: str,
919
927
  file_path: str | os.PathLike,
920
- folder_name: str | None = None,
928
+ upload_id: str | None = None,
921
929
  ) -> str:
922
930
  """Upload a file or directory to a futurehouse job bucket.
923
931
 
924
932
  Args:
925
933
  job_name: The name of the futurehouse job to upload to.
926
934
  file_path: The local path to the file or directory to upload.
927
- folder_name: Optional folder name to use for the upload. If not provided, a random UUID will be used.
935
+ upload_id: Optional folder name to use for the upload. If not provided, a random UUID will be used.
928
936
 
929
937
  Returns:
930
938
  The upload ID used for the upload.
@@ -936,7 +944,7 @@ class RestClient:
936
944
  if not file_path.exists():
937
945
  raise FileNotFoundError(f"File or directory not found: {file_path}")
938
946
 
939
- upload_id = folder_name or str(uuid.uuid4())
947
+ upload_id = upload_id or str(uuid.uuid4())
940
948
 
941
949
  if file_path.is_dir():
942
950
  # Process directory recursively
@@ -999,6 +1007,12 @@ class RestClient:
999
1007
  """
1000
1008
  file_name = file_name or file_path.name
1001
1009
  file_size = file_path.stat().st_size
1010
+
1011
+ # Skip empty files
1012
+ if file_size == 0:
1013
+ logger.warning(f"Skipping upload of empty file: {file_path}")
1014
+ return
1015
+
1002
1016
  total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
1003
1017
 
1004
1018
  logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
@@ -1056,12 +1070,18 @@ class RestClient:
1056
1070
  wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
1057
1071
  retry=retry_if_connection_error,
1058
1072
  )
1059
- def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
1073
+ def list_files(
1074
+ self,
1075
+ job_name: str,
1076
+ trajectory_id: str | None = None,
1077
+ upload_id: str | None = None,
1078
+ ) -> dict[str, list[str]]:
1060
1079
  """List files and directories in a GCS location for a given job_name and upload_id.
1061
1080
 
1062
1081
  Args:
1063
1082
  job_name: The name of the futurehouse job.
1064
- folder_name: The specific folder name (upload_id) to list files from.
1083
+ trajectory_id: The specific trajectory id to list files from.
1084
+ upload_id: The specific upload id to list files from.
1065
1085
 
1066
1086
  Returns:
1067
1087
  A list of files in the GCS folder.
@@ -1069,22 +1089,27 @@ class RestClient:
1069
1089
  Raises:
1070
1090
  RestClientError: If there is an error listing the files.
1071
1091
  """
1092
+ if not bool(trajectory_id) ^ bool(upload_id):
1093
+ raise RestClientError(
1094
+ "Must at least specify one of trajectory_id or upload_id, but not both"
1095
+ )
1072
1096
  try:
1073
1097
  url = f"/v0.1/crows/{job_name}/list-files"
1074
- params = {"upload_id": folder_name}
1098
+ params = {"trajectory_id": trajectory_id, "upload_id": upload_id}
1099
+ params = {k: v for k, v in params.items() if v is not None}
1075
1100
  response = self.client.get(url, params=params)
1076
1101
  response.raise_for_status()
1077
1102
  return response.json()
1078
1103
  except HTTPStatusError as e:
1079
1104
  logger.exception(
1080
- f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
1105
+ f"Error listing files for job {job_name}, trajectory {trajectory_id}, upload_id {upload_id}: {e.response.text}"
1081
1106
  )
1082
1107
  raise RestClientError(
1083
1108
  f"Error listing files: {e.response.status_code} - {e.response.text}"
1084
1109
  ) from e
1085
1110
  except Exception as e:
1086
1111
  logger.exception(
1087
- f"Error listing files for job {job_name}, folder {folder_name}"
1112
+ f"Error listing files for job {job_name}, trajectory {trajectory_id}, upload_id {upload_id}"
1088
1113
  )
1089
1114
  raise RestClientError(f"Error listing files: {e!s}") from e
1090
1115
 
@@ -1096,7 +1121,7 @@ class RestClient:
1096
1121
  def download_file(
1097
1122
  self,
1098
1123
  job_name: str,
1099
- folder_name: str,
1124
+ trajectory_id: str,
1100
1125
  file_path: str,
1101
1126
  destination_path: str | os.PathLike,
1102
1127
  ) -> None:
@@ -1104,14 +1129,14 @@ class RestClient:
1104
1129
 
1105
1130
  Args:
1106
1131
  job_name: The name of the futurehouse job.
1107
- folder_name: The specific folder name (upload_id) the file belongs to.
1132
+ trajectory_id: The specific trajectory id the file belongs to.
1108
1133
  file_path: The relative path of the file to download
1109
1134
  (e.g., 'data/my_file.csv' or 'my_image.png').
1110
1135
  destination_path: The local path where the file should be saved.
1111
1136
 
1112
1137
  Raises:
1113
1138
  RestClientError: If there is an error downloading the file.
1114
- FileNotFoundError: If the destination directory does not exist.
1139
+ FileNotFoundError: If the destination directory does not exist or if the file is not found.
1115
1140
  """
1116
1141
  destination_path = Path(destination_path)
1117
1142
  # Ensure the destination directory exists
@@ -1119,17 +1144,24 @@ class RestClient:
1119
1144
 
1120
1145
  try:
1121
1146
  url = f"/v0.1/crows/{job_name}/download-file"
1122
- params = {"upload_id": folder_name, "file_path": file_path}
1147
+ params = {"trajectory_id": trajectory_id, "file_path": file_path}
1123
1148
 
1124
1149
  with self.client.stream("GET", url, params=params) as response:
1125
1150
  response.raise_for_status() # Check for HTTP errors before streaming
1126
1151
  with open(destination_path, "wb") as f:
1127
1152
  for chunk in response.iter_bytes(chunk_size=8192):
1128
1153
  f.write(chunk)
1154
+
1155
+ # Check if the downloaded file is empty
1156
+ if destination_path.stat().st_size == 0:
1157
+ # Remove the empty file
1158
+ destination_path.unlink()
1159
+ raise FileNotFoundError(f"File not found or is empty: {file_path}")
1160
+
1129
1161
  logger.info(f"File {file_path} downloaded to {destination_path}")
1130
1162
  except HTTPStatusError as e:
1131
1163
  logger.exception(
1132
- f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
1164
+ f"Error downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}: {e.response.text}"
1133
1165
  )
1134
1166
  # Clean up partially downloaded file if an error occurs
1135
1167
  if destination_path.exists():
@@ -1137,9 +1169,20 @@ class RestClient:
1137
1169
  raise RestClientError(
1138
1170
  f"Error downloading file: {e.response.status_code} - {e.response.text}"
1139
1171
  ) from e
1172
+ except RemoteProtocolError as e:
1173
+ logger.error(
1174
+ f"Connection error while downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}"
1175
+ )
1176
+ # Clean up partially downloaded file
1177
+ if destination_path.exists():
1178
+ destination_path.unlink()
1179
+
1180
+ # Often RemoteProtocolError during download means the file wasn't found
1181
+ # or was empty/corrupted on the server side
1182
+ raise FileNotFoundError(f"File not found or corrupted: {file_path}") from e
1140
1183
  except Exception as e:
1141
1184
  logger.exception(
1142
- f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
1185
+ f"Error downloading file {file_path} for job {job_name}, trajectory_id {trajectory_id}"
1143
1186
  )
1144
1187
  if destination_path.exists():
1145
1188
  destination_path.unlink() # Clean up partial file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: futurehouse-client
3
- Version: 0.3.17.dev94
3
+ Version: 0.3.18.dev80
4
4
  Summary: A client for interacting with endpoints of the FutureHouse service.
5
5
  Author-email: FutureHouse technical staff <hello@futurehouse.org>
6
6
  Classifier: Operating System :: OS Independent
@@ -1,7 +1,7 @@
1
1
  futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
2
2
  futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
3
3
  futurehouse_client/clients/job_client.py,sha256=Fi3YvN4k82AuXCe8vlwxhkK8CXS164NQrs7paj9qIek,11096
4
- futurehouse_client/clients/rest_client.py,sha256=dsUmpgV5sfyb4GDv6whWVwRN1z2LOfZsPF8vjoioNfY,45472
4
+ futurehouse_client/clients/rest_client.py,sha256=Qv54VFcGnCDbOoGFmfx8AZsXyAyZyZ4weK9RGKVePOE,47214
5
5
  futurehouse_client/models/__init__.py,sha256=ta3jFLM_LsDz1rKDmx8rja8sT7WtSKoFvMgLF0yFpvA,342
6
6
  futurehouse_client/models/app.py,sha256=yfZ9tyw4VATVAfYrU7aTdCNPSljLEho09_nIbh8oZDY,23174
7
7
  futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
@@ -10,7 +10,7 @@ futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
10
10
  futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
11
11
  futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
12
12
  futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
13
- futurehouse_client-0.3.17.dev94.dist-info/METADATA,sha256=acLPon9oE1ecVZzz8JrpumcSLmhRkqGGG62gjGEW1IQ,12766
14
- futurehouse_client-0.3.17.dev94.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
15
- futurehouse_client-0.3.17.dev94.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
16
- futurehouse_client-0.3.17.dev94.dist-info/RECORD,,
13
+ futurehouse_client-0.3.18.dev80.dist-info/METADATA,sha256=XVCM841O8K4E7OWwDGUZPI3c1Odio5nnBk-HOCHBj4Q,12766
14
+ futurehouse_client-0.3.18.dev80.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
15
+ futurehouse_client-0.3.18.dev80.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
16
+ futurehouse_client-0.3.18.dev80.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.4.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5