futurehouse-client 0.3.16__py3-none-any.whl → 0.3.17.dev94__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ class JobNames(StrEnum):
     FALCON = "job-futurehouse-paperqa2-deep"
     OWL = "job-futurehouse-hasanyone"
     DUMMY = "job-futurehouse-dummy-env"
+    PHOENIX = "job-futurehouse-phoenix"

     @classmethod
     def from_stage(cls, job_name: str, stage: Stage | None = None) -> str:
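With this addition, Phoenix tasks are submitted like any other `JobNames` member. A minimal sketch, using the `run_tasks_until_done` helper introduced later in this diff (the API key and query are placeholders):

```python
from futurehouse_client import FutureHouseClient, JobNames

client = FutureHouseClient(api_key="your_api_key")  # placeholder key

# run_tasks_until_done returns a list of TaskResponse, one per submitted task.
responses = client.run_tasks_until_done({
    "name": JobNames.PHOENIX,
    "query": "Suggest a three-step synthesis route for aspirin.",  # illustrative query
})
print(responses[0].status)
```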
@@ -1,12 +1,17 @@
 import ast
+import asyncio
 import base64
+import contextlib
 import copy
 import importlib.metadata
 import inspect
 import json
 import logging
 import os
-from collections.abc import Mapping
+import tempfile
+import time
+import uuid
+from collections.abc import Collection, Mapping
 from datetime import datetime
 from pathlib import Path
 from types import ModuleType
@@ -16,6 +21,7 @@ from uuid import UUID
 import cloudpickle
 from aviary.functional import EnvironmentBuilder
 from httpx import (
+    AsyncClient,
     Client,
     CloseError,
     ConnectError,
@@ -35,6 +41,8 @@ from tenacity import (
     stop_after_attempt,
     wait_exponential,
 )
+from tqdm import tqdm as sync_tqdm
+from tqdm.asyncio import tqdm

 from futurehouse_client.clients import JobNames
 from futurehouse_client.models.app import (
@@ -44,6 +52,8 @@ from futurehouse_client.models.app import (
     Stage,
     TaskRequest,
 )
+from futurehouse_client.models.rest import ExecutionStatus
+from futurehouse_client.utils.general import gather_with_concurrency
 from futurehouse_client.utils.module_utils import (
     OrganizationSelector,
     fetch_environment_function_docstring,
@@ -109,6 +119,7 @@ class SimpleOrganization(BaseModel):

 # 5 minute default for JWTs
 JWT_TOKEN_CACHE_EXPIRY: int = 300  # seconds
+DEFAULT_AGENT_TIMEOUT: int = 2400  # seconds


 class TaskResponse(BaseModel):
@@ -137,10 +148,7 @@ class TaskResponse(BaseModel):
         # TODO: We probably want to remove these two once we define the final names.
         data["job_name"] = data.get("crow")
         data["query"] = data.get("task")
-        if not (env_frame := data.get("environment_frame", {})):
-            return data
-        state = env_frame.get("state", {}).get("state", {})
-        data["task_id"] = cast(UUID, state.get("id")) if state.get("id") else None
+        data["task_id"] = cast(UUID, data.get("id")) if data.get("id") else None
         if not (metadata := data.get("metadata", {})):
             return data
         data["environment_name"] = metadata.get("environment_name")
@@ -161,7 +169,6 @@ class PQATaskResponse(TaskResponse):
     @model_validator(mode="before")
     @classmethod
     def validate_pqa_fields(cls, data: Mapping[str, Any]) -> Mapping[str, Any]:
-        # Extract fields from environment frame state
         if not isinstance(data, dict):
             return data
         if not (env_frame := data.get("environment_frame", {})):
@@ -200,11 +207,17 @@ class TaskResponseVerbose(TaskResponse):
     shared_with: list[SimpleOrganization] | None = None


+class FileUploadError(RestClientError):
+    """Raised when there's an error uploading a file."""
+
+
 class RestClient:
     REQUEST_TIMEOUT: ClassVar[float] = 30.0  # sec
     MAX_RETRY_ATTEMPTS: ClassVar[int] = 3
     RETRY_MULTIPLIER: ClassVar[int] = 1
     MAX_RETRY_WAIT: ClassVar[int] = 10
+    DEFAULT_POLLING_TIME: ClassVar[int] = 5  # seconds
+    CHUNK_SIZE: ClassVar[int] = 16 * 1024 * 1024  # 16MB chunks

     def __init__(
         self,
@@ -220,7 +233,7 @@ class RestClient:
         self.stage = stage
         self.auth_type = auth_type
         self.api_key = api_key
-        self._clients: dict[str, Client] = {}
+        self._clients: dict[str, Client | AsyncClient] = {}
        self.headers = headers or {}
        self.auth_jwt = self._run_auth(jwt=jwt)
        self.organizations: list[str] = self._filter_orgs(organization)
@@ -228,49 +241,81 @@ class RestClient:
     @property
     def client(self) -> Client:
         """Lazily initialized and cached HTTP client with authentication."""
-        return self.get_client("application/json", with_auth=True)
+        return cast(Client, self.get_client("application/json", with_auth=True))
+
+    @property
+    def async_client(self) -> AsyncClient:
+        """Lazily initialized and cached HTTP client with authentication."""
+        return cast(
+            AsyncClient,
+            self.get_client("application/json", with_auth=True, with_async=True),
+        )

     @property
     def auth_client(self) -> Client:
         """Lazily initialized and cached HTTP client without authentication."""
-        return self.get_client("application/json", with_auth=False)
+        return cast(Client, self.get_client("application/json", with_auth=False))

     @property
     def multipart_client(self) -> Client:
         """Lazily initialized and cached HTTP client for multipart uploads."""
-        return self.get_client(None, with_auth=True)
+        return cast(Client, self.get_client(None, with_auth=True))

     def get_client(
-        self, content_type: str | None = "application/json", with_auth: bool = True
-    ) -> Client:
+        self,
+        content_type: str | None = "application/json",
+        with_auth: bool = True,
+        with_async: bool = False,
+    ) -> Client | AsyncClient:
         """Return a cached HTTP client or create one if needed.

         Args:
             content_type: The desired content type header. Use None for multipart uploads.
             with_auth: Whether the client should include an Authorization header.
+            with_async: Whether to use an async client.

         Returns:
             An HTTP client configured with the appropriate headers.
         """
         # Create a composite key based on content type and auth flag.
-        key = f"{content_type or 'multipart'}_{with_auth}"
+        key = f"{content_type or 'multipart'}_{with_auth}_{with_async}"
         if key not in self._clients:
             headers = copy.deepcopy(self.headers)
             if with_auth:
                 headers["Authorization"] = f"Bearer {self.auth_jwt}"
             if content_type:
                 headers["Content-Type"] = content_type
-            self._clients[key] = Client(
-                base_url=self.base_url,
-                headers=headers,
-                timeout=self.REQUEST_TIMEOUT,
+            self._clients[key] = (
+                AsyncClient(
+                    base_url=self.base_url,
+                    headers=headers,
+                    timeout=self.REQUEST_TIMEOUT,
+                )
+                if with_async
+                else Client(
+                    base_url=self.base_url,
+                    headers=headers,
+                    timeout=self.REQUEST_TIMEOUT,
+                )
             )
         return self._clients[key]

-    def __del__(self):
-        """Ensure all cached clients are properly closed when the instance is destroyed."""
+    def close(self):
+        """Explicitly close all cached clients."""
+        for client in self._clients.values():
+            if isinstance(client, Client):
+                with contextlib.suppress(RuntimeError, CloseError):
+                    client.close()
+
+    async def aclose(self):
+        """Asynchronously close all cached clients."""
         for client in self._clients.values():
-            client.close()
+            if isinstance(client, AsyncClient):
+                with contextlib.suppress(RuntimeError, CloseError):
+                    await client.aclose()
+
+    def __del__(self):
+        self.close()

     def _filter_orgs(self, organization: str | None = None) -> list[str]:
         filtered_orgs = [
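Because the client cache now holds both sync and async clients, cleanup is split: `close()` handles `Client` instances (and is what `__del__` calls), while `aclose()` must be awaited to release `AsyncClient` instances. A minimal teardown sketch under that assumption (the API key and task id are placeholders):

```python
import asyncio

from futurehouse_client import FutureHouseClient


async def main() -> None:
    client = FutureHouseClient(api_key="your_api_key")  # placeholder key
    try:
        task = await client.aget_task("some-task-uuid")  # hypothetical trajectory id
        print(task.status)
    finally:
        await client.aclose()  # closes cached AsyncClient instances
        client.close()  # closes cached sync Client instances


asyncio.run(main())
```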
@@ -402,6 +447,8 @@ class RestClient:
             ):
                 json_data = "".join(response.iter_text(chunk_size=1024))
                 data = json.loads(json_data)
+                if "id" not in data:
+                    data["id"] = task_id
                 verbose_response = TaskResponseVerbose(**data)

                 if verbose:
@@ -417,6 +464,54 @@ class RestClient:
         except Exception as e:
             raise TaskFetchError(f"Error getting task: {e!s}") from e

+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    async def aget_task(
+        self, task_id: str | None = None, history: bool = False, verbose: bool = False
+    ) -> "TaskResponse":
+        """Get details for a specific task asynchronously."""
+        try:
+            task_id = task_id or self.trajectory_id
+            url = f"/v0.1/trajectories/{task_id}"
+            full_url = f"{self.base_url}{url}"
+
+            with external_trace(
+                url=full_url,
+                method="GET",
+                library="httpx",
+                custom_params={
+                    "operation": "get_job",
+                    "job_id": task_id,
+                },
+            ):
+                async with self.async_client.stream(
+                    "GET", url, params={"history": history}
+                ) as response:
+                    response.raise_for_status()
+                    json_data = "".join([
+                        chunk async for chunk in response.aiter_text()
+                    ])
+                    data = json.loads(json_data)
+                    if "id" not in data:
+                        data["id"] = task_id
+                    verbose_response = TaskResponseVerbose(**data)
+
+            if verbose:
+                return verbose_response
+            if any(
+                JobNames.from_string(job_name) in verbose_response.job_name
+                for job_name in ["crow", "falcon", "owl", "dummy"]
+            ):
+                return PQATaskResponse(**data)
+            return TaskResponse(**data)
+        except ValueError as e:
+            raise ValueError("Invalid task ID format. Must be a valid UUID.") from e
+        except Exception as e:
+            raise TaskFetchError(f"Error getting task: {e!s}") from e
+
     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
         wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
@@ -438,10 +533,179 @@ class RestClient:
                 "/v0.1/crows", json=task_data.model_dump(mode="json")
             )
             response.raise_for_status()
-            self.trajectory_id = response.json()["trajectory_id"]
+            trajectory_id = response.json()["trajectory_id"]
+            self.trajectory_id = trajectory_id
         except Exception as e:
             raise TaskFetchError(f"Error creating task: {e!s}") from e
-        return self.trajectory_id
+        return trajectory_id
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    async def acreate_task(self, task_data: TaskRequest | dict[str, Any]):
+        """Create a new futurehouse task."""
+        if isinstance(task_data, dict):
+            task_data = TaskRequest.model_validate(task_data)
+
+        if isinstance(task_data.name, JobNames):
+            task_data.name = task_data.name.from_stage(
+                task_data.name.name,
+                self.stage,
+            )
+
+        try:
+            response = await self.async_client.post(
+                "/v0.1/crows", json=task_data.model_dump(mode="json")
+            )
+            response.raise_for_status()
+            trajectory_id = response.json()["trajectory_id"]
+            self.trajectory_id = trajectory_id
+        except Exception as e:
+            raise TaskFetchError(f"Error creating task: {e!s}") from e
+        return trajectory_id
+
+    async def arun_tasks_until_done(
+        self,
+        task_data: TaskRequest
+        | dict[str, Any]
+        | Collection[TaskRequest]
+        | Collection[dict[str, Any]],
+        verbose: bool = False,
+        progress_bar: bool = False,
+        concurrency: int = 10,
+        timeout: int = DEFAULT_AGENT_TIMEOUT,
+    ) -> list[TaskResponse]:
+        all_tasks: Collection[TaskRequest | dict[str, Any]] = (
+            cast(Collection[TaskRequest | dict[str, Any]], [task_data])
+            if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
+            else cast(Collection[TaskRequest | dict[str, Any]], task_data)
+        )
+
+        trajectory_ids = await gather_with_concurrency(
+            concurrency,
+            [self.acreate_task(task) for task in all_tasks],
+            progress=progress_bar,
+        )
+
+        start_time = time.monotonic()
+        completed_tasks: dict[str, TaskResponse] = {}
+
+        if progress_bar:
+            progress = tqdm(
+                total=len(trajectory_ids), desc="Waiting for tasks to finish", ncols=0
+            )
+
+        while (time.monotonic() - start_time) < timeout:
+            task_results = await gather_with_concurrency(
+                concurrency,
+                [
+                    self.aget_task(task_id, verbose=verbose)
+                    for task_id in trajectory_ids
+                    if task_id not in completed_tasks
+                ],
+            )
+
+            for task in task_results:
+                task_id = str(task.task_id)
+                if (
+                    task_id not in completed_tasks
+                    and ExecutionStatus(task.status).is_terminal_state()
+                ):
+                    completed_tasks[task_id] = task
+                    if progress_bar:
+                        progress.update(1)
+
+            all_done = len(completed_tasks) == len(trajectory_ids)
+
+            if all_done:
+                break
+            await asyncio.sleep(self.DEFAULT_POLLING_TIME)
+
+        else:
+            logger.warning(
+                f"Timed out waiting for tasks to finish after {timeout} seconds. Returning with {len(completed_tasks)} completed tasks and {len(trajectory_ids)} total tasks."
+            )
+
+        if progress_bar:
+            progress.close()
+
+        return [
+            completed_tasks.get(task_id)
+            or (await self.aget_task(task_id, verbose=verbose))
+            for task_id in trajectory_ids
+        ]
+
+    def run_tasks_until_done(
+        self,
+        task_data: TaskRequest
+        | dict[str, Any]
+        | Collection[TaskRequest]
+        | Collection[dict[str, Any]],
+        verbose: bool = False,
+        progress_bar: bool = False,
+        timeout: int = DEFAULT_AGENT_TIMEOUT,
+    ) -> list[TaskResponse]:
+        """Run multiple tasks and wait for them to complete.
+
+        Args:
+            task_data: A single task or collection of tasks to run
+            verbose: Whether to return verbose task responses
+            progress_bar: Whether to display a progress bar
+            timeout: Maximum time to wait for task completion in seconds
+
+        Returns:
+            A list of completed task responses
+        """
+        all_tasks: Collection[TaskRequest | dict[str, Any]] = (
+            cast(Collection[TaskRequest | dict[str, Any]], [task_data])
+            if (isinstance(task_data, dict) or not isinstance(task_data, Collection))
+            else cast(Collection[TaskRequest | dict[str, Any]], task_data)
+        )
+
+        trajectory_ids = [self.create_task(task) for task in all_tasks]
+
+        start_time = time.monotonic()
+        completed_tasks: dict[str, TaskResponse] = {}
+
+        if progress_bar:
+            progress = sync_tqdm(
+                total=len(trajectory_ids), desc="Waiting for tasks to finish", ncols=0
+            )
+
+        while (time.monotonic() - start_time) < timeout:
+            all_done = True
+
+            for task_id in trajectory_ids:
+                if task_id in completed_tasks:
+                    continue
+
+                task = self.get_task(task_id, verbose=verbose)
+
+                if not ExecutionStatus(task.status).is_terminal_state():
+                    all_done = False
+                elif task_id not in completed_tasks:
+                    completed_tasks[task_id] = task
+                    if progress_bar:
+                        progress.update(1)
+
+            if all_done:
+                break
+            time.sleep(self.DEFAULT_POLLING_TIME)
+
+        else:
+            logger.warning(
+                f"Timed out waiting for tasks to finish after {timeout} seconds. Returning with {len(completed_tasks)} completed tasks and {len(trajectory_ids)} total tasks."
+            )
+
+        if progress_bar:
+            progress.close()
+
+        return [
+            completed_tasks.get(task_id) or self.get_task(task_id, verbose=verbose)
+            for task_id in trajectory_ids
+        ]

     @retry(
         stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
@@ -644,6 +908,243 @@ class RestClient:
             raise JobCreationError(f"Error generating docker image: {e!s}") from e
         return build_context

+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def upload_file(
+        self,
+        job_name: str,
+        file_path: str | os.PathLike,
+        folder_name: str | None = None,
+    ) -> str:
+        """Upload a file or directory to a futurehouse job bucket.
+
+        Args:
+            job_name: The name of the futurehouse job to upload to.
+            file_path: The local path to the file or directory to upload.
+            folder_name: Optional folder name to use for the upload. If not provided, a random UUID will be used.
+
+        Returns:
+            The upload ID used for the upload.
+
+        Raises:
+            FileUploadError: If there's an error uploading the file.
+        """
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise FileNotFoundError(f"File or directory not found: {file_path}")
+
+        upload_id = folder_name or str(uuid.uuid4())
+
+        if file_path.is_dir():
+            # Process directory recursively
+            self._upload_directory(job_name, file_path, upload_id)
+        else:
+            # Process single file
+            self._upload_single_file(job_name, file_path, upload_id)
+        logger.info(f"Successfully uploaded {file_path} to {upload_id}")
+        return upload_id
+
+    def _upload_directory(self, job_name: str, dir_path: Path, upload_id: str) -> None:
+        """Upload all files in a directory recursively.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            dir_path: The path to the directory to upload.
+            upload_id: The upload ID to use.
+
+        Raises:
+            FileUploadError: If there's an error uploading any file.
+        """
+        # Skip common directories that shouldn't be uploaded
+        if any(ignore in dir_path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS):
+            return
+
+        try:
+            # Upload all files in the directory recursively
+            for path in dir_path.rglob("*"):
+                if path.is_file() and not any(
+                    ignore in path.parts for ignore in FILE_UPLOAD_IGNORE_PARTS
+                ):
+                    # Use path relative to the original directory as file name
+                    rel_path = path.relative_to(dir_path)
+                    self._upload_single_file(
+                        job_name,
+                        path,
+                        upload_id,
+                        file_name=str(rel_path),
+                    )
+        except Exception as e:
+            raise FileUploadError(f"Error uploading directory {dir_path}: {e}") from e
+
+    def _upload_single_file(
+        self,
+        job_name: str,
+        file_path: Path,
+        upload_id: str,
+        file_name: str | None = None,
+    ) -> None:
+        """Upload a single file in chunks.
+
+        Args:
+            job_name: The key of the crow to upload to.
+            file_path: The path to the file to upload.
+            upload_id: The upload ID to use.
+            file_name: Optional name to use for the file. If not provided, the file's name will be used.
+
+        Raises:
+            FileUploadError: If there's an error uploading the file.
+        """
+        file_name = file_name or file_path.name
+        file_size = file_path.stat().st_size
+        total_chunks = (file_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE
+
+        logger.info(f"Uploading {file_path} as {file_name} ({total_chunks} chunks)")
+
+        try:
+            with open(file_path, "rb") as f:
+                for chunk_index in range(total_chunks):
+                    # Read the chunk from the file
+                    f.seek(chunk_index * self.CHUNK_SIZE)
+                    chunk_data = f.read(self.CHUNK_SIZE)
+
+                    # Prepare and send the chunk
+                    with tempfile.NamedTemporaryFile() as temp_file:
+                        temp_file.write(chunk_data)
+                        temp_file.flush()
+
+                        # Create form data
+                        with open(temp_file.name, "rb") as chunk_file_obj:
+                            files = {
+                                "chunk": (
+                                    file_name,
+                                    chunk_file_obj,
+                                    "application/octet-stream",
+                                )
+                            }
+                            data = {
+                                "file_name": file_name,
+                                "chunk_index": chunk_index,
+                                "total_chunks": total_chunks,
+                                "upload_id": upload_id,
+                            }
+
+                            # Send the chunk
+                            response = self.multipart_client.post(
+                                f"/v0.1/crows/{job_name}/upload-chunk",
+                                files=files,
+                                data=data,
+                            )
+                            response.raise_for_status()
+
+                    logger.debug(
+                        f"Uploaded chunk {chunk_index + 1}/{total_chunks} of {file_name}"
+                    )
+
+            logger.info(f"Successfully uploaded {file_name}")
+
+        except Exception as e:
+            logger.exception(f"Error uploading file {file_path}")
+            raise FileUploadError(f"Error uploading file {file_path}: {e}") from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def list_files(self, job_name: str, folder_name: str) -> dict[str, list[str]]:
+        """List files and directories in a GCS location for a given job_name and upload_id.
+
+        Args:
+            job_name: The name of the futurehouse job.
+            folder_name: The specific folder name (upload_id) to list files from.
+
+        Returns:
+            A list of files in the GCS folder.
+
+        Raises:
+            RestClientError: If there is an error listing the files.
+        """
+        try:
+            url = f"/v0.1/crows/{job_name}/list-files"
+            params = {"upload_id": folder_name}
+            response = self.client.get(url, params=params)
+            response.raise_for_status()
+            return response.json()
+        except HTTPStatusError as e:
+            logger.exception(
+                f"Error listing files for job {job_name}, folder {folder_name}: {e.response.text}"
+            )
+            raise RestClientError(
+                f"Error listing files: {e.response.status_code} - {e.response.text}"
+            ) from e
+        except Exception as e:
+            logger.exception(
+                f"Error listing files for job {job_name}, folder {folder_name}"
+            )
+            raise RestClientError(f"Error listing files: {e!s}") from e
+
+    @retry(
+        stop=stop_after_attempt(MAX_RETRY_ATTEMPTS),
+        wait=wait_exponential(multiplier=RETRY_MULTIPLIER, max=MAX_RETRY_WAIT),
+        retry=retry_if_connection_error,
+    )
+    def download_file(
+        self,
+        job_name: str,
+        folder_name: str,
+        file_path: str,
+        destination_path: str | os.PathLike,
+    ) -> None:
+        """Download a file from GCS to a local path.
+
+        Args:
+            job_name: The name of the futurehouse job.
+            folder_name: The specific folder name (upload_id) the file belongs to.
+            file_path: The relative path of the file to download
+                (e.g., 'data/my_file.csv' or 'my_image.png').
+            destination_path: The local path where the file should be saved.
+
+        Raises:
+            RestClientError: If there is an error downloading the file.
+            FileNotFoundError: If the destination directory does not exist.
+        """
+        destination_path = Path(destination_path)
+        # Ensure the destination directory exists
+        destination_path.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            url = f"/v0.1/crows/{job_name}/download-file"
+            params = {"upload_id": folder_name, "file_path": file_path}
+
+            with self.client.stream("GET", url, params=params) as response:
+                response.raise_for_status()  # Check for HTTP errors before streaming
+                with open(destination_path, "wb") as f:
+                    for chunk in response.iter_bytes(chunk_size=8192):
+                        f.write(chunk)
+            logger.info(f"File {file_path} downloaded to {destination_path}")
+        except HTTPStatusError as e:
+            logger.exception(
+                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}: {e.response.text}"
+            )
+            # Clean up partially downloaded file if an error occurs
+            if destination_path.exists():
+                destination_path.unlink()
+            raise RestClientError(
+                f"Error downloading file: {e.response.status_code} - {e.response.text}"
+            ) from e
+        except Exception as e:
+            logger.exception(
+                f"Error downloading file {file_path} for job {job_name}, folder {folder_name}"
+            )
+            if destination_path.exists():
+                destination_path.unlink()  # Clean up partial file
+            raise RestClientError(f"Error downloading file: {e!s}") from e
+

 def get_installed_packages() -> dict[str, str]:
     """Returns a dictionary of installed packages and their versions."""
@@ -1,3 +1,5 @@
+from enum import StrEnum, auto
+
 from pydantic import BaseModel, JsonValue


@@ -17,3 +19,18 @@ class StoreEnvironmentFrameRequest(BaseModel):
     current_agent_step: str
     state: JsonValue
     trajectory_timestep: int
+
+
+class ExecutionStatus(StrEnum):
+    QUEUED = auto()
+    IN_PROGRESS = "in progress"
+    FAIL = auto()
+    SUCCESS = auto()
+    CANCELLED = auto()
+
+    def is_terminal_state(self) -> bool:
+        return self in self.terminal_states()
+
+    @classmethod
+    def terminal_states(cls) -> set["ExecutionStatus"]:
+        return {cls.SUCCESS, cls.FAIL, cls.CANCELLED}
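Since `ExecutionStatus` is a `StrEnum`, the `auto()` members take their lower-cased names as values, while `IN_PROGRESS` is explicitly "in progress"; this is the check both task runners use to decide when to stop polling. A small sketch:

```python
from futurehouse_client.models.rest import ExecutionStatus

assert ExecutionStatus.SUCCESS == "success"  # auto() lower-cases the member name
assert ExecutionStatus.IN_PROGRESS == "in progress"

# Terminal states end the polling loops in run_tasks_until_done.
assert ExecutionStatus("cancelled").is_terminal_state()
assert not ExecutionStatus("queued").is_terminal_state()
```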
@@ -0,0 +1,29 @@
+import asyncio
+from collections.abc import Awaitable, Iterable
+from typing import TypeVar
+
+from tqdm.asyncio import tqdm
+
+T = TypeVar("T")
+
+
+async def gather_with_concurrency(
+    n: int | asyncio.Semaphore, coros: Iterable[Awaitable[T]], progress: bool = False
+) -> list[T]:
+    """
+    Run asyncio.gather with a concurrency limit.
+
+    SEE: https://stackoverflow.com/a/61478547/2392535
+    """
+    semaphore = asyncio.Semaphore(n) if isinstance(n, int) else n
+
+    async def sem_coro(coro: Awaitable[T]) -> T:
+        async with semaphore:
+            return await coro
+
+    if progress:
+        return await tqdm.gather(
+            *(sem_coro(c) for c in coros), desc="Gathering", ncols=0
+        )
+
+    return await asyncio.gather(*(sem_coro(c) for c in coros))
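`gather_with_concurrency` wraps each awaitable in a shared semaphore before handing the batch to `asyncio.gather` (or `tqdm.gather` when `progress=True`), so at most `n` coroutines run at once while result order is preserved. A standalone sketch:

```python
import asyncio

from futurehouse_client.utils.general import gather_with_concurrency


async def fetch(i: int) -> int:
    await asyncio.sleep(0.1)  # stand-in for an HTTP call
    return i


async def main() -> None:
    # At most 3 of the 10 coroutines are in flight at any time.
    results = await gather_with_concurrency(3, [fetch(i) for i in range(10)])
    print(results)  # [0, 1, ..., 9]: input order is preserved


asyncio.run(main())
```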
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: futurehouse-client
-Version: 0.3.16
+Version: 0.3.17.dev94
 Summary: A client for interacting with endpoints of the FutureHouse service.
 Author-email: FutureHouse technical staff <hello@futurehouse.org>
 Classifier: Operating System :: OS Independent
@@ -19,6 +19,7 @@ Requires-Dist: litellm==1.67.4.post1
 Requires-Dist: pydantic
 Requires-Dist: python-dotenv
 Requires-Dist: tenacity
+Requires-Dist: tqdm>=4.62
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: jupyter; extra == "dev"
@@ -30,6 +31,7 @@ Requires-Dist: pylint; extra == "dev"
 Requires-Dist: pylint-per-file-ignores; extra == "dev"
 Requires-Dist: pylint-pydantic; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-rerunfailures; extra == "dev"
 Requires-Dist: pytest-subtests; extra == "dev"
 Requires-Dist: pytest-timeout; extra == "dev"
@@ -49,9 +51,9 @@ Documentation and tutorials for futurehouse-client, a client for interacting wit
 - [Quickstart](#quickstart)
 - [Functionalities](#functionalities)
 - [Authentication](#authentication)
-- [Task submission](#task-submission)
+- [Simple task running](#simple-task-running)
 - [Task Continuation](#task-continuation)
-- [Task retrieval](#task-retrieval)
+- [Asynchronous tasks](#asynchronous-tasks)

 <!--TOC-->

@@ -78,19 +80,17 @@ task_data = {
     "query": "Which neglected diseases had a treatment developed by artificial intelligence?",
 }

-task_run_id = client.create_task(task_data)
-
-task_status = client.get_task(task_run_id)
+task_response = client.run_tasks_until_done(task_data)
 ```

-A quickstart example can be found in the [client_notebook.ipynb](https://github.com/Future-House/futurehouse-client-docs/blob/main/docs/client_notebook.ipynb) file, where we show how to submit and retrieve a job task, pass runtime configuration to the agent, and ask follow-up questions to the previous job.
+A quickstart example can be found in the [client_notebook.ipynb](https://futurehouse.gitbook.io/futurehouse-cookbook/futurehouse-client/docs/client_notebook) file, where we show how to submit and retrieve a job task, pass runtime configuration to the agent, and ask follow-up questions to the previous job.

 ## Functionalities

 FutureHouse client implements a RestClient (called `FutureHouseClient`) with the following functionalities:

-- [Task submission](#task-submission): `create_task(TaskRequest)`
-- [Task status](#task-status): `get_task(task_id)`
+- [Simple task running](#simple-task-running): `run_tasks_until_done(TaskRequest)` or `await arun_tasks_until_done(TaskRequest)`
+- [Asynchronous tasks](#asynchronous-tasks): `get_task(task_id)` or `aget_task(task_id)` and `create_task(TaskRequest)` or `acreate_task(TaskRequest)`

 To create a `FutureHouseClient`, you need to pass a FutureHouse platform API key (see [Authentication](#authentication)):
@@ -106,9 +106,9 @@ client = FutureHouseClient(

 In order to use the `FutureHouseClient`, you need to authenticate yourself. Authentication is done by providing an API key, which can be obtained directly from your [profile page in the FutureHouse platform](https://platform.futurehouse.org/profile).

-## Task submission
+## Simple task running

-In the futurehouse platform, we define the deployed combination of an agent and an environment as a `job`. To invoke a job, we need to submit a `task` (also called a `query`) to it.
+In the FutureHouse platform, we define the deployed combination of an agent and an environment as a `job`. To invoke a job, we need to submit a `task` (also called a `query`) to it.
 `FutureHouseClient` can be used to submit tasks/queries to available jobs in the FutureHouse platform. Using a `FutureHouseClient` instance, you can submit tasks to the platform by calling the `create_task` method, which receives a `TaskRequest` (or a dictionary with `kwargs`) and returns the task id.
 Aiming to make the submission of tasks as simple as possible, we have created a `JobNames` `enum` that contains the available task types.

@@ -118,10 +118,10 @@ The available supported jobs are:
 | `JobNames.CROW` | `job-futurehouse-paperqa2` | Fast Search | Ask a question of scientific data sources, and receive a high-accuracy, cited response. Built with [PaperQA2](https://github.com/Future-House/paper-qa). |
 | `JobNames.FALCON` | `job-futurehouse-paperqa2-deep` | Deep Search | Use a plethora of sources to deeply research. Receive a detailed, structured report as a response. |
 | `JobNames.OWL` | `job-futurehouse-hasanyone` | Precedent Search | Formerly known as HasAnyone, query if anyone has ever done something in science. |
+| `JobNames.PHOENIX` | `job-futurehouse-phoenix` | Chemistry Tasks | A new iteration of ChemCrow, Phoenix uses cheminformatics tools to do chemistry. Good for planning synthesis and design of new molecules. |
 | `JobNames.DUMMY` | `job-futurehouse-dummy` | Dummy Task | This is a dummy task. Mainly for testing purposes. |

-Using `JobNames`, the client automatically adapts the job name to the current stage.
-The task submission looks like this:
+Using `JobNames`, the task submission looks like this:

 ```python
 from futurehouse_client import FutureHouseClient, JobNames
@@ -135,10 +135,73 @@ task_data = {
     "query": "Has anyone tested therapeutic exerkines in humans or NHPs?",
 }

-task_id = client.create_task(task_data)
+task_responses = client.run_tasks_until_done(task_data)
+
+print(task_responses[0].answer)
+```
+
+Or if running async code:
+
+```python
+import asyncio
+from futurehouse_client import FutureHouseClient, JobNames
+
+
+async def main():
+    client = FutureHouseClient(
+        api_key="your_api_key",
+    )
+
+    task_data = {
+        "name": JobNames.OWL,
+        "query": "Has anyone tested therapeutic exerkines in humans or NHPs?",
+    }
+
+    task_responses = await client.arun_tasks_until_done(task_data)
+    print(task_responses[0].answer)
+    return task_responses
+
+
+if __name__ == "__main__":
+    task_responses = asyncio.run(main())
 ```

-`TaskRequest` has the following fields:
+Note that in either the sync or the async code, collections of tasks can be given to the client to run them in a batch:
+
+```python
+import asyncio
+from futurehouse_client import FutureHouseClient, JobNames
+
+
+async def main():
+    client = FutureHouseClient(
+        api_key="your_api_key",
+    )
+
+    task_data = [
+        {
+            "name": JobNames.OWL,
+            "query": "Has anyone tested therapeutic exerkines in humans or NHPs?",
+        },
+        {
+            "name": JobNames.CROW,
+            "query": "Are there any clinically validated therapeutic exerkines for humans?",
+        },
+    ]
+
+    task_responses = await client.arun_tasks_until_done(task_data)
+    print(task_responses[0].answer)
+    print(task_responses[1].answer)
+    return task_responses
+
+
+if __name__ == "__main__":
+    task_responses = asyncio.run(main())
+```
+
+`TaskRequest` can also be used to submit jobs; it has the following fields:

 | Field | Type | Description |
 | -------------- | ------------- | ------------------------------------------------------------------------------------------------------------------- |
@@ -148,13 +211,67 @@ task_id = client.create_task(task_data)
 | runtime_config | RuntimeConfig | Optional runtime parameters for the job |

 `runtime_config` can receive an `AgentConfig` object with the desired kwargs. Check the available `AgentConfig` fields in the [LDP documentation](https://github.com/Future-House/ldp/blob/main/src/ldp/agent/agent.py#L87). Besides the `AgentConfig` object, we can also pass `timeout` and `max_steps` to limit the execution time and the number of steps the agent can take.
-Other especialised configurations are also available but are outside the scope of this documentation.
+
+```python
+from futurehouse_client import FutureHouseClient, JobNames
+from futurehouse_client.models.app import TaskRequest
+
+client = FutureHouseClient(
+    api_key="your_api_key",
+)
+
+task_responses = client.run_tasks_until_done(
+    TaskRequest(
+        name=JobNames.OWL,
+        query="Has anyone tested therapeutic exerkines in humans or NHPs?",
+    )
+)
+
+print(task_responses[0].answer)
+```
+
+A `TaskResponse` will be returned from using our agents. For Owl, Crow, and Falcon, we default to a subclass, `PQATaskResponse`, which has some key attributes:
+
+| Field | Type | Description |
+| --------------------- | ---- | -------------------------------------------------------------------------------- |
+| answer | str | Answer to your query. |
+| formatted_answer | str | Specially formatted answer with references. |
+| has_successful_answer | bool | Flag for whether the agent was able to find a good answer to your query or not. |
+
+If using the `verbose` setting, much more data can be pulled down from your `TaskResponse`, which will exist across all agents (not just Owl, Crow, and Falcon).
+
+```python
+from futurehouse_client import FutureHouseClient, JobNames
+from futurehouse_client.models.app import TaskRequest
+
+client = FutureHouseClient(
+    api_key="your_api_key",
+)
+
+task_responses = client.run_tasks_until_done(
+    TaskRequest(
+        name=JobNames.OWL,
+        query="Has anyone tested therapeutic exerkines in humans or NHPs?",
+    ),
+    verbose=True,
+)
+
+print(task_responses[0].environment_frame)
+```
+
+In that case, a `TaskResponseVerbose` will have the following fields:
+
+| Field | Type | Description |
+| ----------------- | ---- | ------------------------------------------------------------------------------------------------------------------------ |
+| agent_state | dict | Large object with all agent states during the progress of your task. |
+| environment_frame | dict | Large nested object with all environment data, for PQA environments it includes contexts, paper metadata, and answers. |
+| metadata | dict | Extra metadata about your query. |

 ## Task Continuation

 Once a task is submitted and the answer is returned, the FutureHouse platform allows you to ask follow-up questions to the previous task.
 It is also possible through the platform API.
-To accomplish that, we can use the `runtime_config` we discussed in the [Task submission](#task-submission) section.
+To accomplish that, we can use the `runtime_config` we discussed in the [Simple task running](#simple-task-running) section.
@@ -173,12 +290,12 @@ continued_task_data = {
     "runtime_config": {"continued_task_id": task_id},
 }

-continued_task_id = client.create_task(continued_task_data)
+task_result = client.run_tasks_until_done(continued_task_data)
 ```

-## Task retrieval
+## Asynchronous tasks

-Once a task is submitted, you can retrieve it by calling the `get_task` method, which receives a task id and returns a `TaskResponse` object.
+Sometimes you may want to submit many jobs and only query their results later, doing other work while you wait. The platform API supports this pattern as well, rather than blocking until a result is ready.

 ```python
 from futurehouse_client import FutureHouseClient, JobNames
@@ -187,9 +304,13 @@ client = FutureHouseClient(
     api_key="your_api_key",
 )

-task_id = "task_id"
+task_data = {"name": JobNames.CROW, "query": "How many species of birds are there?"}
+
+task_id = client.create_task(task_data)
+
+# move on to do other things

 task_status = client.get_task(task_id)
 ```

-`task_status` contains information about the task. For instance, its `status`, `task`, `environment_name` and `agent_name`, and other fields specific to the job.
+`task_status` contains information about the task. For instance, its `status`, `task`, `environment_name` and `agent_name`, and other fields specific to the job. You can poll the status periodically until it is `success` before moving on.
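For completeness, the polling loop the README describes might look like the sketch below, using the `ExecutionStatus` helper added in this release (the API key is a placeholder and the five-second interval is an arbitrary choice):

```python
import time

from futurehouse_client import FutureHouseClient, JobNames
from futurehouse_client.models.rest import ExecutionStatus

client = FutureHouseClient(api_key="your_api_key")

task_id = client.create_task(
    {"name": JobNames.CROW, "query": "How many species of birds are there?"}
)

# Poll until the task reaches a terminal state (success, fail, or cancelled).
while not ExecutionStatus(client.get_task(task_id).status).is_terminal_state():
    time.sleep(5)

print(client.get_task(task_id).answer)
```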
@@ -0,0 +1,16 @@
+futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
+futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
+futurehouse_client/clients/job_client.py,sha256=Fi3YvN4k82AuXCe8vlwxhkK8CXS164NQrs7paj9qIek,11096
+futurehouse_client/clients/rest_client.py,sha256=dsUmpgV5sfyb4GDv6whWVwRN1z2LOfZsPF8vjoioNfY,45472
+futurehouse_client/models/__init__.py,sha256=ta3jFLM_LsDz1rKDmx8rja8sT7WtSKoFvMgLF0yFpvA,342
+futurehouse_client/models/app.py,sha256=yfZ9tyw4VATVAfYrU7aTdCNPSljLEho09_nIbh8oZDY,23174
+futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
+futurehouse_client/models/rest.py,sha256=lgwkMIXz0af-49BYSkKeS7SRqvN3motqnAikDN4YGTc,789
+futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+futurehouse_client/utils/general.py,sha256=A_rtTiYW30ELGEZlWCIArO7q1nEmqi8hUlmBRYkMQ_c,767
+futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
+futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
+futurehouse_client-0.3.17.dev94.dist-info/METADATA,sha256=acLPon9oE1ecVZzz8JrpumcSLmhRkqGGG62gjGEW1IQ,12766
+futurehouse_client-0.3.17.dev94.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+futurehouse_client-0.3.17.dev94.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
+futurehouse_client-0.3.17.dev94.dist-info/RECORD,,
@@ -1,15 +0,0 @@
-futurehouse_client/__init__.py,sha256=ddxO7JE97c6bt7LjNglZZ2Ql8bYCGI9laSFeh9MP6VU,344
-futurehouse_client/clients/__init__.py,sha256=tFWqwIAY5PvwfOVsCje4imjTpf6xXNRMh_UHIKVI1_0,320
-futurehouse_client/clients/job_client.py,sha256=yBFKDNcFnuZDNgoK2d5037rbuzQ7TlSK6MmklEKV8EA,11056
-futurehouse_client/clients/rest_client.py,sha256=Dc29QRNZMO4uxaXNGKyx18Tn-vLaJ6P5fCbM_0u-Z3I,26379
-futurehouse_client/models/__init__.py,sha256=ta3jFLM_LsDz1rKDmx8rja8sT7WtSKoFvMgLF0yFpvA,342
-futurehouse_client/models/app.py,sha256=yfZ9tyw4VATVAfYrU7aTdCNPSljLEho09_nIbh8oZDY,23174
-futurehouse_client/models/client.py,sha256=n4HD0KStKLm6Ek9nL9ylP-bkK10yzAaD1uIDF83Qp_A,1828
-futurehouse_client/models/rest.py,sha256=W-wNFTN7HALYFFphw-RQYRMm6_TSa1cl4T-mZ1msk90,393
-futurehouse_client/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-futurehouse_client/utils/module_utils.py,sha256=aFyd-X-pDARXz9GWpn8SSViUVYdSbuy9vSkrzcVIaGI,4955
-futurehouse_client/utils/monitoring.py,sha256=UjRlufe67kI3VxRHOd5fLtJmlCbVA2Wqwpd4uZhXkQM,8728
-futurehouse_client-0.3.16.dist-info/METADATA,sha256=uCvzXKeI6i8PRvike8YKVa7-IJQAwL8G2ILjjOf6xIo,8175
-futurehouse_client-0.3.16.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-futurehouse_client-0.3.16.dist-info/top_level.txt,sha256=TRuLUCt_qBnggdFHCX4O_BoCu1j2X43lKfIZC-ElwWY,19
-futurehouse_client-0.3.16.dist-info/RECORD,,