PyPI - fal - Versions diffs - 1.3.3__py3-none-any.whl → 1.7.3__py3-none-any.whl - Mend

fal 1.3.3__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of fal might be problematic. Click here for more details.

Files changed (31)

fal/_fal_version.py +2 -2
fal/api.py +46 -14
fal/app.py +157 -17
fal/apps.py +138 -3
fal/auth/__init__.py +50 -2
fal/cli/_utils.py +8 -2
fal/cli/apps.py +1 -1
fal/cli/deploy.py +34 -8
fal/cli/main.py +2 -2
fal/cli/run.py +1 -1
fal/cli/runners.py +44 -0
fal/config.py +23 -0
fal/container.py +1 -1
fal/sdk.py +34 -9
fal/toolkit/file/file.py +92 -19
fal/toolkit/file/providers/fal.py +571 -83
fal/toolkit/file/providers/gcp.py +8 -1
fal/toolkit/file/providers/r2.py +8 -1
fal/toolkit/file/providers/s3.py +80 -0
fal/toolkit/file/types.py +11 -4
fal/toolkit/image/__init__.py +3 -3
fal/toolkit/image/image.py +25 -2
fal/toolkit/types.py +140 -0
fal/toolkit/utils/download_utils.py +4 -0
fal/toolkit/utils/retry.py +45 -0
fal/workflows.py +10 -4
{fal-1.3.3.dist-info → fal-1.7.3.dist-info}/METADATA +14 -9
{fal-1.3.3.dist-info → fal-1.7.3.dist-info}/RECORD +31 -26
{fal-1.3.3.dist-info → fal-1.7.3.dist-info}/WHEEL +1 -1
{fal-1.3.3.dist-info → fal-1.7.3.dist-info}/entry_points.txt +0 -0
{fal-1.3.3.dist-info → fal-1.7.3.dist-info}/top_level.txt +0 -0

fal/apps.py CHANGED Viewed

@@ -4,15 +4,19 @@ import json
 import time
 from contextlib import contextmanager
 from dataclasses import dataclass, field
-from typing import Any, Iterator
+from typing import TYPE_CHECKING, Any, Iterator
 import httpx
 from fal import flags
 from fal.sdk import Credentials, get_default_credentials
+if TYPE_CHECKING:
+    from websockets.sync.connection import Connection
 _QUEUE_URL_FORMAT = f"https://queue.{flags.FAL_RUN_HOST}/{{app_id}}"
 _REALTIME_URL_FORMAT = f"wss://{flags.FAL_RUN_HOST}/{{app_id}}"
+_WS_URL_FORMAT = f"wss://ws.{flags.FAL_RUN_HOST}/{{app_id}}"
 def _backwards_compatible_app_id(app_id: str) -> str:
@@ -173,7 +177,8 @@ def submit(app_id: str, arguments: dict[str, Any], *, path: str = "") -> Request
     app_id = _backwards_compatible_app_id(app_id)
     url = _QUEUE_URL_FORMAT.format(app_id=app_id)
     if path:
-        url += "/" + path.removeprefix("/")
+        _path = path[len("/") :] if path.startswith("/") else path
+        url += "/" + _path
     creds = get_default_credentials()
@@ -235,7 +240,8 @@ def _connect(app_id: str, *, path: str = "/realtime") -> Iterator[_RealtimeConne
     app_id = _backwards_compatible_app_id(app_id)
     url = _REALTIME_URL_FORMAT.format(app_id=app_id)
     if path:
-        url += "/" + path.removeprefix("/")
+        _path = path[len("/") :] if path.startswith("/") else path
+        url += "/" + _path
     creds = get_default_credentials()
@@ -243,3 +249,132 @@ def _connect(app_id: str, *, path: str = "/realtime") -> Iterator[_RealtimeConne
         url, additional_headers=creds.to_headers(), open_timeout=90
     ) as ws:
         yield _RealtimeConnection(ws)
+class _MetaMessageFound(Exception): ...
+@dataclass
+class _WSConnection:
+    """A WS connection to an HTTP Fal app."""
+    _ws: Connection
+    _buffer: str | bytes | None = None
+    def run(self, arguments: dict[str, Any]) -> bytes:
+        """Run an inference task on the app and return the result."""
+        self.send(arguments)
+        return self.recv()
+    def send(self, arguments: dict[str, Any]) -> None:
+        import json
+        payload = json.dumps(arguments)
+        self._ws.send(payload)
+    def _peek(self) -> bytes | str:
+        if self._buffer is None:
+            self._buffer = self._ws.recv()
+        return self._buffer
+    def _consume(self) -> None:
+        if self._buffer is None:
+            raise ValueError("No data to consume")
+        self._buffer = None
+    @contextmanager
+    def _recv(self) -> Iterator[str | bytes]:
+        res = self._peek()
+        yield res
+        # Only consume if it went through the context manager without raising
+        self._consume()
+    def _is_meta(self, res: str | bytes) -> bool:
+        if not isinstance(res, str):
+            return False
+        try:
+            json_payload: Any = json.loads(res)
+        except json.JSONDecodeError:
+            return False
+        if not isinstance(json_payload, dict):
+            return False
+        return "type" in json_payload and "request_id" in json_payload
+    def _recv_meta(self, type: str) -> dict[str, Any]:
+        with self._recv() as res:
+            if not self._is_meta(res):
+                raise ValueError(f"Expected a {type} message")
+            json_payload: dict = json.loads(res)
+            if json_payload.get("type") != type:
+                raise ValueError(f"Expected a {type} message")
+            return json_payload
+    def _recv_response(self) -> Iterator[str | bytes]:
+        while True:
+            try:
+                with self._recv() as res:
+                    if self._is_meta(res):
+                        # Raise so we dont consume the message
+                        raise _MetaMessageFound()
+                    yield res
+            except _MetaMessageFound:
+                break
+    def recv(self) -> bytes:
+        start = self._recv_meta("start")
+        request_id = start["request_id"]
+        response = b""
+        for part in self._recv_response():
+            if isinstance(part, str):
+                response += part.encode()
+            else:
+                response += part
+        end = self._recv_meta("end")
+        if end["request_id"] != request_id:
+            raise ValueError("Mismatched request_id in end message")
+        return response
+    def stream(self) -> Iterator[str | bytes]:
+        start = self._recv_meta("start")
+        request_id = start["request_id"]
+        yield from self._recv_response()
+        # Make sure we consume the end message
+        end = self._recv_meta("end")
+        if end["request_id"] != request_id:
+            raise ValueError("Mismatched request_id in end message")
+@contextmanager
+def ws(app_id: str, *, path: str = "") -> Iterator[_WSConnection]:
+    """Connect to a HTTP endpoint but with websocket protocol. This is an internal and
+    experimental API, use it at your own risk."""
+    from websockets.sync import client
+    app_id = _backwards_compatible_app_id(app_id)
+    url = _WS_URL_FORMAT.format(app_id=app_id)
+    if path:
+        _path = path[len("/") :] if path.startswith("/") else path
+        url += "/" + _path
+    creds = get_default_credentials()
+    with client.connect(
+        url, additional_headers=creds.to_headers(), open_timeout=90
+    ) as ws:
+        yield _WSConnection(ws)

fal/auth/__init__.py CHANGED Viewed

@@ -2,22 +2,70 @@ from __future__ import annotations
 import os
 from dataclasses import dataclass, field
+from threading import Lock
+from typing import Optional
 import click
 from fal.auth import auth0, local
+from fal.config import Config
 from fal.console import console
 from fal.console.icons import CHECK_ICON
 from fal.exceptions.auth import UnauthenticatedException
+class GoogleColabState:
+    def __init__(self):
+        self.is_checked = False
+        self.lock = Lock()
+        self.secret: Optional[str] = None
+_colab_state = GoogleColabState()
+def is_google_colab() -> bool:
+    try:
+        from IPython import get_ipython
+        return "google.colab" in str(get_ipython())
+    except ModuleNotFoundError:
+        return False
+    except NameError:
+        return False
+def get_colab_token() -> Optional[str]:
+    if not is_google_colab():
+        return None
+    with _colab_state.lock:
+        if _colab_state.is_checked:  # request access only once
+            return _colab_state.secret
+        try:
+            from google.colab import userdata  # noqa: I001
+        except ImportError:
+            return None
+        try:
+            token = userdata.get("FAL_KEY")
+            _colab_state.secret = token.strip()
+        except Exception:
+            _colab_state.secret = None
+        _colab_state.is_checked = True
+        return _colab_state.secret
 def key_credentials() -> tuple[str, str] | None:
     # Ignore key credentials when the user forces auth by user.
     if os.environ.get("FAL_FORCE_AUTH_BY_USER") == "1":
         return None
-    if "FAL_KEY" in os.environ:
-        key = os.environ["FAL_KEY"]
+    config = Config()
+    key = os.environ.get("FAL_KEY") or config.get("key") or get_colab_token()
+    if key:
         key_id, key_secret = key.split(":", 1)
         return (key_id, key_secret)
     elif "FAL_KEY_ID" in os.environ and "FAL_KEY_SECRET" in os.environ:

fal/cli/_utils.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-from fal.files import find_pyproject_toml, parse_pyproject_toml
+from fal.files import find_project_root, find_pyproject_toml, parse_pyproject_toml
 def is_app_name(app_ref: tuple[str, str | None]) -> bool:
@@ -29,6 +29,12 @@ def get_app_data_from_toml(app_name):
     except KeyError:
         raise ValueError(f"App {app_name} does not have a ref key in pyproject.toml")
+    # Convert the app_ref to a path relative to the project root
+    project_root, _ = find_project_root(None)
+    app_ref = str(project_root / app_ref)
     app_auth = app_data.get("auth", "private")
+    app_deployment_strategy = app_data.get("deployment_strategy", "recreate")
+    app_no_scale = app_data.get("no_scale", False)
-    return app_ref, app_auth
+    return app_ref, app_auth, app_deployment_strategy, app_no_scale

fal/cli/apps.py CHANGED Viewed

@@ -221,7 +221,7 @@ def _runners(args):
             str(runner.in_flight_requests),
             (
                 "N/A (active)"
-                if not runner.expiration_countdown
+                if runner.expiration_countdown is None
                 else f"{runner.expiration_countdown}s"
             ),
             f"{runner.uptime} ({runner.uptime.total_seconds()}s)",

fal/cli/deploy.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import argparse
 from collections import namedtuple
 from pathlib import Path
-from typing import Optional, Union
+from typing import Literal, Optional, Tuple, Union
 from ._utils import get_app_data_from_toml, is_app_name
 from .parser import FalClientParser, RefAction
@@ -63,7 +63,12 @@ def _get_user() -> User:
 def _deploy_from_reference(
-    app_ref: tuple[Optional[Union[Path, str]], ...], app_name: str, auth: str, args
+    app_ref: Tuple[Optional[Union[Path, str]], ...],
+    app_name: str,
+    args,
+    auth: Optional[Literal["public", "shared", "private"]] = None,
+    deployment_strategy: Optional[Literal["recreate", "rolling"]] = None,
+    no_scale: bool = False,
 ):
     from fal.api import FalServerlessError, FalServerlessHost
     from fal.utils import load_function_from
@@ -93,7 +98,7 @@ def _deploy_from_reference(
     isolated_function = loaded.function
     app_name = app_name or loaded.app_name  # type: ignore
     app_auth = auth or loaded.app_auth or "private"
-    deployment_strategy = args.strategy or "default"
+    deployment_strategy = deployment_strategy or "recreate"
     app_id = host.register(
         func=isolated_function.func,
@@ -102,6 +107,7 @@ def _deploy_from_reference(
         application_auth_mode=app_auth,
         metadata=isolated_function.options.host.get("metadata", {}),
         deployment_strategy=deployment_strategy,
+        scale=not no_scale,
     )
     if app_id:
@@ -134,7 +140,9 @@ def _deploy(args):
             raise ValueError("Cannot use --app-name or --auth with app name reference.")
         app_name = args.app_ref[0]
-        app_ref, app_auth = get_app_data_from_toml(app_name)
+        app_ref, app_auth, app_deployment_strategy, app_no_scale = (
+            get_app_data_from_toml(app_name)
+        )
         file_path, func_name = RefAction.split_ref(app_ref)
     # path/to/myfile.py::MyApp
@@ -142,8 +150,17 @@ def _deploy(args):
         file_path, func_name = args.app_ref
         app_name = args.app_name
         app_auth = args.auth
-    _deploy_from_reference((file_path, func_name), app_name, app_auth, args)
+        app_deployment_strategy = args.strategy
+        app_no_scale = args.no_scale
+    _deploy_from_reference(
+        (file_path, func_name),
+        app_name,
+        args,
+        app_auth,
+        app_deployment_strategy,
+        app_no_scale,
+    )
 def add_parser(main_subparsers, parents):
@@ -204,9 +221,18 @@ def add_parser(main_subparsers, parents):
     )
     parser.add_argument(
         "--strategy",
-        choices=["default", "rolling"],
+        choices=["recreate", "rolling"],
         help="Deployment strategy.",
-        default="default",
+        default="recreate",
+    )
+    parser.add_argument(
+        "--no-scale",
+        action="store_true",
+        help=(
+            "Use min_concurrency/max_concurrency/max_multiplexing from previous "
+            "deployment of application with this name, if exists. Otherwise will "
+            "use the values from the application code."
+        ),
     )
     parser.set_defaults(func=_deploy)

fal/cli/main.py CHANGED Viewed

@@ -6,7 +6,7 @@ from fal import __version__
 from fal.console import console
 from fal.console.icons import CROSS_ICON
-from . import apps, auth, create, deploy, doctor, keys, run, secrets
+from . import apps, auth, create, deploy, doctor, keys, run, runners, secrets
 from .debug import debugtools, get_debug_parser
 from .parser import FalParser, FalParserExit
@@ -31,7 +31,7 @@ def _get_main_parser() -> argparse.ArgumentParser:
         required=True,
     )
-    for cmd in [auth, apps, deploy, run, keys, secrets, doctor, create]:
+    for cmd in [auth, apps, deploy, run, keys, secrets, doctor, create, runners]:
         cmd.add_parser(subparsers, parents)
     return parser

fal/cli/run.py CHANGED Viewed

@@ -10,7 +10,7 @@ def _run(args):
     if is_app_name(args.func_ref):
         app_name = args.func_ref[0]
-        app_ref, _ = get_app_data_from_toml(app_name)
+        app_ref, *_ = get_app_data_from_toml(app_name)
         file_path, func_name = RefAction.split_ref(app_ref)
     else:
         file_path, func_name = args.func_ref

fal/cli/runners.py ADDED Viewed

@@ -0,0 +1,44 @@
+from .parser import FalClientParser
+def _kill(args):
+    from fal.sdk import FalServerlessClient
+    client = FalServerlessClient(args.host)
+    with client.connect() as connection:
+        connection.kill_runner(args.id)
+def _add_kill_parser(subparsers, parents):
+    kill_help = "Kill a runner."
+    parser = subparsers.add_parser(
+        "kill",
+        description=kill_help,
+        help=kill_help,
+        parents=parents,
+    )
+    parser.add_argument(
+        "id",
+        help="Runner ID.",
+    )
+    parser.set_defaults(func=_kill)
+def add_parser(main_subparsers, parents):
+    runners_help = "Manage fal runners."
+    parser = main_subparsers.add_parser(
+        "runners",
+        description=runners_help,
+        help=runners_help,
+        parents=parents,
+        aliases=["machine"],  # backwards compatibility
+    )
+    subparsers = parser.add_subparsers(
+        title="Commands",
+        metavar="command",
+        required=True,
+        parser_class=FalClientParser,
+    )
+    _add_kill_parser(subparsers, parents)

fal/config.py ADDED Viewed

@@ -0,0 +1,23 @@
+import os
+import tomli
+class Config:
+    DEFAULT_CONFIG_PATH = "~/.fal/config.toml"
+    DEFAULT_PROFILE = "default"
+    def __init__(self):
+        self.config_path = os.path.expanduser(
+            os.getenv("FAL_CONFIG_PATH", self.DEFAULT_CONFIG_PATH)
+        )
+        self.profile = os.getenv("FAL_PROFILE", self.DEFAULT_PROFILE)
+        try:
+            with open(self.config_path, "rb") as file:
+                self.config = tomli.load(file)
+        except FileNotFoundError:
+            self.config = {}
+    def get(self, key):
+        return self.config.get(self.profile, {}).get(key)

fal/container.py CHANGED Viewed

@@ -3,7 +3,7 @@ class ContainerImage:
     from a Dockerfile.
     """
-    _known_keys = {"dockerfile_str", "build_env", "build_args"}
+    _known_keys = {"dockerfile_str", "build_args", "registries", "builder"}
     @classmethod
     def from_dockerfile_str(cls, text: str, **kwargs):

fal/sdk.py CHANGED Viewed

@@ -5,7 +5,7 @@ from contextlib import ExitStack
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from enum import Enum
-from typing import Any, Callable, Generic, Iterator, Literal, TypeVar
+from typing import Any, Callable, Generic, Iterator, Literal, Optional, TypeVar
 import grpc
 import isolate_proto
@@ -214,7 +214,7 @@ class AliasInfo:
 class RunnerInfo:
     runner_id: str
     in_flight_requests: int
-    expiration_countdown: int
+    expiration_countdown: Optional[int]
     uptime: timedelta
@@ -344,7 +344,9 @@ def _from_grpc_runner_info(message: isolate_proto.RunnerInfo) -> RunnerInfo:
     return RunnerInfo(
         runner_id=message.runner_id,
         in_flight_requests=message.in_flight_requests,
-        expiration_countdown=message.expiration_countdown,
+        expiration_countdown=message.expiration_countdown
+        if message.HasField("expiration_countdown")
+        else None,
         uptime=timedelta(seconds=message.uptime),
     )
@@ -389,7 +391,8 @@ def _from_grpc_hosted_run_result(
 @dataclass
 class MachineRequirements:
-    machine_type: str
+    machine_types: list[str]
+    num_gpus: int | None = field(default=None)
     keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE
     base_image: str | None = None
     exposed_port: int | None = None
@@ -398,6 +401,17 @@ class MachineRequirements:
     max_concurrency: int | None = None
     max_multiplexing: int | None = None
     min_concurrency: int | None = None
+    request_timeout: int | None = None
+    def __post_init__(self):
+        if isinstance(self.machine_types, str):
+            self.machine_types = [self.machine_types]
+        if not isinstance(self.machine_types, list):
+            raise ValueError("machine_types must be a list of strings.")
+        if not self.machine_types:
+            raise ValueError("No machine type provided.")
 @dataclass
@@ -485,11 +499,15 @@ class FalServerlessConnection:
         machine_requirements: MachineRequirements | None = None,
         metadata: dict[str, Any] | None = None,
         deployment_strategy: Literal["recreate", "rolling"] = "recreate",
+        scale: bool = True,
     ) -> Iterator[isolate_proto.RegisterApplicationResult]:
         wrapped_function = to_serialized_object(function, serialization_method)
         if machine_requirements:
             wrapped_requirements = isolate_proto.MachineRequirements(
-                machine_type=machine_requirements.machine_type,
+                # NOTE: backwards compatibility with old API
+                machine_type=machine_requirements.machine_types[0],
+                machine_types=machine_requirements.machine_types,
+                num_gpus=machine_requirements.num_gpus,
                 keep_alive=machine_requirements.keep_alive,
                 base_image=machine_requirements.base_image,
                 exposed_port=machine_requirements.exposed_port,
@@ -500,6 +518,7 @@ class FalServerlessConnection:
                 max_concurrency=machine_requirements.max_concurrency,
                 min_concurrency=machine_requirements.min_concurrency,
                 max_multiplexing=machine_requirements.max_multiplexing,
+                request_timeout=machine_requirements.request_timeout,
             )
         else:
             wrapped_requirements = None
@@ -516,9 +535,6 @@ class FalServerlessConnection:
             struct_metadata = isolate_proto.Struct()
             struct_metadata.update(metadata)
-        if deployment_strategy == "default":
-            deployment_strategy = "recreate"
         deployment_strategy_proto = DeploymentStrategy[
             deployment_strategy.upper()
         ].to_proto()
@@ -531,6 +547,7 @@ class FalServerlessConnection:
             auth_mode=auth_mode,
             metadata=struct_metadata,
             deployment_strategy=deployment_strategy_proto,
+            scale=scale,
         )
         for partial_result in self.stub.RegisterApplication(request):
             yield from_grpc(partial_result)
@@ -582,7 +599,10 @@ class FalServerlessConnection:
         wrapped_function = to_serialized_object(function, serialization_method)
         if machine_requirements:
             wrapped_requirements = isolate_proto.MachineRequirements(
-                machine_type=machine_requirements.machine_type,
+                # NOTE: backwards compatibility with old API
+                machine_type=machine_requirements.machine_types[0],
+                machine_types=machine_requirements.machine_types,
+                num_gpus=machine_requirements.num_gpus,
                 keep_alive=machine_requirements.keep_alive,
                 base_image=machine_requirements.base_image,
                 exposed_port=machine_requirements.exposed_port,
@@ -593,6 +613,7 @@ class FalServerlessConnection:
                 max_concurrency=machine_requirements.max_concurrency,
                 max_multiplexing=machine_requirements.max_multiplexing,
                 min_concurrency=machine_requirements.min_concurrency,
+                request_timeout=machine_requirements.request_timeout,
             )
         else:
             wrapped_requirements = None
@@ -665,3 +686,7 @@ class FalServerlessConnection:
             )
             for secret in response.secrets
         ]
+    def kill_runner(self, runner_id: str) -> None:
+        request = isolate_proto.KillRunnerRequest(runner_id=runner_id)
+        self.stub.KillRunner(request)

fal 1.3.3__py3-none-any.whl → 1.7.3__py3-none-any.whl

Potentially problematic release.

fal 1.3.3__py3-none-any.whl → 1.7.3__py3-none-any.whl