PyPI - fal - Versions diffs - 1.3.3__py3-none-any.whl → 1.7.2__py3-none-any.whl - Mend

fal 1.3.3__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of fal might be problematic. Click here for more details.

Files changed (31) hide show

fal/_fal_version.py +2 -2
fal/api.py +46 -14
fal/app.py +157 -17
fal/apps.py +138 -3
fal/auth/__init__.py +50 -2
fal/cli/_utils.py +8 -2
fal/cli/apps.py +1 -1
fal/cli/deploy.py +34 -8
fal/cli/main.py +2 -2
fal/cli/run.py +1 -1
fal/cli/runners.py +44 -0
fal/config.py +23 -0
fal/container.py +1 -1
fal/sdk.py +34 -9
fal/toolkit/file/file.py +92 -19
fal/toolkit/file/providers/fal.py +418 -46
fal/toolkit/file/providers/gcp.py +8 -1
fal/toolkit/file/providers/r2.py +8 -1
fal/toolkit/file/providers/s3.py +80 -0
fal/toolkit/file/types.py +11 -4
fal/toolkit/image/__init__.py +3 -3
fal/toolkit/image/image.py +25 -2
fal/toolkit/types.py +140 -0
fal/toolkit/utils/download_utils.py +4 -0
fal/toolkit/utils/retry.py +45 -0
fal/workflows.py +10 -4
{fal-1.3.3.dist-info → fal-1.7.2.dist-info}/METADATA +14 -9
{fal-1.3.3.dist-info → fal-1.7.2.dist-info}/RECORD +31 -26
{fal-1.3.3.dist-info → fal-1.7.2.dist-info}/WHEEL +1 -1
{fal-1.3.3.dist-info → fal-1.7.2.dist-info}/entry_points.txt +0 -0
{fal-1.3.3.dist-info → fal-1.7.2.dist-info}/top_level.txt +0 -0

fal/_fal_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '1.3.3'
-__version_tuple__ = version_tuple = (1, 3, 3)
+__version__ = version = '1.7.2'
+__version_tuple__ = version_tuple = (1, 7, 2)

fal/api.py CHANGED Viewed

@@ -76,6 +76,8 @@ SERVE_REQUIREMENTS = [
     f"pydantic=={pydantic_version}",
     "uvicorn",
     "starlette_exporter",
+    "structlog",
+    "tomli",
 ]
@@ -170,6 +172,7 @@ class Host(Generic[ArgsT, ReturnT]):
         application_name: str | None = None,
         application_auth_mode: Literal["public", "shared", "private"] | None = None,
         metadata: dict[str, Any] | None = None,
+        scale: bool = True,
     ) -> str | None:
         """Register the given function on the host for API call execution."""
         raise NotImplementedError
@@ -389,12 +392,15 @@ class FalServerlessHost(Host):
     _SUPPORTED_KEYS = frozenset(
         {
             "machine_type",
+            "machine_types",
+            "num_gpus",
             "keep_alive",
             "max_concurrency",
             "min_concurrency",
             "max_multiplexing",
             "setup_function",
             "metadata",
+            "request_timeout",
             "_base_image",
             "_scheduler",
             "_scheduler_options",
@@ -426,25 +432,27 @@ class FalServerlessHost(Host):
         application_auth_mode: Literal["public", "shared", "private"] | None = None,
         metadata: dict[str, Any] | None = None,
         deployment_strategy: Literal["recreate", "rolling"] = "recreate",
+        scale: bool = True,
     ) -> str | None:
         environment_options = options.environment.copy()
         environment_options.setdefault("python_version", active_python())
         environments = [self._connection.define_environment(**environment_options)]
-        machine_type = options.host.get(
+        machine_type: list[str] | str = options.host.get(
             "machine_type", FAL_SERVERLESS_DEFAULT_MACHINE_TYPE
         )
         keep_alive = options.host.get("keep_alive", FAL_SERVERLESS_DEFAULT_KEEP_ALIVE)
-        max_concurrency = options.host.get("max_concurrency")
-        min_concurrency = options.host.get("min_concurrency")
-        max_multiplexing = options.host.get("max_multiplexing")
         base_image = options.host.get("_base_image", None)
         scheduler = options.host.get("_scheduler", None)
         scheduler_options = options.host.get("_scheduler_options", None)
+        max_concurrency = options.host.get("max_concurrency")
+        min_concurrency = options.host.get("min_concurrency")
+        max_multiplexing = options.host.get("max_multiplexing")
         exposed_port = options.get_exposed_port()
+        request_timeout = options.host.get("request_timeout")
         machine_requirements = MachineRequirements(
-            machine_type=machine_type,
+            machine_types=machine_type,  # type: ignore
+            num_gpus=options.host.get("num_gpus"),
             keep_alive=keep_alive,
             base_image=base_image,
             exposed_port=exposed_port,
@@ -453,6 +461,7 @@ class FalServerlessHost(Host):
             max_multiplexing=max_multiplexing,
             max_concurrency=max_concurrency,
             min_concurrency=min_concurrency,
+            request_timeout=request_timeout,
         )
         partial_func = _prepare_partial_func(func)
@@ -479,6 +488,7 @@ class FalServerlessHost(Host):
             machine_requirements=machine_requirements,
             metadata=metadata,
             deployment_strategy=deployment_strategy,
+            scale=scale,
         ):
             for log in partial_result.logs:
                 self._log_printer.print(log)
@@ -501,7 +511,7 @@ class FalServerlessHost(Host):
         environment_options.setdefault("python_version", active_python())
         environments = [self._connection.define_environment(**environment_options)]
-        machine_type = options.host.get(
+        machine_type: list[str] | str = options.host.get(
             "machine_type", FAL_SERVERLESS_DEFAULT_MACHINE_TYPE
         )
         keep_alive = options.host.get("keep_alive", FAL_SERVERLESS_DEFAULT_KEEP_ALIVE)
@@ -513,9 +523,11 @@ class FalServerlessHost(Host):
         scheduler_options = options.host.get("_scheduler_options", None)
         exposed_port = options.get_exposed_port()
         setup_function = options.host.get("setup_function", None)
+        request_timeout = options.host.get("request_timeout")
         machine_requirements = MachineRequirements(
-            machine_type=machine_type,
+            machine_types=machine_type,  # type: ignore
+            num_gpus=options.host.get("num_gpus"),
             keep_alive=keep_alive,
             base_image=base_image,
             exposed_port=exposed_port,
@@ -524,6 +536,7 @@ class FalServerlessHost(Host):
             max_multiplexing=max_multiplexing,
             max_concurrency=max_concurrency,
             min_concurrency=min_concurrency,
+            request_timeout=request_timeout,
         )
         return_value = _UNSET
@@ -684,10 +697,12 @@ def function(
     max_concurrency: int | None = None,
     # FalServerlessHost options
     metadata: dict[str, Any] | None = None,
-    machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    num_gpus: int | None = None,
     keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
     max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
     min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
+    request_timeout: int | None = None,
     setup_function: Callable[..., None] | None = None,
     _base_image: str | None = None,
     _scheduler: str | None = None,
@@ -709,10 +724,12 @@ def function(
     max_concurrency: int | None = None,
     # FalServerlessHost options
     metadata: dict[str, Any] | None = None,
-    machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    num_gpus: int | None = None,
     keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
     max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
     min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
+    request_timeout: int | None = None,
     setup_function: Callable[..., None] | None = None,
     _base_image: str | None = None,
     _scheduler: str | None = None,
@@ -784,10 +801,12 @@ def function(
     max_concurrency: int | None = None,
     # FalServerlessHost options
     metadata: dict[str, Any] | None = None,
-    machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    num_gpus: int | None = None,
     keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
     max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
     min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
+    request_timeout: int | None = None,
     setup_function: Callable[..., None] | None = None,
     _base_image: str | None = None,
     _scheduler: str | None = None,
@@ -814,10 +833,12 @@ def function(
     max_concurrency: int | None = None,
     # FalServerlessHost options
     metadata: dict[str, Any] | None = None,
-    machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    num_gpus: int | None = None,
     keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
     max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
     min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
+    request_timeout: int | None = None,
     setup_function: Callable[..., None] | None = None,
     _base_image: str | None = None,
     _scheduler: str | None = None,
@@ -838,10 +859,12 @@ def function(
     max_concurrency: int | None = None,
     # FalServerlessHost options
     metadata: dict[str, Any] | None = None,
-    machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    num_gpus: int | None = None,
     keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
     max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
     min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
+    request_timeout: int | None = None,
     setup_function: Callable[..., None] | None = None,
     _base_image: str | None = None,
     _scheduler: str | None = None,
@@ -862,10 +885,12 @@ def function(
     max_concurrency: int | None = None,
     # FalServerlessHost options
     metadata: dict[str, Any] | None = None,
-    machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
+    num_gpus: int | None = None,
     keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
     max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
     min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
+    request_timeout: int | None = None,
     setup_function: Callable[..., None] | None = None,
     _base_image: str | None = None,
     _scheduler: str | None = None,
@@ -950,6 +975,8 @@ class RouteSignature(NamedTuple):
 class BaseServable:
+    version: ClassVar[str] = "unknown"
     def collect_routes(self) -> dict[RouteSignature, Callable[..., Any]]:
         raise NotImplementedError
@@ -1078,9 +1105,14 @@ class BaseServable:
     def serve(self) -> None:
         import asyncio
+        from prometheus_client import Gauge
         from starlette_exporter import handle_metrics
         from uvicorn import Config
+        # NOTE: this uses the global prometheus registry
+        app_info = Gauge("fal_app_info", "Fal application information", ["version"])
+        app_info.labels(version=self.version).set(1)
         app = self._build_app()
         server = Server(
             config=Config(app, host="0.0.0.0", port=8080, timeout_keep_alive=300)

fal/app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import asyncio
 import inspect
 import json
 import os
@@ -8,20 +9,25 @@ import re
 import threading
 import time
 import typing
-from contextlib import asynccontextmanager, contextmanager
+from contextlib import AsyncExitStack, asynccontextmanager, contextmanager
+from dataclasses import dataclass
 from typing import Any, Callable, ClassVar, Literal, TypeVar
+import fastapi
+import grpc.aio as async_grpc
 import httpx
-from fastapi import FastAPI
+from isolate.server import definitions
 import fal.api
 from fal._serialization import include_modules_from
 from fal.api import RouteSignature
-from fal.exceptions import RequestCancelledException
+from fal.exceptions import FalServerlessException, RequestCancelledException
 from fal.logging import get_logger
-from fal.toolkit.file.providers import fal as fal_provider_module
+from fal.toolkit.file import request_lifecycle_preference
+from fal.toolkit.file.providers.fal import LIFECYCLE_PREFERENCE
 REALTIME_APP_REQUIREMENTS = ["websockets", "msgpack"]
+REQUEST_ID_KEY = "x-fal-request-id"
 EndpointT = TypeVar("EndpointT", bound=Callable[..., Any])
 logger = get_logger(__name__)
@@ -34,6 +40,56 @@ async def _call_any_fn(fn, *args, **kwargs):
         return fn(*args, **kwargs)
+async def open_isolate_channel(address: str) -> async_grpc.Channel:
+    _stack = AsyncExitStack()
+    channel = await _stack.enter_async_context(
+        async_grpc.insecure_channel(
+            address,
+            options=[
+                ("grpc.max_send_message_length", -1),
+                ("grpc.max_receive_message_length", -1),
+                ("grpc.min_reconnect_backoff_ms", 0),
+                ("grpc.max_reconnect_backoff_ms", 100),
+                ("grpc.dns_min_time_between_resolutions_ms", 100),
+            ],
+        )
+    )
+    channel_status = channel.channel_ready()
+    try:
+        await asyncio.wait_for(channel_status, timeout=1)
+    except asyncio.TimeoutError:
+        await _stack.aclose()
+        raise Exception("Timed out trying to connect to local isolate")
+    return channel
+async def _set_logger_labels(
+    logger_labels: dict[str, str], channel: async_grpc.Channel
+):
+    try:
+        import sys
+        # Flush any prints that were buffered before setting the logger labels
+        sys.stderr.flush()
+        sys.stdout.flush()
+        isolate = definitions.IsolateStub(channel)
+        isolate_request = definitions.SetMetadataRequest(
+            # TODO: when submit is shipped, get task_id from an env var
+            task_id="RUN",
+            metadata=definitions.TaskMetadata(logger_labels=logger_labels),
+        )
+        res = isolate.SetMetadata(isolate_request)
+        code = await res.code()
+        assert str(code) == "StatusCode.OK", str(code)
+    except BaseException:
+        # NOTE hiding this for now to not print on every request
+        # logger.debug("Failed to set logger labels", exc_info=True)
+        pass
 def wrap_app(cls: type[App], **kwargs) -> fal.api.IsolatedFunction:
     include_modules_from(cls)
@@ -60,6 +116,7 @@ def wrap_app(cls: type[App], **kwargs) -> fal.api.IsolatedFunction:
         kind,
         requirements=cls.requirements,
         machine_type=cls.machine_type,
+        num_gpus=cls.num_gpus,
         **cls.host_kwargs,
         **kwargs,
         metadata=metadata,
@@ -74,6 +131,12 @@ def wrap_app(cls: type[App], **kwargs) -> fal.api.IsolatedFunction:
     return fn
+@dataclass
+class AppClientError(FalServerlessException):
+    message: str
+    status_code: int
 class EndpointClient:
     def __init__(self, url, endpoint, signature, timeout: int | None = None):
         self.url = url
@@ -86,12 +149,19 @@ class EndpointClient:
     def __call__(self, data):
         with httpx.Client() as client:
+            url = self.url + self.signature.path
             resp = client.post(
                 self.url + self.signature.path,
                 json=data.dict() if hasattr(data, "dict") else dict(data),
                 timeout=self.timeout,
             )
-            resp.raise_for_status()
+            if not resp.is_success:
+                # allow logs to be printed before raising the exception
+                time.sleep(1)
+                raise AppClientError(
+                    f"Failed to POST {url}: {resp.status_code} {resp.text}",
+                    status_code=resp.status_code,
+                )
             resp_dict = resp.json()
         if not self.return_type:
@@ -144,12 +214,16 @@ class AppClient:
             with httpx.Client() as client:
                 retries = 100
                 for _ in range(retries):
-                    resp = client.get(info.url + "/health", timeout=60)
+                    url = info.url + "/health"
+                    resp = client.get(url, timeout=60)
                     if resp.is_success:
                         break
                     elif resp.status_code not in (500, 404):
-                        resp.raise_for_status()
+                        raise AppClientError(
+                            f"Failed to GET {url}: {resp.status_code} {resp.text}",
+                            status_code=resp.status_code,
+                        )
                     time.sleep(0.1)
             client = cls(app_cls, info.url)
@@ -174,9 +248,18 @@ def _to_fal_app_name(name: str) -> str:
     return "-".join(part.lower() for part in PART_FINDER_RE.findall(name))
+def _print_python_packages() -> None:
+    from importlib.metadata import distributions
+    packages = [f"{dist.metadata['Name']}=={dist.version}" for dist in distributions()]
+    print("[debug] Python packages installed:", ", ".join(packages))
 class App(fal.api.BaseServable):
     requirements: ClassVar[list[str]] = []
     machine_type: ClassVar[str] = "S"
+    num_gpus: ClassVar[int | None] = None
     host_kwargs: ClassVar[dict[str, Any]] = {
         "_scheduler": "nomad",
         "_scheduler_options": {
@@ -187,11 +270,18 @@ class App(fal.api.BaseServable):
     }
     app_name: ClassVar[str]
     app_auth: ClassVar[Literal["private", "public", "shared"]] = "private"
+    request_timeout: ClassVar[int | None] = None
+    isolate_channel: async_grpc.Channel | None = None
     def __init_subclass__(cls, **kwargs):
         app_name = kwargs.pop("name", None) or _to_fal_app_name(cls.__name__)
         parent_settings = getattr(cls, "host_kwargs", {})
         cls.host_kwargs = {**parent_settings, **kwargs}
+        if cls.request_timeout is not None:
+            cls.host_kwargs["request_timeout"] = cls.request_timeout
         cls.app_name = getattr(cls, "app_name", app_name)
         if cls.__init__ is not App.__init__:
@@ -222,7 +312,8 @@ class App(fal.api.BaseServable):
         }
     @asynccontextmanager
-    async def lifespan(self, app: FastAPI):
+    async def lifespan(self, app: fastapi.FastAPI):
+        _print_python_packages()
         await _call_any_fn(self.setup)
         try:
             yield
@@ -230,7 +321,7 @@ class App(fal.api.BaseServable):
             await _call_any_fn(self.teardown)
     def health(self):
-        return {}
+        return {"version": self.version}
     def setup(self):
         """Setup the application before serving."""
@@ -238,7 +329,7 @@ class App(fal.api.BaseServable):
     def teardown(self):
         """Teardown the application after serving."""
-    def _add_extra_middlewares(self, app: FastAPI):
+    def _add_extra_middlewares(self, app: fastapi.FastAPI):
         @app.middleware("http")
         async def provide_hints_headers(request, call_next):
             response = await call_next(request)
@@ -259,11 +350,12 @@ class App(fal.api.BaseServable):
         @app.middleware("http")
         async def set_global_object_preference(request, call_next):
-            response = await call_next(request)
             try:
-                fal_provider_module.GLOBAL_LIFECYCLE_PREFERENCE = request.headers.get(
-                    "X-Fal-Object-Lifecycle-Preference"
-                )
+                preference_dict = request_lifecycle_preference(request)
+                if preference_dict is not None:
+                    # This will not work properly for apps with multiplexing enabled
+                    # we may mix up the preferences between requests
+                    LIFECYCLE_PREFERENCE.set(preference_dict)
             except Exception:
                 from fastapi.logger import logger
@@ -271,7 +363,52 @@ class App(fal.api.BaseServable):
                     "Failed set a global lifecycle preference %s",
                     self.__class__.__name__,
                 )
-            return response
+            try:
+                return await call_next(request)
+            finally:
+                # We may miss the global preference if there are operations
+                # being done in the background that go beyond the request
+                LIFECYCLE_PREFERENCE.set(None)
+        @app.middleware("http")
+        async def set_request_id(request, call_next):
+            # NOTE: Setting request_id is not supported for websocket/realtime endpoints
+            if self.isolate_channel is None:
+                grpc_port = os.environ.get("NOMAD_ALLOC_PORT_grpc")
+                self.isolate_channel = await open_isolate_channel(
+                    f"localhost:{grpc_port}"
+                )
+            request_id = request.headers.get(REQUEST_ID_KEY)
+            if request_id is None:
+                # Cut it short
+                return await call_next(request)
+            await _set_logger_labels(
+                {"fal_request_id": request_id}, channel=self.isolate_channel
+            )
+            async def _unset_at_end():
+                await _set_logger_labels({}, channel=self.isolate_channel)  # type: ignore
+            try:
+                response: fastapi.responses.Response = await call_next(request)
+            except BaseException:
+                await _unset_at_end()
+                raise
+            else:
+                # We need to wait for the entire response to be sent before
+                # we can set the logger labels back to the default.
+                background_tasks = fastapi.BackgroundTasks()
+                background_tasks.add_task(_unset_at_end)
+                if response.background:
+                    # We normally have no background tasks, but we should handle it
+                    background_tasks.add_task(response.background)
+                response.background = background_tasks
+                return response
         @app.exception_handler(RequestCancelledException)
         async def value_error_exception_handler(
@@ -284,7 +421,7 @@ class App(fal.api.BaseServable):
             # the connection without receiving a response
             return JSONResponse({"detail": str(exc)}, 499)
-    def _add_extra_routes(self, app: FastAPI):
+    def _add_extra_routes(self, app: fastapi.FastAPI):
         @app.get("/health")
         def health():
             return self.health()
@@ -395,7 +532,10 @@ def _fal_websocket_template(
                 batch.append(next_input)
             t0 = loop.time()
-            output = await loop.run_in_executor(None, func, self, *batch)  # type: ignore
+            if inspect.iscoroutinefunction(func):
+                output = await func(self, *batch)
+            else:
+                output = await loop.run_in_executor(None, func, self, *batch)  # type: ignore
             total_time = loop.time() - t0
             if not isinstance(output, dict):
                 # Handle pydantic output modal

fal 1.3.3__py3-none-any.whl → 1.7.2__py3-none-any.whl

Potentially problematic release.

fal 1.3.3__py3-none-any.whl → 1.7.2__py3-none-any.whl