fal 1.2.1__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fal might be problematic; see the registry's advisory page for more details.

Files changed (45)
  1. fal/__main__.py +3 -1
  2. fal/_fal_version.py +2 -2
  3. fal/api.py +88 -20
  4. fal/app.py +221 -27
  5. fal/apps.py +147 -3
  6. fal/auth/__init__.py +50 -2
  7. fal/cli/_utils.py +40 -0
  8. fal/cli/apps.py +5 -3
  9. fal/cli/create.py +26 -0
  10. fal/cli/deploy.py +97 -16
  11. fal/cli/main.py +2 -2
  12. fal/cli/parser.py +11 -7
  13. fal/cli/run.py +12 -1
  14. fal/cli/runners.py +44 -0
  15. fal/config.py +23 -0
  16. fal/container.py +1 -1
  17. fal/exceptions/__init__.py +7 -1
  18. fal/exceptions/_base.py +51 -0
  19. fal/exceptions/_cuda.py +44 -0
  20. fal/files.py +81 -0
  21. fal/sdk.py +67 -6
  22. fal/toolkit/file/file.py +103 -13
  23. fal/toolkit/file/providers/fal.py +572 -24
  24. fal/toolkit/file/providers/gcp.py +8 -1
  25. fal/toolkit/file/providers/r2.py +8 -1
  26. fal/toolkit/file/providers/s3.py +80 -0
  27. fal/toolkit/file/types.py +28 -3
  28. fal/toolkit/image/__init__.py +71 -0
  29. fal/toolkit/image/image.py +25 -2
  30. fal/toolkit/image/nsfw_filter/__init__.py +11 -0
  31. fal/toolkit/image/nsfw_filter/env.py +9 -0
  32. fal/toolkit/image/nsfw_filter/inference.py +77 -0
  33. fal/toolkit/image/nsfw_filter/model.py +18 -0
  34. fal/toolkit/image/nsfw_filter/requirements.txt +4 -0
  35. fal/toolkit/image/safety_checker.py +107 -0
  36. fal/toolkit/types.py +140 -0
  37. fal/toolkit/utils/download_utils.py +4 -0
  38. fal/toolkit/utils/retry.py +45 -0
  39. fal/utils.py +20 -4
  40. fal/workflows.py +10 -4
  41. {fal-1.2.1.dist-info → fal-1.7.2.dist-info}/METADATA +47 -40
  42. {fal-1.2.1.dist-info → fal-1.7.2.dist-info}/RECORD +45 -30
  43. {fal-1.2.1.dist-info → fal-1.7.2.dist-info}/WHEEL +1 -1
  44. {fal-1.2.1.dist-info → fal-1.7.2.dist-info}/entry_points.txt +0 -0
  45. {fal-1.2.1.dist-info → fal-1.7.2.dist-info}/top_level.txt +0 -0
fal/__main__.py CHANGED
@@ -1,4 +1,6 @@
1
+ import sys
2
+
1
3
  from .cli import main
2
4
 
3
5
  if __name__ == "__main__":
4
- main()
6
+ sys.exit(main())
fal/_fal_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.2.1'
16
- __version_tuple__ = version_tuple = (1, 2, 1)
15
+ __version__ = version = '1.7.2'
16
+ __version_tuple__ = version_tuple = (1, 7, 2)
fal/api.py CHANGED
@@ -44,7 +44,13 @@ from typing_extensions import Concatenate, ParamSpec
44
44
  import fal.flags as flags
45
45
  from fal._serialization import include_modules_from, patch_pickle
46
46
  from fal.container import ContainerImage
47
- from fal.exceptions import FalServerlessException
47
+ from fal.exceptions import (
48
+ AppException,
49
+ CUDAOutOfMemoryException,
50
+ FalServerlessException,
51
+ FieldException,
52
+ )
53
+ from fal.exceptions._cuda import _is_cuda_oom_exception
48
54
  from fal.logging.isolate import IsolateLogPrinter
49
55
  from fal.sdk import (
50
56
  FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
@@ -70,6 +76,8 @@ SERVE_REQUIREMENTS = [
70
76
  f"pydantic=={pydantic_version}",
71
77
  "uvicorn",
72
78
  "starlette_exporter",
79
+ "structlog",
80
+ "tomli",
73
81
  ]
74
82
 
75
83
 
@@ -164,6 +172,7 @@ class Host(Generic[ArgsT, ReturnT]):
164
172
  application_name: str | None = None,
165
173
  application_auth_mode: Literal["public", "shared", "private"] | None = None,
166
174
  metadata: dict[str, Any] | None = None,
175
+ scale: bool = True,
167
176
  ) -> str | None:
168
177
  """Register the given function on the host for API call execution."""
169
178
  raise NotImplementedError
@@ -383,12 +392,15 @@ class FalServerlessHost(Host):
383
392
  _SUPPORTED_KEYS = frozenset(
384
393
  {
385
394
  "machine_type",
395
+ "machine_types",
396
+ "num_gpus",
386
397
  "keep_alive",
387
398
  "max_concurrency",
388
399
  "min_concurrency",
389
400
  "max_multiplexing",
390
401
  "setup_function",
391
402
  "metadata",
403
+ "request_timeout",
392
404
  "_base_image",
393
405
  "_scheduler",
394
406
  "_scheduler_options",
@@ -419,25 +431,28 @@ class FalServerlessHost(Host):
419
431
  application_name: str | None = None,
420
432
  application_auth_mode: Literal["public", "shared", "private"] | None = None,
421
433
  metadata: dict[str, Any] | None = None,
434
+ deployment_strategy: Literal["recreate", "rolling"] = "recreate",
435
+ scale: bool = True,
422
436
  ) -> str | None:
423
437
  environment_options = options.environment.copy()
424
438
  environment_options.setdefault("python_version", active_python())
425
439
  environments = [self._connection.define_environment(**environment_options)]
426
440
 
427
- machine_type = options.host.get(
441
+ machine_type: list[str] | str = options.host.get(
428
442
  "machine_type", FAL_SERVERLESS_DEFAULT_MACHINE_TYPE
429
443
  )
430
444
  keep_alive = options.host.get("keep_alive", FAL_SERVERLESS_DEFAULT_KEEP_ALIVE)
431
- max_concurrency = options.host.get("max_concurrency")
432
- min_concurrency = options.host.get("min_concurrency")
433
- max_multiplexing = options.host.get("max_multiplexing")
434
445
  base_image = options.host.get("_base_image", None)
435
446
  scheduler = options.host.get("_scheduler", None)
436
447
  scheduler_options = options.host.get("_scheduler_options", None)
448
+ max_concurrency = options.host.get("max_concurrency")
449
+ min_concurrency = options.host.get("min_concurrency")
450
+ max_multiplexing = options.host.get("max_multiplexing")
437
451
  exposed_port = options.get_exposed_port()
438
-
452
+ request_timeout = options.host.get("request_timeout")
439
453
  machine_requirements = MachineRequirements(
440
- machine_type=machine_type,
454
+ machine_types=machine_type, # type: ignore
455
+ num_gpus=options.host.get("num_gpus"),
441
456
  keep_alive=keep_alive,
442
457
  base_image=base_image,
443
458
  exposed_port=exposed_port,
@@ -446,6 +461,7 @@ class FalServerlessHost(Host):
446
461
  max_multiplexing=max_multiplexing,
447
462
  max_concurrency=max_concurrency,
448
463
  min_concurrency=min_concurrency,
464
+ request_timeout=request_timeout,
449
465
  )
450
466
 
451
467
  partial_func = _prepare_partial_func(func)
@@ -471,6 +487,8 @@ class FalServerlessHost(Host):
471
487
  application_auth_mode=application_auth_mode,
472
488
  machine_requirements=machine_requirements,
473
489
  metadata=metadata,
490
+ deployment_strategy=deployment_strategy,
491
+ scale=scale,
474
492
  ):
475
493
  for log in partial_result.logs:
476
494
  self._log_printer.print(log)
@@ -493,7 +511,7 @@ class FalServerlessHost(Host):
493
511
  environment_options.setdefault("python_version", active_python())
494
512
  environments = [self._connection.define_environment(**environment_options)]
495
513
 
496
- machine_type = options.host.get(
514
+ machine_type: list[str] | str = options.host.get(
497
515
  "machine_type", FAL_SERVERLESS_DEFAULT_MACHINE_TYPE
498
516
  )
499
517
  keep_alive = options.host.get("keep_alive", FAL_SERVERLESS_DEFAULT_KEEP_ALIVE)
@@ -505,9 +523,11 @@ class FalServerlessHost(Host):
505
523
  scheduler_options = options.host.get("_scheduler_options", None)
506
524
  exposed_port = options.get_exposed_port()
507
525
  setup_function = options.host.get("setup_function", None)
526
+ request_timeout = options.host.get("request_timeout")
508
527
 
509
528
  machine_requirements = MachineRequirements(
510
- machine_type=machine_type,
529
+ machine_types=machine_type, # type: ignore
530
+ num_gpus=options.host.get("num_gpus"),
511
531
  keep_alive=keep_alive,
512
532
  base_image=base_image,
513
533
  exposed_port=exposed_port,
@@ -516,6 +536,7 @@ class FalServerlessHost(Host):
516
536
  max_multiplexing=max_multiplexing,
517
537
  max_concurrency=max_concurrency,
518
538
  min_concurrency=min_concurrency,
539
+ request_timeout=request_timeout,
519
540
  )
520
541
 
521
542
  return_value = _UNSET
@@ -676,10 +697,12 @@ def function(
676
697
  max_concurrency: int | None = None,
677
698
  # FalServerlessHost options
678
699
  metadata: dict[str, Any] | None = None,
679
- machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
700
+ machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
701
+ num_gpus: int | None = None,
680
702
  keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
681
703
  max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
682
704
  min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
705
+ request_timeout: int | None = None,
683
706
  setup_function: Callable[..., None] | None = None,
684
707
  _base_image: str | None = None,
685
708
  _scheduler: str | None = None,
@@ -701,10 +724,12 @@ def function(
701
724
  max_concurrency: int | None = None,
702
725
  # FalServerlessHost options
703
726
  metadata: dict[str, Any] | None = None,
704
- machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
727
+ machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
728
+ num_gpus: int | None = None,
705
729
  keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
706
730
  max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
707
731
  min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
732
+ request_timeout: int | None = None,
708
733
  setup_function: Callable[..., None] | None = None,
709
734
  _base_image: str | None = None,
710
735
  _scheduler: str | None = None,
@@ -776,10 +801,12 @@ def function(
776
801
  max_concurrency: int | None = None,
777
802
  # FalServerlessHost options
778
803
  metadata: dict[str, Any] | None = None,
779
- machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
804
+ machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
805
+ num_gpus: int | None = None,
780
806
  keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
781
807
  max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
782
808
  min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
809
+ request_timeout: int | None = None,
783
810
  setup_function: Callable[..., None] | None = None,
784
811
  _base_image: str | None = None,
785
812
  _scheduler: str | None = None,
@@ -806,10 +833,12 @@ def function(
806
833
  max_concurrency: int | None = None,
807
834
  # FalServerlessHost options
808
835
  metadata: dict[str, Any] | None = None,
809
- machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
836
+ machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
837
+ num_gpus: int | None = None,
810
838
  keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
811
839
  max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
812
840
  min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
841
+ request_timeout: int | None = None,
813
842
  setup_function: Callable[..., None] | None = None,
814
843
  _base_image: str | None = None,
815
844
  _scheduler: str | None = None,
@@ -830,10 +859,12 @@ def function(
830
859
  max_concurrency: int | None = None,
831
860
  # FalServerlessHost options
832
861
  metadata: dict[str, Any] | None = None,
833
- machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
862
+ machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
863
+ num_gpus: int | None = None,
834
864
  keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
835
865
  max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
836
866
  min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
867
+ request_timeout: int | None = None,
837
868
  setup_function: Callable[..., None] | None = None,
838
869
  _base_image: str | None = None,
839
870
  _scheduler: str | None = None,
@@ -854,10 +885,12 @@ def function(
854
885
  max_concurrency: int | None = None,
855
886
  # FalServerlessHost options
856
887
  metadata: dict[str, Any] | None = None,
857
- machine_type: str = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
888
+ machine_type: str | list[str] = FAL_SERVERLESS_DEFAULT_MACHINE_TYPE,
889
+ num_gpus: int | None = None,
858
890
  keep_alive: int = FAL_SERVERLESS_DEFAULT_KEEP_ALIVE,
859
891
  max_multiplexing: int = FAL_SERVERLESS_DEFAULT_MAX_MULTIPLEXING,
860
892
  min_concurrency: int = FAL_SERVERLESS_DEFAULT_MIN_CONCURRENCY,
893
+ request_timeout: int | None = None,
861
894
  setup_function: Callable[..., None] | None = None,
862
895
  _base_image: str | None = None,
863
896
  _scheduler: str | None = None,
@@ -942,6 +975,8 @@ class RouteSignature(NamedTuple):
942
975
 
943
976
 
944
977
  class BaseServable:
978
+ version: ClassVar[str] = "unknown"
979
+
945
980
  def collect_routes(self) -> dict[RouteSignature, Callable[..., Any]]:
946
981
  raise NotImplementedError
947
982
 
@@ -1002,13 +1037,39 @@ class BaseServable:
1002
1037
  # If it's not a generic 404, just return the original message.
1003
1038
  return JSONResponse({"detail": exc.detail}, 404)
1004
1039
 
1040
+ @_app.exception_handler(AppException)
1041
+ async def app_exception_handler(request: Request, exc: AppException):
1042
+ return JSONResponse({"detail": exc.message}, exc.status_code)
1043
+
1044
+ @_app.exception_handler(FieldException)
1045
+ async def field_exception_handler(request: Request, exc: FieldException):
1046
+ return JSONResponse(exc.to_pydantic_format(), exc.status_code)
1047
+
1048
+ @_app.exception_handler(CUDAOutOfMemoryException)
1049
+ async def cuda_out_of_memory_exception_handler(
1050
+ request: Request, exc: CUDAOutOfMemoryException
1051
+ ):
1052
+ return JSONResponse({"detail": exc.message}, exc.status_code)
1053
+
1005
1054
  @_app.exception_handler(Exception)
1006
1055
  async def traceback_logging_exception_handler(request: Request, exc: Exception):
1007
- print(
1008
- json.dumps(
1009
- {"traceback": "".join(traceback.format_exception(exc)[::-1])} # type: ignore
1056
+ _, MINOR, *_ = sys.version_info
1057
+
1058
+ # traceback.format_exception() has a different signature in Python >=3.10
1059
+ if MINOR >= 10:
1060
+ formatted_exception = traceback.format_exception(exc) # type: ignore
1061
+ else:
1062
+ formatted_exception = traceback.format_exception(
1063
+ type(exc), exc, exc.__traceback__
1010
1064
  )
1011
- )
1065
+
1066
+ print(json.dumps({"traceback": "".join(formatted_exception[::-1])}))
1067
+
1068
+ if _is_cuda_oom_exception(exc):
1069
+ return await cuda_out_of_memory_exception_handler(
1070
+ request, CUDAOutOfMemoryException()
1071
+ )
1072
+
1012
1073
  return JSONResponse({"detail": "Internal Server Error"}, 500)
1013
1074
 
1014
1075
  routes = self.collect_routes()
@@ -1044,11 +1105,18 @@ class BaseServable:
1044
1105
  def serve(self) -> None:
1045
1106
  import asyncio
1046
1107
 
1108
+ from prometheus_client import Gauge
1047
1109
  from starlette_exporter import handle_metrics
1048
1110
  from uvicorn import Config
1049
1111
 
1112
+ # NOTE: this uses the global prometheus registry
1113
+ app_info = Gauge("fal_app_info", "Fal application information", ["version"])
1114
+ app_info.labels(version=self.version).set(1)
1115
+
1050
1116
  app = self._build_app()
1051
- server = Server(config=Config(app, host="0.0.0.0", port=8080))
1117
+ server = Server(
1118
+ config=Config(app, host="0.0.0.0", port=8080, timeout_keep_alive=300)
1119
+ )
1052
1120
  metrics_app = FastAPI()
1053
1121
  metrics_app.add_route("/metrics", handle_metrics)
1054
1122
  metrics_server = Server(config=Config(metrics_app, host="0.0.0.0", port=9090))