aibrix 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. aibrix-0.1.0/PKG-INFO +88 -0
  2. aibrix-0.1.0/README.md +51 -0
  3. aibrix-0.1.0/aibrix/__init__.py +13 -0
  4. aibrix-0.1.0/aibrix/__version__.py +3 -0
  5. aibrix-0.1.0/aibrix/app.py +176 -0
  6. aibrix-0.1.0/aibrix/batch/__init__.py +16 -0
  7. aibrix-0.1.0/aibrix/batch/constant.py +21 -0
  8. aibrix-0.1.0/aibrix/batch/driver.py +67 -0
  9. aibrix-0.1.0/aibrix/batch/job_manager.py +349 -0
  10. aibrix-0.1.0/aibrix/batch/request_proxy.py +79 -0
  11. aibrix-0.1.0/aibrix/batch/scheduler.py +292 -0
  12. aibrix-0.1.0/aibrix/batch/storage/__init__.py +88 -0
  13. aibrix-0.1.0/aibrix/batch/storage/batch_storage.py +129 -0
  14. aibrix-0.1.0/aibrix/batch/storage/generic_storage.py +235 -0
  15. aibrix-0.1.0/aibrix/batch/storage/tos_storage.py +182 -0
  16. aibrix-0.1.0/aibrix/config.py +20 -0
  17. aibrix-0.1.0/aibrix/downloader/__init__.py +37 -0
  18. aibrix-0.1.0/aibrix/downloader/__main__.py +40 -0
  19. aibrix-0.1.0/aibrix/downloader/base.py +171 -0
  20. aibrix-0.1.0/aibrix/downloader/huggingface.py +120 -0
  21. aibrix-0.1.0/aibrix/downloader/s3.py +184 -0
  22. aibrix-0.1.0/aibrix/downloader/tos.py +171 -0
  23. aibrix-0.1.0/aibrix/downloader/utils.py +101 -0
  24. aibrix-0.1.0/aibrix/envs.py +89 -0
  25. aibrix-0.1.0/aibrix/logger.py +44 -0
  26. aibrix-0.1.0/aibrix/metrics/__init__.py +13 -0
  27. aibrix-0.1.0/aibrix/metrics/engine_rules.py +48 -0
  28. aibrix-0.1.0/aibrix/metrics/http_collector.py +70 -0
  29. aibrix-0.1.0/aibrix/metrics/metrics.py +38 -0
  30. aibrix-0.1.0/aibrix/metrics/standard_rules.py +58 -0
  31. aibrix-0.1.0/aibrix/openapi/__init__.py +13 -0
  32. aibrix-0.1.0/aibrix/openapi/engine/__init__.py +13 -0
  33. aibrix-0.1.0/aibrix/openapi/engine/base.py +76 -0
  34. aibrix-0.1.0/aibrix/openapi/engine/vllm.py +103 -0
  35. aibrix-0.1.0/aibrix/openapi/protocol.py +40 -0
  36. aibrix-0.1.0/aibrix/protos/__init__.py +13 -0
  37. aibrix-0.1.0/pyproject.toml +76 -0
aibrix-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.1
2
+ Name: aibrix
3
+ Version: 0.1.0
4
+ Summary: AIBrix, the foundational building blocks for constructing your own GenAI inference infrastructure.
5
+ Home-page: https://github.com/aibrix/aibrix/tree/main/python/aibrix
6
+ License: Apache-2.0
7
+ Author: AIBrix Authors
8
+ Author-email: brosoul@126.com
9
+ Requires-Python: >=3.8,<3.12
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Education
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Software Development
23
+ Classifier: Topic :: Software Development :: Libraries
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Requires-Dist: boto3 (>=1.35.5,<2.0.0)
26
+ Requires-Dist: fastapi (>=0.112.2,<0.113.0)
27
+ Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
28
+ Requires-Dist: hf-transfer (>=0.1.8,<0.2.0)
29
+ Requires-Dist: httpx (>=0.27.2,<0.28.0)
30
+ Requires-Dist: huggingface-hub (>=0.24.6,<0.25.0)
31
+ Requires-Dist: prometheus-client (>=0.20.0,<0.21.0)
32
+ Requires-Dist: tos (==2.8.0)
33
+ Requires-Dist: types-requests (>=2.31.0,<3.0.0)
34
+ Requires-Dist: uvicorn (>=0.30.6,<0.31.0)
35
+ Project-URL: Repository, https://github.com/aibrix/aibrix/tree/main/python/aibrix
36
+ Description-Content-Type: text/markdown
37
+
38
+ # AI Runtime
39
+ A versatile sidecar enabling metric standardization, model downloading, and management.
40
+
41
+ ## Quick Start
42
+ ### Installation
43
+ AI Runtime can be installed by `pip`.
44
+
45
+ ```sh
46
+ pip install aibrix
47
+ ```
48
+
49
+ ### Model download
50
+ The AI Runtime supports model downloading from the following storage backends:
51
+ * HuggingFace
52
+ * S3
53
+ * TOS
54
+
55
+ For more details on model downloading, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#model-downloading).
56
+
57
+ ### Integrate with inference engines
58
+ The AI Runtime hides various implementation details on the inference engine side, providing a universal method to guide model management, as well as expose inference monitoring metrics.
59
+
60
+ At present, `vLLM` engine is supported, and in the future, `SGLang` and other inference engines will be supported.
61
+
62
+ For more details on integrating with `vLLM`, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#metric-standardization).
63
+
64
+ ## Contributing
65
+ We welcome contributions from the community! Check out our [contributing guidelines](https://github.com/aibrix/aibrix/blob/main/CONTRIBUTING.md) to see how you can make a difference.
66
+
67
+ ### Build from source
68
+
69
+ ```bash
70
+ # This may take several minutes
71
+ pip install -e .
72
+ ```
73
+
74
+ ### Lint, Format and Type Check
75
+
76
+ Before contributing your code, please run the following commands to ensure that your code passes the tests and linting checks.
77
+
78
+ ```bash
79
+ # install dependencies
80
+ poetry install --no-root --with dev
81
+
82
+ # linting, formatting and type checking
83
+ bash ./scripts/format.sh
84
+ ```
85
+
86
+ ## License
87
+
88
+ AI Runtime is licensed under the [APACHE License](https://github.com/aibrix/aibrix/LICENSE.md).
aibrix-0.1.0/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # AI Runtime
2
+ A versatile sidecar enabling metric standardization, model downloading, and management.
3
+
4
+ ## Quick Start
5
+ ### Installation
6
+ AI Runtime can be installed by `pip`.
7
+
8
+ ```sh
9
+ pip install aibrix
10
+ ```
11
+
12
+ ### Model download
13
+ The AI Runtime supports model downloading from the following storage backends:
14
+ * HuggingFace
15
+ * S3
16
+ * TOS
17
+
18
+ For more details on model downloading, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#model-downloading).
19
+
20
+ ### Integrate with inference engines
21
+ The AI Runtime hides various implementation details on the inference engine side, providing a universal method to guide model management, as well as expose inference monitoring metrics.
22
+
23
+ At present, `vLLM` engine is supported, and in the future, `SGLang` and other inference engines will be supported.
24
+
25
+ For more details on integrating with `vLLM`, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#metric-standardization).
26
+
27
+ ## Contributing
28
+ We welcome contributions from the community! Check out our [contributing guidelines](https://github.com/aibrix/aibrix/blob/main/CONTRIBUTING.md) to see how you can make a difference.
29
+
30
+ ### Build from source
31
+
32
+ ```bash
33
+ # This may take several minutes
34
+ pip install -e .
35
+ ```
36
+
37
+ ### Lint, Format and Type Check
38
+
39
+ Before contributing your code, please run the following commands to ensure that your code passes the tests and linting checks.
40
+
41
+ ```bash
42
+ # install dependencies
43
+ poetry install --no-root --with dev
44
+
45
+ # linting, formatting and type checking
46
+ bash ./scripts/format.sh
47
+ ```
48
+
49
+ ## License
50
+
51
+ AI Runtime is licensed under the [APACHE License](https://github.com/aibrix/aibrix/LICENSE.md).
@@ -0,0 +1,13 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,3 @@
1
+ VERSION = (0, 1, 0)
2
+
3
+ __version__ = ".".join(map(str, VERSION))
@@ -0,0 +1,176 @@
1
+ import argparse
2
+ import os
3
+ import shutil
4
+ import time
5
+ from pathlib import Path
6
+ from urllib.parse import urljoin
7
+
8
+ import uvicorn
9
+ from fastapi import APIRouter, FastAPI, Request, Response
10
+ from fastapi.datastructures import State
11
+ from fastapi.responses import JSONResponse
12
+ from prometheus_client import make_asgi_app, multiprocess
13
+ from starlette.routing import Mount
14
+
15
+ from aibrix import __version__, envs
16
+ from aibrix.config import EXCLUDE_METRICS_HTTP_ENDPOINTS
17
+ from aibrix.logger import init_logger
18
+ from aibrix.metrics.engine_rules import get_metric_standard_rules
19
+ from aibrix.metrics.http_collector import HTTPCollector
20
+ from aibrix.metrics.metrics import (
21
+ HTTP_COUNTER_METRICS,
22
+ HTTP_LATENCY_METRICS,
23
+ INFO_METRICS,
24
+ REGISTRY,
25
+ )
26
+ from aibrix.openapi.engine.base import InferenceEngine, get_inference_engine
27
+ from aibrix.openapi.protocol import (
28
+ ErrorResponse,
29
+ LoadLoraAdapterRequest,
30
+ UnloadLoraAdapterRequest,
31
+ )
32
+
33
+ logger = init_logger(__name__)
34
+ router = APIRouter()
35
+
36
+
37
def initial_prometheus_multiproc_dir():
    """Prepare the directory used by prometheus_client's multiprocess mode.

    Resolves the directory from the ``PROMETHEUS_MULTIPROC_DIR`` environment
    variable when it is set, otherwise falls back to the default configured
    in ``envs``. The directory is created if missing and emptied of stale
    metric files left over from a previous run, then the resolved path is
    exported back into the environment so prometheus_client picks it up.
    """
    if "PROMETHEUS_MULTIPROC_DIR" not in os.environ:
        prometheus_multiproc_dir = envs.PROMETHEUS_MULTIPROC_DIR
    else:
        prometheus_multiproc_dir = os.environ["PROMETHEUS_MULTIPROC_DIR"]

    # Note: ensure it will be automatically cleaned up upon exit.
    path = Path(prometheus_multiproc_dir)
    path.mkdir(parents=True, exist_ok=True)
    if path.is_dir():
        for item in path.iterdir():
            if item.is_dir():
                shutil.rmtree(item)
            else:
                item.unlink()
    # Bug fix: export the directory actually resolved above. The original
    # always wrote envs.PROMETHEUS_MULTIPROC_DIR here, silently overriding
    # a user-provided PROMETHEUS_MULTIPROC_DIR with the default.
    os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_multiproc_dir
53
+
54
+
55
def inference_engine(request: Request) -> InferenceEngine:
    """Return the process-wide inference engine stored on the app state."""
    engine = request.app.state.inference_engine
    return engine
57
+
58
+
59
def mount_metrics(app: FastAPI):
    """Wire Prometheus metrics into *app*: multiprocess collection, engine
    metric scraping, and the ``/metrics`` ASGI endpoint."""
    # Prepare the multiprocess directory, then attach the multiprocess
    # collector to the shared registry.
    initial_prometheus_multiproc_dir()
    multiproc_dir = os.environ["PROMETHEUS_MULTIPROC_DIR"]
    logger.info(
        f"AIBrix to use {multiproc_dir} as PROMETHEUS_MULTIPROC_DIR"
    )
    multiprocess.MultiProcessCollector(REGISTRY)

    # Build the collector that scrapes the inference engine's metrics and
    # rewrites them with the engine-specific standardization rules.
    engine_name = envs.INFERENCE_ENGINE
    endpoint = urljoin(envs.INFERENCE_ENGINE_ENDPOINT, envs.METRIC_SCRAPE_PATH)
    REGISTRY.register(HTTPCollector(endpoint, get_metric_standard_rules(engine_name)))
    logger.info(
        f"AIBrix to scrape metrics from {endpoint}, use {engine_name} standard rules"
    )

    # Expose everything in REGISTRY by mounting the prometheus ASGI app
    # at /metrics.
    app.routes.append(Mount("/metrics", make_asgi_app(registry=REGISTRY)))
83
+
84
+
85
def init_app_state(state: State) -> None:
    """Attach the configured inference engine client to the app state."""
    engine = get_inference_engine(
        envs.INFERENCE_ENGINE,
        envs.INFERENCE_ENGINE_VERSION,
        envs.INFERENCE_ENGINE_ENDPOINT,
    )
    state.inference_engine = engine
91
+
92
+
93
@router.post("/v1/lora_adapter/load")
async def load_lora_adapter(request: LoadLoraAdapterRequest, raw_request: Request):
    """Load a LoRA adapter on the underlying inference engine.

    On failure the engine's error payload is returned with its status code;
    on success a plain 200 response carries the engine's reply.
    """
    engine = inference_engine(raw_request)
    result = await engine.load_lora_adapter(request)
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(), status_code=result.code)
    return Response(status_code=200, content=result)
100
+
101
+
102
@router.post("/v1/lora_adapter/unload")
async def unload_lora_adapter(request: UnloadLoraAdapterRequest, raw_request: Request):
    """Unload a LoRA adapter from the underlying inference engine.

    On failure the engine's error payload is returned with its status code;
    on success a plain 200 response carries the engine's reply.
    """
    engine = inference_engine(raw_request)
    result = await engine.unload_lora_adapter(request)
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(), status_code=result.code)
    return Response(status_code=200, content=result)
109
+
110
+
111
def build_app(args: argparse.Namespace):
    """Construct the FastAPI application for the AIBrix runtime server.

    OpenAPI schema, Swagger UI, and ReDoc are exposed only when
    ``--enable-fastapi-docs`` was given. Also publishes build info as a
    Prometheus info metric, mounts ``/metrics``, initializes the app state,
    registers the API router, and installs an HTTP metrics middleware.
    """
    if args.enable_fastapi_docs:
        app = FastAPI(debug=False)
    else:
        # Default (production) mode: disable all schema/doc endpoints.
        app = FastAPI(debug=False, openapi_url=None, docs_url=None, redoc_url=None)

    # Static build/runtime info exported as a Prometheus info metric.
    INFO_METRICS.info(
        {
            "version": __version__.__version__,
            "engine": envs.INFERENCE_ENGINE,
            "engine_version": envs.INFERENCE_ENGINE_VERSION,
        }
    )
    mount_metrics(app)
    init_app_state(app.state)
    app.include_router(router)

    @app.middleware("http")
    async def add_router_prometheus_middlerware(request: Request, call_next):
        # Record latency and request count per (method, endpoint, status),
        # skipping endpoints explicitly excluded from metrics.
        method = request.method
        endpoint = request.scope.get("path")
        if endpoint in EXCLUDE_METRICS_HTTP_ENDPOINTS:
            return await call_next(request)

        started = time.perf_counter()
        response = await call_next(request)
        elapsed = time.perf_counter() - started

        labels = dict(method=method, endpoint=endpoint, status=response.status_code)
        HTTP_LATENCY_METRICS.labels(**labels).observe(elapsed)
        HTTP_COUNTER_METRICS.labels(**labels).inc()
        return response

    return app
151
+
152
+
153
def nullable_str(val: str):
    """argparse type helper: map empty strings and the literal "None" to None."""
    return None if not val or val == "None" else val
157
+
158
+
159
def main():
    """CLI entry point: parse arguments and serve the runtime app with uvicorn."""
    parser = argparse.ArgumentParser(description="Run aibrix runtime server")
    parser.add_argument("--host", type=nullable_str, default=None, help="host name")
    parser.add_argument("--port", type=int, default=8080, help="port number")
    parser.add_argument(
        "--enable-fastapi-docs",
        action="store_true",
        default=False,
        help="Enable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint",
    )
    args = parser.parse_args()

    logger.info("Use %s to startup runtime server", args)
    uvicorn.run(build_app(args=args), host=args.host, port=args.port)


if __name__ == "__main__":
    main()
@@ -0,0 +1,16 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
# Public batch API surface.
# NOTE(review): these names are not defined or imported in this module's
# visible code — confirm they are re-exported here (e.g. from
# aibrix.batch.storage), otherwise star-imports of this package will fail.
__all__ = [
    "create_batch_input",
    "retrieve_batch_job_content",
]
@@ -0,0 +1,21 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
# Batch-processing constants.

# Interval at which the sliding window is checked for expired entries.
EXPIRE_INTERVAL = 1

# Job pool size used by the job scheduler; should be proportional to the
# resource capacity of the backend.
DEFAULT_JOB_POOL_SIZE = 1
@@ -0,0 +1,67 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import asyncio
16
+
17
+ import aibrix.batch.storage as _storage
18
+ from aibrix.batch.constant import DEFAULT_JOB_POOL_SIZE
19
+ from aibrix.batch.job_manager import JobManager
20
+ from aibrix.batch.request_proxy import RequestProxy
21
+ from aibrix.batch.scheduler import JobScheduler
22
+
23
+
24
+ class BatchDriver:
25
+ def __init__(self):
26
+ """
27
+ This is main entrance to bind all components to serve job requests.
28
+ """
29
+ _storage.initialize_storage()
30
+ self._storage = _storage
31
+ self._job_manager = JobManager()
32
+ self._scheduler = JobScheduler(self._job_manager, DEFAULT_JOB_POOL_SIZE)
33
+ self._proxy = RequestProxy(self._storage, self._job_manager)
34
+ asyncio.create_task(self.jobs_running_loop())
35
+
36
+ def upload_batch_data(self, input_file_name):
37
+ job_id = self._storage.submit_job_input(input_file_name)
38
+ return job_id
39
+
40
+ def create_job(self, job_id, endpoint, window_due_time):
41
+ self._job_manager.create_job(job_id, endpoint, window_due_time)
42
+
43
+ due_time = self._job_manager.get_job_window_due(job_id)
44
+ self._scheduler.append_job(job_id, due_time)
45
+
46
+ def get_job_status(self, job_id):
47
+ return self._job_manager.get_job_status(job_id)
48
+
49
+ def retrieve_job_result(self, job_id):
50
+ num_requests = _storage.get_job_num_request(job_id)
51
+ req_results = _storage.get_job_results(job_id, 0, num_requests)
52
+ return req_results
53
+
54
+ async def jobs_running_loop(self):
55
+ """
56
+ This loop is going through all active jobs in scheduler.
57
+ For now, the executing unit is one request. Later if necessary,
58
+ we can support a batch size of request per execution.
59
+ """
60
+ while True:
61
+ one_job = self._scheduler.round_robin_get_job()
62
+ if one_job:
63
+ await self._proxy.execute_queries(one_job)
64
+ await asyncio.sleep(0)
65
+
66
+ def clear_job(self, job_id):
67
+ self._storage.delete_job(job_id)