aibrix 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. aibrix-0.1.0/PKG-INFO +88 -0
  2. aibrix-0.1.0/README.md +51 -0
  3. aibrix-0.1.0/aibrix/__init__.py +13 -0
  4. aibrix-0.1.0/aibrix/__version__.py +3 -0
  5. aibrix-0.1.0/aibrix/app.py +176 -0
  6. aibrix-0.1.0/aibrix/batch/__init__.py +16 -0
  7. aibrix-0.1.0/aibrix/batch/constant.py +21 -0
  8. aibrix-0.1.0/aibrix/batch/driver.py +67 -0
  9. aibrix-0.1.0/aibrix/batch/job_manager.py +349 -0
  10. aibrix-0.1.0/aibrix/batch/request_proxy.py +79 -0
  11. aibrix-0.1.0/aibrix/batch/scheduler.py +292 -0
  12. aibrix-0.1.0/aibrix/batch/storage/__init__.py +88 -0
  13. aibrix-0.1.0/aibrix/batch/storage/batch_storage.py +129 -0
  14. aibrix-0.1.0/aibrix/batch/storage/generic_storage.py +235 -0
  15. aibrix-0.1.0/aibrix/batch/storage/tos_storage.py +182 -0
  16. aibrix-0.1.0/aibrix/config.py +20 -0
  17. aibrix-0.1.0/aibrix/downloader/__init__.py +37 -0
  18. aibrix-0.1.0/aibrix/downloader/__main__.py +40 -0
  19. aibrix-0.1.0/aibrix/downloader/base.py +171 -0
  20. aibrix-0.1.0/aibrix/downloader/huggingface.py +120 -0
  21. aibrix-0.1.0/aibrix/downloader/s3.py +184 -0
  22. aibrix-0.1.0/aibrix/downloader/tos.py +171 -0
  23. aibrix-0.1.0/aibrix/downloader/utils.py +101 -0
  24. aibrix-0.1.0/aibrix/envs.py +89 -0
  25. aibrix-0.1.0/aibrix/logger.py +44 -0
  26. aibrix-0.1.0/aibrix/metrics/__init__.py +13 -0
  27. aibrix-0.1.0/aibrix/metrics/engine_rules.py +48 -0
  28. aibrix-0.1.0/aibrix/metrics/http_collector.py +70 -0
  29. aibrix-0.1.0/aibrix/metrics/metrics.py +38 -0
  30. aibrix-0.1.0/aibrix/metrics/standard_rules.py +58 -0
  31. aibrix-0.1.0/aibrix/openapi/__init__.py +13 -0
  32. aibrix-0.1.0/aibrix/openapi/engine/__init__.py +13 -0
  33. aibrix-0.1.0/aibrix/openapi/engine/base.py +76 -0
  34. aibrix-0.1.0/aibrix/openapi/engine/vllm.py +103 -0
  35. aibrix-0.1.0/aibrix/openapi/protocol.py +40 -0
  36. aibrix-0.1.0/aibrix/protos/__init__.py +13 -0
  37. aibrix-0.1.0/pyproject.toml +76 -0
aibrix-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.1
2
+ Name: aibrix
3
+ Version: 0.1.0
4
+ Summary: AIBrix, the foundational building blocks for constructing your own GenAI inference infrastructure.
5
+ Home-page: https://github.com/aibrix/aibrix/tree/main/python/aibrix
6
+ License: Apache-2.0
7
+ Author: AIBrix Authors
8
+ Author-email: brosoul@126.com
9
+ Requires-Python: >=3.8,<3.12
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Education
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Software Development
23
+ Classifier: Topic :: Software Development :: Libraries
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Requires-Dist: boto3 (>=1.35.5,<2.0.0)
26
+ Requires-Dist: fastapi (>=0.112.2,<0.113.0)
27
+ Requires-Dist: gunicorn (>=23.0.0,<24.0.0)
28
+ Requires-Dist: hf-transfer (>=0.1.8,<0.2.0)
29
+ Requires-Dist: httpx (>=0.27.2,<0.28.0)
30
+ Requires-Dist: huggingface-hub (>=0.24.6,<0.25.0)
31
+ Requires-Dist: prometheus-client (>=0.20.0,<0.21.0)
32
+ Requires-Dist: tos (==2.8.0)
33
+ Requires-Dist: types-requests (>=2.31.0,<3.0.0)
34
+ Requires-Dist: uvicorn (>=0.30.6,<0.31.0)
35
+ Project-URL: Repository, https://github.com/aibrix/aibrix/tree/main/python/aibrix
36
+ Description-Content-Type: text/markdown
37
+
38
+ # AI Runtime
39
+ A versatile sidecar enabling metric standardization, model downloading, and management.
40
+
41
+ ## Quick Start
42
+ ### Installation
43
+ AI Runtime can be installed by `pip`.
44
+
45
+ ```sh
46
+ pip install aibrix
47
+ ```
48
+
49
+ ### Model download
50
+ The AI Runtime supports model downloading from the following storage backends:
51
+ * HuggingFace
52
+ * S3
53
+ * TOS
54
+
55
+ For more details on model downloading, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#model-downloading).
56
+
57
+ ### Integrate with inference engines
58
+ The AI Runtime hides various implementation details on the inference engine side, providing a universal method to guide model management, as well as expose inference monitoring metrics.
59
+
60
+ At present, `vLLM` engine is supported, and in the future, `SGLang` and other inference engines will be supported.
61
+
62
+ For more details on integrating with `vLLM`, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#metric-standardization).
63
+
64
+ ## Contributing
65
+ We welcome contributions from the community! Check out our [contributing guidelines](https://github.com/aibrix/aibrix/blob/main/CONTRIBUTING.md) to see how you can make a difference.
66
+
67
+ ### Build from source
68
+
69
+ ```bash
70
+ # This may take several minutes
71
+ pip install -e .
72
+ ```
73
+
74
+ ### Lint, Format and Type Check
75
+
76
+ Before contributing your code, please run the following commands to ensure that your code passes the tests and linting checks.
77
+
78
+ ```bash
79
+ # install dependencies
80
+ poetry install --no-root --with dev
81
+
82
+ # linting, formatting and type checking
83
+ bash ./scripts/format.sh
84
+ ```
85
+
86
+ ## License
87
+
88
+ AI Runtime is licensed under the [APACHE License](https://github.com/aibrix/aibrix/LICENSE.md).
aibrix-0.1.0/README.md ADDED
@@ -0,0 +1,51 @@
1
+ # AI Runtime
2
+ A versatile sidecar enabling metric standardization, model downloading, and management.
3
+
4
+ ## Quick Start
5
+ ### Installation
6
+ AI Runtime can be installed by `pip`.
7
+
8
+ ```sh
9
+ pip install aibrix
10
+ ```
11
+
12
+ ### Model download
13
+ The AI Runtime supports model downloading from the following storage backends:
14
+ * HuggingFace
15
+ * S3
16
+ * TOS
17
+
18
+ For more details on model downloading, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#model-downloading).
19
+
20
+ ### Integrate with inference engines
21
+ The AI Runtime hides various implementation details on the inference engine side, providing a universal method to guide model management, as well as expose inference monitoring metrics.
22
+
23
+ At present, `vLLM` engine is supported, and in the future, `SGLang` and other inference engines will be supported.
24
+
25
+ For more details on integrating with `vLLM`, please refer to our [Runtime docs](https://github.com/aibrix/aibrix/blob/main/docs/source/features/runtime.rst#metric-standardization).
26
+
27
+ ## Contributing
28
+ We welcome contributions from the community! Check out our [contributing guidelines](https://github.com/aibrix/aibrix/blob/main/CONTRIBUTING.md) to see how you can make a difference.
29
+
30
+ ### Build from source
31
+
32
+ ```bash
33
+ # This may take several minutes
34
+ pip install -e .
35
+ ```
36
+
37
+ ### Lint, Format and Type Check
38
+
39
+ Before contributing your code, please run the following commands to ensure that your code passes the tests and linting checks.
40
+
41
+ ```bash
42
+ # install dependencies
43
+ poetry install --no-root --with dev
44
+
45
+ # linting, formatting and type checking
46
+ bash ./scripts/format.sh
47
+ ```
48
+
49
+ ## License
50
+
51
+ AI Runtime is licensed under the [APACHE License](https://github.com/aibrix/aibrix/LICENSE.md).
@@ -0,0 +1,13 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,3 @@
1
+ VERSION = (0, 1, 0)
2
+
3
+ __version__ = ".".join(map(str, VERSION))
@@ -0,0 +1,176 @@
1
+ import argparse
2
+ import os
3
+ import shutil
4
+ import time
5
+ from pathlib import Path
6
+ from urllib.parse import urljoin
7
+
8
+ import uvicorn
9
+ from fastapi import APIRouter, FastAPI, Request, Response
10
+ from fastapi.datastructures import State
11
+ from fastapi.responses import JSONResponse
12
+ from prometheus_client import make_asgi_app, multiprocess
13
+ from starlette.routing import Mount
14
+
15
+ from aibrix import __version__, envs
16
+ from aibrix.config import EXCLUDE_METRICS_HTTP_ENDPOINTS
17
+ from aibrix.logger import init_logger
18
+ from aibrix.metrics.engine_rules import get_metric_standard_rules
19
+ from aibrix.metrics.http_collector import HTTPCollector
20
+ from aibrix.metrics.metrics import (
21
+ HTTP_COUNTER_METRICS,
22
+ HTTP_LATENCY_METRICS,
23
+ INFO_METRICS,
24
+ REGISTRY,
25
+ )
26
+ from aibrix.openapi.engine.base import InferenceEngine, get_inference_engine
27
+ from aibrix.openapi.protocol import (
28
+ ErrorResponse,
29
+ LoadLoraAdapterRequest,
30
+ UnloadLoraAdapterRequest,
31
+ )
32
+
33
+ logger = init_logger(__name__)
34
+ router = APIRouter()
35
+
36
+
37
def initial_prometheus_multiproc_dir():
    """Prepare the directory used by prometheus_client's multiprocess mode.

    Resolves the directory from the ``PROMETHEUS_MULTIPROC_DIR`` environment
    variable when it is set, otherwise falls back to the default configured
    in ``envs``. The directory is created if missing and emptied of stale
    metric files left over from a previous run, then the resolved path is
    exported back into the environment so prometheus_client picks it up.
    """
    if "PROMETHEUS_MULTIPROC_DIR" not in os.environ:
        prometheus_multiproc_dir = envs.PROMETHEUS_MULTIPROC_DIR
    else:
        prometheus_multiproc_dir = os.environ["PROMETHEUS_MULTIPROC_DIR"]

    # Note: ensure it will be automatically cleaned up upon exit.
    path = Path(prometheus_multiproc_dir)
    path.mkdir(parents=True, exist_ok=True)
    if path.is_dir():
        for item in path.iterdir():
            if item.is_dir():
                shutil.rmtree(item)
            else:
                item.unlink()
    # Bug fix: export the directory actually resolved above. The original
    # always wrote envs.PROMETHEUS_MULTIPROC_DIR here, silently overriding
    # a user-provided PROMETHEUS_MULTIPROC_DIR with the default.
    os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_multiproc_dir
53
+
54
+
55
def inference_engine(request: Request) -> InferenceEngine:
    """Return the process-wide inference engine stored on the app state."""
    engine = request.app.state.inference_engine
    return engine
57
+
58
+
59
def mount_metrics(app: FastAPI):
    """Wire Prometheus metrics into *app*: multiprocess collection, engine
    metric scraping, and the ``/metrics`` ASGI endpoint."""
    # Prepare the multiprocess directory, then attach the multiprocess
    # collector to the shared registry.
    initial_prometheus_multiproc_dir()
    multiproc_dir = os.environ["PROMETHEUS_MULTIPROC_DIR"]
    logger.info(
        f"AIBrix to use {multiproc_dir} as PROMETHEUS_MULTIPROC_DIR"
    )
    multiprocess.MultiProcessCollector(REGISTRY)

    # Build the collector that scrapes the inference engine's metrics and
    # rewrites them with the engine-specific standardization rules.
    engine_name = envs.INFERENCE_ENGINE
    endpoint = urljoin(envs.INFERENCE_ENGINE_ENDPOINT, envs.METRIC_SCRAPE_PATH)
    REGISTRY.register(HTTPCollector(endpoint, get_metric_standard_rules(engine_name)))
    logger.info(
        f"AIBrix to scrape metrics from {endpoint}, use {engine_name} standard rules"
    )

    # Expose everything in REGISTRY by mounting the prometheus ASGI app
    # at /metrics.
    app.routes.append(Mount("/metrics", make_asgi_app(registry=REGISTRY)))
83
+
84
+
85
def init_app_state(state: State) -> None:
    """Attach the configured inference engine client to the app state."""
    engine = get_inference_engine(
        envs.INFERENCE_ENGINE,
        envs.INFERENCE_ENGINE_VERSION,
        envs.INFERENCE_ENGINE_ENDPOINT,
    )
    state.inference_engine = engine
91
+
92
+
93
@router.post("/v1/lora_adapter/load")
async def load_lora_adapter(request: LoadLoraAdapterRequest, raw_request: Request):
    """Load a LoRA adapter on the underlying inference engine.

    On failure the engine's error payload is returned with its status code;
    on success a plain 200 response carries the engine's reply.
    """
    engine = inference_engine(raw_request)
    result = await engine.load_lora_adapter(request)
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(), status_code=result.code)
    return Response(status_code=200, content=result)
100
+
101
+
102
@router.post("/v1/lora_adapter/unload")
async def unload_lora_adapter(request: UnloadLoraAdapterRequest, raw_request: Request):
    """Unload a LoRA adapter from the underlying inference engine.

    On failure the engine's error payload is returned with its status code;
    on success a plain 200 response carries the engine's reply.
    """
    engine = inference_engine(raw_request)
    result = await engine.unload_lora_adapter(request)
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(), status_code=result.code)
    return Response(status_code=200, content=result)
109
+
110
+
111
def build_app(args: argparse.Namespace):
    """Construct the FastAPI application for the AIBrix runtime server.

    OpenAPI schema, Swagger UI, and ReDoc are exposed only when
    ``--enable-fastapi-docs`` was given. Also publishes build info as a
    Prometheus info metric, mounts ``/metrics``, initializes the app state,
    registers the API router, and installs an HTTP metrics middleware.
    """
    if args.enable_fastapi_docs:
        app = FastAPI(debug=False)
    else:
        # Default (production) mode: disable all schema/doc endpoints.
        app = FastAPI(debug=False, openapi_url=None, docs_url=None, redoc_url=None)

    # Static build/runtime info exported as a Prometheus info metric.
    INFO_METRICS.info(
        {
            "version": __version__.__version__,
            "engine": envs.INFERENCE_ENGINE,
            "engine_version": envs.INFERENCE_ENGINE_VERSION,
        }
    )
    mount_metrics(app)
    init_app_state(app.state)
    app.include_router(router)

    @app.middleware("http")
    async def add_router_prometheus_middlerware(request: Request, call_next):
        # Record latency and request count per (method, endpoint, status),
        # skipping endpoints explicitly excluded from metrics.
        method = request.method
        endpoint = request.scope.get("path")
        if endpoint in EXCLUDE_METRICS_HTTP_ENDPOINTS:
            return await call_next(request)

        started = time.perf_counter()
        response = await call_next(request)
        elapsed = time.perf_counter() - started

        labels = dict(method=method, endpoint=endpoint, status=response.status_code)
        HTTP_LATENCY_METRICS.labels(**labels).observe(elapsed)
        HTTP_COUNTER_METRICS.labels(**labels).inc()
        return response

    return app
151
+
152
+
153
def nullable_str(val: str):
    """argparse type helper: map empty strings and the literal "None" to None."""
    return None if not val or val == "None" else val
157
+
158
+
159
def main():
    """CLI entry point: parse arguments and serve the runtime app with uvicorn."""
    parser = argparse.ArgumentParser(description="Run aibrix runtime server")
    parser.add_argument("--host", type=nullable_str, default=None, help="host name")
    parser.add_argument("--port", type=int, default=8080, help="port number")
    parser.add_argument(
        "--enable-fastapi-docs",
        action="store_true",
        default=False,
        help="Enable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint",
    )
    args = parser.parse_args()

    logger.info("Use %s to startup runtime server", args)
    uvicorn.run(build_app(args=args), host=args.host, port=args.port)


if __name__ == "__main__":
    main()
@@ -0,0 +1,16 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
# Public batch API surface.
# NOTE(review): these names are not defined or imported in this module's
# visible code — confirm they are re-exported here (e.g. from
# aibrix.batch.storage), otherwise star-imports of this package will fail.
__all__ = [
    "create_batch_input",
    "retrieve_batch_job_content",
]
@@ -0,0 +1,21 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
# Batch-processing constants.

# Interval at which the sliding window is checked for expired entries.
EXPIRE_INTERVAL = 1

# Job pool size used by the job scheduler; should be proportional to the
# resource capacity of the backend.
DEFAULT_JOB_POOL_SIZE = 1
@@ -0,0 +1,67 @@
1
+ # Copyright 2024 The Aibrix Team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import asyncio
16
+
17
+ import aibrix.batch.storage as _storage
18
+ from aibrix.batch.constant import DEFAULT_JOB_POOL_SIZE
19
+ from aibrix.batch.job_manager import JobManager
20
+ from aibrix.batch.request_proxy import RequestProxy
21
+ from aibrix.batch.scheduler import JobScheduler
22
+
23
+
24
+ class BatchDriver:
25
+ def __init__(self):
26
+ """
27
+ This is main entrance to bind all components to serve job requests.
28
+ """
29
+ _storage.initialize_storage()
30
+ self._storage = _storage
31
+ self._job_manager = JobManager()
32
+ self._scheduler = JobScheduler(self._job_manager, DEFAULT_JOB_POOL_SIZE)
33
+ self._proxy = RequestProxy(self._storage, self._job_manager)
34
+ asyncio.create_task(self.jobs_running_loop())
35
+
36
+ def upload_batch_data(self, input_file_name):
37
+ job_id = self._storage.submit_job_input(input_file_name)
38
+ return job_id
39
+
40
+ def create_job(self, job_id, endpoint, window_due_time):
41
+ self._job_manager.create_job(job_id, endpoint, window_due_time)
42
+
43
+ due_time = self._job_manager.get_job_window_due(job_id)
44
+ self._scheduler.append_job(job_id, due_time)
45
+
46
+ def get_job_status(self, job_id):
47
+ return self._job_manager.get_job_status(job_id)
48
+
49
+ def retrieve_job_result(self, job_id):
50
+ num_requests = _storage.get_job_num_request(job_id)
51
+ req_results = _storage.get_job_results(job_id, 0, num_requests)
52
+ return req_results
53
+
54
+ async def jobs_running_loop(self):
55
+ """
56
+ This loop is going through all active jobs in scheduler.
57
+ For now, the executing unit is one request. Later if necessary,
58
+ we can support a batch size of request per execution.
59
+ """
60
+ while True:
61
+ one_job = self._scheduler.round_robin_get_job()
62
+ if one_job:
63
+ await self._proxy.execute_queries(one_job)
64
+ await asyncio.sleep(0)
65
+
66
+ def clear_job(self, job_id):
67
+ self._storage.delete_job(job_id)