gpustack 0.1.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gpustack-0.1.0rc1/PKG-INFO +33 -0
- gpustack-0.1.0rc1/gpustack/__init__.py +2 -0
- gpustack-0.1.0rc1/gpustack/api/exceptions.py +133 -0
- gpustack-0.1.0rc1/gpustack/api/middlewares.py +122 -0
- gpustack-0.1.0rc1/gpustack/chat/__init__.py +0 -0
- gpustack-0.1.0rc1/gpustack/chat/manager.py +184 -0
- gpustack-0.1.0rc1/gpustack/client/__init__.py +4 -0
- gpustack-0.1.0rc1/gpustack/client/generated_clientset.py +39 -0
- gpustack-0.1.0rc1/gpustack/client/generated_http_client.py +286 -0
- gpustack-0.1.0rc1/gpustack/client/generated_model_client.py +69 -0
- gpustack-0.1.0rc1/gpustack/client/generated_model_instance_client.py +69 -0
- gpustack-0.1.0rc1/gpustack/client/generated_user_client.py +69 -0
- gpustack-0.1.0rc1/gpustack/client/generated_worker_client.py +69 -0
- gpustack-0.1.0rc1/gpustack/cmd/__init__.py +3 -0
- gpustack-0.1.0rc1/gpustack/cmd/chat.py +31 -0
- gpustack-0.1.0rc1/gpustack/cmd/start.py +231 -0
- gpustack-0.1.0rc1/gpustack/cmd/version.py +24 -0
- gpustack-0.1.0rc1/gpustack/codegen/__init__.py +0 -0
- gpustack-0.1.0rc1/gpustack/codegen/filters.py +17 -0
- gpustack-0.1.0rc1/gpustack/codegen/generate.py +91 -0
- gpustack-0.1.0rc1/gpustack/codegen/templates/client.py.jinja +70 -0
- gpustack-0.1.0rc1/gpustack/codegen/templates/clientset.py.jinja +35 -0
- gpustack-0.1.0rc1/gpustack/codegen/templates/http_client.py.jinja +286 -0
- gpustack-0.1.0rc1/gpustack/config/__init__.py +4 -0
- gpustack-0.1.0rc1/gpustack/config/config.py +155 -0
- gpustack-0.1.0rc1/gpustack/http_proxy/__init__.py +0 -0
- gpustack-0.1.0rc1/gpustack/http_proxy/load_balancer.py +17 -0
- gpustack-0.1.0rc1/gpustack/http_proxy/strategies.py +35 -0
- gpustack-0.1.0rc1/gpustack/logging.py +29 -0
- gpustack-0.1.0rc1/gpustack/main.py +44 -0
- gpustack-0.1.0rc1/gpustack/migrations/README +1 -0
- gpustack-0.1.0rc1/gpustack/migrations/env.py +82 -0
- gpustack-0.1.0rc1/gpustack/migrations/script.py.mako +28 -0
- gpustack-0.1.0rc1/gpustack/migrations/versions/2024_06_28_1630-4f4ec0a5fcb3_init_tables.py +105 -0
- gpustack-0.1.0rc1/gpustack/migrations/versions/2024_06_28_1831-a16a55af6f75_add_instance_name.py +34 -0
- gpustack-0.1.0rc1/gpustack/mixins/__init__.py +6 -0
- gpustack-0.1.0rc1/gpustack/mixins/active_record.py +323 -0
- gpustack-0.1.0rc1/gpustack/mixins/timestamp.py +27 -0
- gpustack-0.1.0rc1/gpustack/routes/__init__.py +0 -0
- gpustack-0.1.0rc1/gpustack/routes/api_keys.py +86 -0
- gpustack-0.1.0rc1/gpustack/routes/auth.py +64 -0
- gpustack-0.1.0rc1/gpustack/routes/dashboard.py +269 -0
- gpustack-0.1.0rc1/gpustack/routes/gpu_devices.py +42 -0
- gpustack-0.1.0rc1/gpustack/routes/model_instances.py +154 -0
- gpustack-0.1.0rc1/gpustack/routes/models.py +115 -0
- gpustack-0.1.0rc1/gpustack/routes/openai.py +108 -0
- gpustack-0.1.0rc1/gpustack/routes/probes.py +14 -0
- gpustack-0.1.0rc1/gpustack/routes/routes.py +54 -0
- gpustack-0.1.0rc1/gpustack/routes/ui.py +21 -0
- gpustack-0.1.0rc1/gpustack/routes/users.py +117 -0
- gpustack-0.1.0rc1/gpustack/routes/workers.py +86 -0
- gpustack-0.1.0rc1/gpustack/scheduler/calculator.py +171 -0
- gpustack-0.1.0rc1/gpustack/scheduler/policy.py +153 -0
- gpustack-0.1.0rc1/gpustack/scheduler/queue.py +34 -0
- gpustack-0.1.0rc1/gpustack/scheduler/scheduler.py +207 -0
- gpustack-0.1.0rc1/gpustack/schemas/__init__.py +76 -0
- gpustack-0.1.0rc1/gpustack/schemas/api_keys.py +34 -0
- gpustack-0.1.0rc1/gpustack/schemas/common.py +83 -0
- gpustack-0.1.0rc1/gpustack/schemas/dashboard.py +73 -0
- gpustack-0.1.0rc1/gpustack/schemas/gpu_devices.py +27 -0
- gpustack-0.1.0rc1/gpustack/schemas/model_usage.py +25 -0
- gpustack-0.1.0rc1/gpustack/schemas/models.py +144 -0
- gpustack-0.1.0rc1/gpustack/schemas/stmt.py +20 -0
- gpustack-0.1.0rc1/gpustack/schemas/system_load.py +26 -0
- gpustack-0.1.0rc1/gpustack/schemas/users.py +66 -0
- gpustack-0.1.0rc1/gpustack/schemas/workers.py +140 -0
- gpustack-0.1.0rc1/gpustack/security.py +62 -0
- gpustack-0.1.0rc1/gpustack/server/__init__.py +0 -0
- gpustack-0.1.0rc1/gpustack/server/app.py +22 -0
- gpustack-0.1.0rc1/gpustack/server/auth.py +175 -0
- gpustack-0.1.0rc1/gpustack/server/bus.py +64 -0
- gpustack-0.1.0rc1/gpustack/server/controller.py +72 -0
- gpustack-0.1.0rc1/gpustack/server/db.py +59 -0
- gpustack-0.1.0rc1/gpustack/server/deps.py +12 -0
- gpustack-0.1.0rc1/gpustack/server/server.py +182 -0
- gpustack-0.1.0rc1/gpustack/server/system_load.py +113 -0
- gpustack-0.1.0rc1/gpustack/server/worker_syncer.py +52 -0
- gpustack-0.1.0rc1/gpustack/third_party/fastfetch/fastfetch-linux-aarch64 +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/fastfetch/fastfetch-linux-amd64 +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/fastfetch/fastfetch-macos-universal +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/gguf-parser/gguf-parser-darwin-universal +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/gguf-parser/gguf-parser-linux-amd64 +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/gguf-parser/gguf-parser-linux-arm64 +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/llama-box/llama-box-darwin-amd64-metal +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/llama-box/llama-box-darwin-arm64-metal +0 -0
- gpustack-0.1.0rc1/gpustack/third_party/llama-box/llama-box-linux-amd64-cuda-12.5 +0 -0
- gpustack-0.1.0rc1/gpustack/ui/css/layouts__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__api-keys__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__dashboard__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__llmodels__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__login__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__playground__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__profile__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__resources__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/p__users__index.1720086314638.chunk.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/umi.1720086314638.css +1 -0
- gpustack-0.1.0rc1/gpustack/ui/css/umi.1720086314638.css.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/index.html +15 -0
- gpustack-0.1.0rc1/gpustack/ui/js/0.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/0.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/242.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/242.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/255.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/255.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/259.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/302.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/347.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/347.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/349.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/349.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/393.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/423.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/423.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/431.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/431.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/522.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/522.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/530.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/582.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/582.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/602.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/602.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/70.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/70.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/731.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/731.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/762.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/762.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/798.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/86.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/86.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/921.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/921.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/927.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/927.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/950.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/981.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/981.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/layouts__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__404.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__api-keys__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__api-keys__index.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__dashboard__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__dashboard__index.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__llmodels__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__llmodels__index.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__login__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__login__index.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__playground__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__playground__index.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__profile__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__resources__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__resources__index.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__users__index.1720086314638.chunk.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/p__users__index.1720086314638.chunk.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/js/umi.1720086314638.js +1 -0
- gpustack-0.1.0rc1/gpustack/ui/js/umi.1720086314638.js.gz +0 -0
- gpustack-0.1.0rc1/gpustack/ui/static/avatar.d61efc48.png +0 -0
- gpustack-0.1.0rc1/gpustack/ui/static/favicon.png +0 -0
- gpustack-0.1.0rc1/gpustack/ui/static/gpustack-logo.58d53008.png +0 -0
- gpustack-0.1.0rc1/gpustack/utils.py +88 -0
- gpustack-0.1.0rc1/gpustack/worker/__init__.py +3 -0
- gpustack-0.1.0rc1/gpustack/worker/collector.py +249 -0
- gpustack-0.1.0rc1/gpustack/worker/downloaders.py +177 -0
- gpustack-0.1.0rc1/gpustack/worker/exporter.py +254 -0
- gpustack-0.1.0rc1/gpustack/worker/inference_server.py +206 -0
- gpustack-0.1.0rc1/gpustack/worker/logs.py +65 -0
- gpustack-0.1.0rc1/gpustack/worker/serve_manager.py +171 -0
- gpustack-0.1.0rc1/gpustack/worker/worker.py +112 -0
- gpustack-0.1.0rc1/gpustack/worker/worker_manager.py +100 -0
- gpustack-0.1.0rc1/pyproject.toml +63 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: gpustack
|
|
3
|
+
Version: 0.1.0rc1
|
|
4
|
+
Summary: GPUStack
|
|
5
|
+
Author: GPUStack Authors
|
|
6
|
+
Author-email: contact@gpustack.ai
|
|
7
|
+
Requires-Python: >=3.11,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Requires-Dist: aiosqlite (>=0.20.0,<0.21.0)
|
|
12
|
+
Requires-Dist: alembic (>=1.13.2,<2.0.0)
|
|
13
|
+
Requires-Dist: apscheduler (>=3.10.4,<4.0.0)
|
|
14
|
+
Requires-Dist: argon2-cffi (>=23.1.0,<24.0.0)
|
|
15
|
+
Requires-Dist: asyncpg (>=0.29.0,<0.30.0)
|
|
16
|
+
Requires-Dist: attrs (>=23.2.0,<24.0.0)
|
|
17
|
+
Requires-Dist: colorama (>=0.4.6,<0.5.0)
|
|
18
|
+
Requires-Dist: dataclasses-json (>=0.6.7,<0.7.0)
|
|
19
|
+
Requires-Dist: fastapi (>=0.111.0,<0.112.0)
|
|
20
|
+
Requires-Dist: httpx[socks] (>=0.27.0,<0.28.0)
|
|
21
|
+
Requires-Dist: huggingface-hub (>=0.23.3,<0.24.0)
|
|
22
|
+
Requires-Dist: inflection (>=0.5.1,<0.6.0)
|
|
23
|
+
Requires-Dist: netifaces (>=0.11.0,<0.12.0)
|
|
24
|
+
Requires-Dist: openai (>=1.31.1,<2.0.0)
|
|
25
|
+
Requires-Dist: prometheus-client (>=0.20.0,<0.21.0)
|
|
26
|
+
Requires-Dist: psutil (>=5.9.8,<6.0.0)
|
|
27
|
+
Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
|
|
28
|
+
Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
|
|
29
|
+
Requires-Dist: python-multipart (>=0.0.9,<0.0.10)
|
|
30
|
+
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
|
31
|
+
Requires-Dist: setproctitle (>=1.3.3,<2.0.0)
|
|
32
|
+
Requires-Dist: sqlalchemy[asyncio] (>=2.0.30,<3.0.0)
|
|
33
|
+
Requires-Dist: sqlmodel (>=0.0.18,<0.0.19)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from fastapi import FastAPI, Request, status
|
|
2
|
+
from fastapi.exceptions import RequestValidationError
|
|
3
|
+
from fastapi.responses import JSONResponse
|
|
4
|
+
import httpx
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class HTTPException(Exception):
    """Base application exception carrying an HTTP status, reason, and message."""

    def __init__(self, status_code: int, reason: str, message: str):
        self.status_code = status_code  # HTTP status code to return
        self.reason = reason  # machine-readable error category
        self.message = message  # human-readable description


def http_exception_factory(status_code: int, reason: str, default_message: str):
    """Create a new ``HTTPException`` subclass bound to a fixed status/reason.

    The generated class is named ``<reason>Exception`` and its constructor
    accepts only an optional message, defaulting to *default_message*.

    Note: the ``__init__`` calls ``HTTPException.__init__`` directly instead of
    ``super(self.__class__, self)`` — the latter recurses infinitely as soon as
    a generated class is subclassed, because ``self.__class__`` then resolves
    to the subclass and ``super`` lands back on this same ``__init__``.
    """

    def _init(self, message: str = default_message):
        HTTPException.__init__(self, status_code, reason, message)

    return type(reason + "Exception", (HTTPException,), {"__init__": _init})
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Canonical HTTP error exception types used across the API. Each is a
# generated HTTPException subclass with a fixed status code, machine-readable
# reason, and a default human-readable message.
AlreadyExistsException = http_exception_factory(
    status.HTTP_409_CONFLICT, "AlreadyExists", "Already exists"
)
NotFoundException = http_exception_factory(
    status.HTTP_404_NOT_FOUND, "NotFound", "Not found"
)
UnauthorizedException = http_exception_factory(
    status.HTTP_401_UNAUTHORIZED, "Unauthorized", "Unauthorized"
)
ForbiddenException = http_exception_factory(
    status.HTTP_403_FORBIDDEN, "Forbidden", "Forbidden"
)
InvalidException = http_exception_factory(
    status.HTTP_422_UNPROCESSABLE_ENTITY, "Invalid", "Invalid input"
)
BadRequestException = http_exception_factory(
    status.HTTP_400_BAD_REQUEST, "BadRequest", "Bad request"
)
InternalServerErrorException = http_exception_factory(
    status.HTTP_500_INTERNAL_SERVER_ERROR,
    "InternalServerError",
    "Internal server error",
)
ServiceUnavailableException = http_exception_factory(
    status.HTTP_503_SERVICE_UNAVAILABLE, "ServiceUnavailable", "Service unavailable"
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def raise_if_response_error(response: httpx.Response):
    """Raise the matching typed exception for a 4xx/5xx response; no-op otherwise.

    The response body is parsed as an ``ErrorResponse`` payload. Known status
    codes map to their dedicated exception classes; anything else falls back
    to a plain ``HTTPException`` built from the payload.
    """
    if response.status_code < status.HTTP_400_BAD_REQUEST:
        return

    error = ErrorResponse.model_validate(response.json())
    code = response.status_code

    # 409 is only treated as "already exists" when the server says so;
    # other conflict reasons fall through to the generic exception below.
    if code == status.HTTP_409_CONFLICT and error.reason == "AlreadyExists":
        raise AlreadyExistsException(error.message)

    dispatch = {
        status.HTTP_404_NOT_FOUND: NotFoundException,
        status.HTTP_401_UNAUTHORIZED: UnauthorizedException,
        status.HTTP_403_FORBIDDEN: ForbiddenException,
        status.HTTP_422_UNPROCESSABLE_ENTITY: InvalidException,
        status.HTTP_400_BAD_REQUEST: BadRequestException,
        status.HTTP_500_INTERNAL_SERVER_ERROR: InternalServerErrorException,
        status.HTTP_503_SERVICE_UNAVAILABLE: ServiceUnavailableException,
    }
    exception_cls = dispatch.get(code)
    if exception_cls is not None:
        raise exception_cls(error.message)

    raise HTTPException(error.code, error.reason, error.message)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ErrorResponse(BaseModel):
    """Standard JSON error payload returned by all API error responses."""

    # HTTP status code echoed in the body.
    code: int
    # Machine-readable error category (e.g. "NotFound", "AlreadyExists").
    reason: str
    # Human-readable description of the failure.
    message: str
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# OpenAPI "responses" documentation: every error status a route may return
# is declared to use the shared ErrorResponse schema. The tuple keeps the
# original declaration order.
error_responses = {
    code: {"model": ErrorResponse}
    for code in (404, 409, 401, 403, 422, 400, 500, 503)
}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def register_handlers(app: FastAPI):
    """Install JSON error handlers on *app*.

    Application ``HTTPException``s and FastAPI validation errors are both
    rendered as ``ErrorResponse`` JSON bodies.
    """

    @app.exception_handler(HTTPException)
    async def http_exception_handler(request: Request, exc: HTTPException):
        body = ErrorResponse(
            code=exc.status_code, reason=exc.reason, message=exc.message
        )
        return JSONResponse(status_code=exc.status_code, content=body.model_dump())

    @app.exception_handler(RequestValidationError)
    async def validation_exception_handler(request, exc: RequestValidationError):
        errors = exc.errors()
        # One summary line followed by one indented line per validation error.
        message = f"{len(errors)} validation errors:\n" + "".join(
            f" {err}\n" for err in errors
        )
        body = ErrorResponse(
            code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            reason="Invalid",
            message=message,
        )
        return JSONResponse(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            content=body.model_dump(),
        )
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from datetime import date
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
from fastapi import Request, Response
|
|
6
|
+
from fastapi.responses import StreamingResponse
|
|
7
|
+
from jwt import DecodeError, ExpiredSignatureError
|
|
8
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
9
|
+
from openai.types.chat import ChatCompletion, ChatCompletionChunk
|
|
10
|
+
from gpustack.schemas.model_usage import ModelUsage
|
|
11
|
+
from gpustack.schemas.models import Model
|
|
12
|
+
from gpustack.schemas.users import User
|
|
13
|
+
from gpustack.security import JWT_TOKEN_EXPIRE_MINUTES, JWTManager
|
|
14
|
+
from gpustack.server.auth import SESSION_COOKIE_NAME
|
|
15
|
+
from gpustack.server.db import get_engine
|
|
16
|
+
from sqlmodel.ext.asyncio.session import AsyncSession
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ModelUsageMiddleware(BaseHTTPMiddleware):
    """Record per-user, per-model token usage for chat completion requests.

    Only successful (HTTP 200) responses to ``/v1-openai/chat/completions``
    are inspected; everything else passes through untouched.
    """

    async def dispatch(self, request: Request, call_next):
        response = await call_next(request)
        if (
            not request.url.path == "/v1-openai/chat/completions"
            or response.status_code != 200
        ):
            return response

        # The upstream handler is expected to have flagged streaming requests
        # on request.state — assumption; confirm against the openai route.
        stream: bool = getattr(request.state, "stream", False)
        if stream:
            response = await self.handle_streaming_response(request, response)
        else:
            # The body iterator is consumed here, so a fresh Response must be
            # rebuilt from the buffered bytes below.
            response_body = b"".join([chunk async for chunk in response.body_iterator])
            try:
                completion_dict = json.loads(response_body)
                chat_completion = ChatCompletion(**completion_dict)
                await self.process_model_usage(request, chat_completion)
            except Exception as e:
                # Usage accounting is best-effort: never fail the request.
                logger.error(f"Error processing model usage: {e}")
            response = Response(content=response_body, headers=dict(response.headers))

        return response

    async def handle_streaming_response(
        self, request: Request, response: StreamingResponse
    ):
        """Wrap the SSE stream, recording usage from the chunk carrying totals."""

        async def streaming_generator():
            try:
                async for chunk in response.body_iterator:
                    data = chunk.decode("utf-8")
                    yield chunk
                    # The usage-bearing chunk contains "completion_tokens".
                    # NOTE(review): assumes the full `data: {...}` JSON arrives
                    # in a single chunk — a split across chunk boundaries would
                    # fail json.loads and be swallowed below; confirm upstream
                    # chunking guarantees.
                    if '"completion_tokens":' in data:
                        completion_dict = json.loads(data.split('data: ')[-1])
                        completion_chunk = ChatCompletionChunk(**completion_dict)
                        await self.process_model_usage(request, completion_chunk)
                        break

                # Forward any remaining chunks (e.g. the terminating [DONE]).
                async for chunk in response.body_iterator:
                    yield chunk
            except Exception as e:
                logger.error(f"Error processing streaming response: {e}")

        return StreamingResponse(streaming_generator(), headers=dict(response.headers))

    async def process_model_usage(
        self, request: Request, chat_completion: ChatCompletion | ChatCompletionChunk
    ):
        """Upsert today's ModelUsage row for the requesting user and model."""
        completion_tokens = chat_completion.usage.completion_tokens
        prompt_tokens = chat_completion.usage.prompt_tokens
        # Set by the auth / model-routing layers earlier in the stack —
        # assumption; confirm both are always present on this path.
        user: User = request.state.user
        model: Model = request.state.model
        # Usage is aggregated per (user, model, day).
        fields = {
            "user_id": user.id,
            "model_id": model.id,
            "date": date.today(),
        }
        model_usage = ModelUsage(
            **fields,
            completion_token_count=completion_tokens,
            prompt_token_count=prompt_tokens,
            request_count=1,
            operation="chat_completion",
        )
        async with AsyncSession(get_engine()) as session:
            current_model_usage = await ModelUsage.one_by_fields(session, fields)
            if current_model_usage:
                # Accumulate into today's existing row.
                current_model_usage.completion_token_count += completion_tokens
                current_model_usage.prompt_token_count += prompt_tokens
                current_model_usage.request_count += 1
                await current_model_usage.update(session)
            else:
                await ModelUsage.create(session, model_usage)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class RefreshTokenMiddleware(BaseHTTPMiddleware):
    """Sliding-session middleware: re-issue the JWT cookie when near expiry."""

    async def dispatch(self, request: Request, call_next):
        response = await call_next(request)

        token = request.cookies.get(SESSION_COOKIE_NAME)
        if not token:
            return response

        jwt_manager: JWTManager = request.app.state.jwt_manager
        try:
            payload = jwt_manager.decode_jwt_token(token)
            # Refresh only when fewer than five minutes of validity remain.
            if payload and payload['exp'] - time.time() < 5 * 60:
                new_token = jwt_manager.create_jwt_token(username=payload['sub'])
                response.set_cookie(
                    key=SESSION_COOKIE_NAME,
                    value=new_token,
                    httponly=True,
                    max_age=JWT_TOKEN_EXPIRE_MINUTES * 60,
                    expires=JWT_TOKEN_EXPIRE_MINUTES * 60,
                )
        except (ExpiredSignatureError, DecodeError):
            # An invalid or expired cookie is simply left alone; auth
            # enforcement happens elsewhere.
            pass

        return response
|
|
File without changes
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
from colorama import Fore, Style
|
|
6
|
+
from openai import OpenAI
|
|
7
|
+
from pydantic import model_validator
|
|
8
|
+
from pydantic_settings import BaseSettings
|
|
9
|
+
from tqdm import tqdm
|
|
10
|
+
|
|
11
|
+
from gpustack.client.generated_clientset import ClientSet
|
|
12
|
+
from gpustack.schemas.models import (
|
|
13
|
+
ModelCreate,
|
|
14
|
+
ModelInstance,
|
|
15
|
+
ModelInstanceStateEnum,
|
|
16
|
+
SourceEnum,
|
|
17
|
+
)
|
|
18
|
+
from gpustack.server.bus import Event
|
|
19
|
+
from openai.types.chat import (
|
|
20
|
+
ChatCompletionMessageParam,
|
|
21
|
+
ChatCompletionUserMessageParam,
|
|
22
|
+
ChatCompletionAssistantMessageParam,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ChatConfig(BaseSettings):
    """CLI chat configuration, sourced from arguments and environment."""

    debug: bool = False
    model: str
    prompt: Optional[str] = None
    # Server URL and API key default from the environment at import time.
    base_url: str = os.getenv("GPUSTACK_SERVER_URL", "http://127.0.0.1")
    api_key: Optional[str] = os.getenv("GPUSTACK_API_KEY")

    @model_validator(mode="after")
    def check_api_key(self):
        """Require an API key for remote servers; use a placeholder locally.

        Returns self, as Pydantic v2 "after" model validators must return the
        model — the original returned None implicitly, which makes validation
        yield None instead of the config instance.
        """
        if self.base_url != "http://127.0.0.1" and not self.api_key:
            raise ValueError(
                "API key is required. Please set GPUSTACK_API_KEY env var."
            )
        elif self.base_url == "http://127.0.0.1" and not self.api_key:
            self.api_key = "local"
        return self
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def parse_arguments(args) -> ChatConfig:
    """Build a ChatConfig from a parsed argparse namespace (debug/model/prompt)."""
    return ChatConfig(debug=args.debug, model=args.model, prompt=args.prompt)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def print_completion_result(message):
    """Print *message* at the end of the previous terminal line.

    Used so the completion output appears on the same line as the prompt.
    """
    # ANSI: "\033[F" moves the cursor up one line, "\033[1000C" pushes it
    # far right (effectively end-of-line).
    cursor_to_prev_line_end = "\033[F\033[1000C"
    sys.stdout.write(cursor_to_prev_line_end)
    print(message)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def print_error(message):
    """Print *message* in red, resetting terminal styling afterwards."""
    print(f"{Fore.RED}{message}{Style.RESET_ALL}")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class ChatManager:
    """Interactive CLI chat session against a GPUStack-served model.

    Ensures the target model exists (creating it from the Ollama library if
    needed), waits for it to be ready, then runs either a one-shot prompt or
    a REPL loop with in-memory conversation history.
    """

    def __init__(self, cfg: ChatConfig) -> None:
        self._model_name = cfg.model
        self._prompt = cfg.prompt
        self._clientset = ClientSet(base_url=cfg.base_url, api_key=cfg.api_key)
        # OpenAI-compatible endpoint exposed by the GPUStack server.
        self._openai_client = OpenAI(
            base_url=f"{cfg.base_url}/v1-openai", api_key=cfg.api_key
        )
        # Full conversation history sent with every completion request.
        self._history: List[ChatCompletionMessageParam] = []

    def start(self):
        """Run the chat: one-shot if a prompt was given, otherwise a REPL."""
        self._ensure_model()

        if self._prompt:
            self.chat_completion(self._prompt)
            return

        user_input = None
        while True:
            user_input = input(">")
            # Backslash commands control the session; anything else is chat.
            if user_input == "\\q" or user_input == "\\quit":
                break
            elif user_input == "\\?" or user_input == "\\h" or user_input == "\\help":
                self._print_help()
                continue
            elif user_input == "\\c" or user_input == "\\clear":
                self._clear_context()
                continue
            elif not user_input.strip():
                continue

            try:
                self.chat_completion(user_input)
            except Exception as e:
                # Keep the REPL alive on per-request failures.
                print_error(e)

    @staticmethod
    def _print_help():
        """Print the available backslash commands."""
        print("Commands:")
        print(" \\q or \\quit - Quit the chat")
        print(" \\c or \\clear - Clear chat context in prompt")
        print(" \\h or \\? or \\help - Print this help message")

    def _clear_context(self):
        """Drop the accumulated conversation history."""
        self._history = []
        print("Chat context cleared.")

    def _ensure_model(self):
        """Find or create the target model, then wait until it is running."""
        models = self._clientset.models.list()
        for model in models.items:
            if model.name == self._model_name:
                self._model = model
                break

        # _model is only set inside the loop above when a match was found.
        if not hasattr(self, "_model"):
            self._create_model()

        self._wait_for_model_ready()

    def _create_model(self):
        """Create the model on the server, sourced from the Ollama library."""
        model_create = ModelCreate(
            name=self._model_name,
            source=SourceEnum.OLLAMA_LIBRARY,
            ollama_library_model_name=self._model_name,
        )
        created = self._clientset.models.create(model_create=model_create)
        self._model = created

    def _wait_for_model_ready(self):
        """Block until a model instance reaches Running, showing progress.

        Watches model-instance events; raises if an instance reports an
        error state, and renders a tqdm bar once download progress appears.
        """

        def stop_when_running(event: Event) -> bool:
            # NOTE(review): compares the instance event's "id" with the
            # model's id — presumably valid because the watch is filtered by
            # model_id below, but confirm this shouldn't be "model_id".
            if event.data["id"] == self._model.id and event.data["state"] == "Running":
                return True
            elif event.data["state"] == ModelInstanceStateEnum.error:
                raise Exception(f"Error running model: {event.data['state_message']}")
            return False

        with tqdm(
            total=0,
            desc=f"Preparing {self._model_name} model...",
            bar_format="{desc}",
            leave=False,
        ) as pbar:
            current_progress = 0

            def print_progress(event: Event):
                nonlocal current_progress
                mi = ModelInstance.model_validate(event.data)
                if mi.download_progress is not None:
                    increment = mi.download_progress - current_progress
                    # Ignore duplicate or out-of-order progress events.
                    if increment <= 0:
                        return

                    # First progress event: switch from a text-only bar to a
                    # real 0-100 percentage bar.
                    if pbar.total == 0:
                        pbar.total = 100
                        pbar.bar_format = "{l_bar}{bar}{r_bar}"
                        pbar.set_description(f"Downloading {self._model_name} model")
                        pbar.reset()

                    pbar.update(increment)
                    current_progress = mi.download_progress

            self._clientset.model_instances.watch(
                stop_condition=stop_when_running,
                callback=print_progress,
                params={"model_id": self._model.id},
            )

    def chat_completion(self, prompt: str):
        """Send *prompt* with full history and stream the reply to stdout."""
        self._history.append(
            ChatCompletionUserMessageParam(role="user", content=prompt)
        )

        completion = self._openai_client.chat.completions.create(
            model=self._model_name,
            messages=self._history,
            stream=True,
        )

        result = ""
        for chunk in completion:
            if chunk.choices[0].delta.content:
                result += chunk.choices[0].delta.content
                print(chunk.choices[0].delta.content, end="", flush=True)

        # Record the assistant turn so context carries into the next request.
        self._history.append(
            ChatCompletionAssistantMessageParam(role="assistant", content=result)
        )
        print()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
from .generated_http_client import HTTPClient
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from .generated_worker_client import WorkerClient
|
|
6
|
+
from .generated_model_client import ModelClient
|
|
7
|
+
from .generated_model_instance_client import ModelInstanceClient
|
|
8
|
+
from .generated_user_client import UserClient
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ClientSet:
    """Aggregates the typed API clients behind one authenticated HTTP client.

    Authentication precedence: an API key (Bearer) wins over username and
    password (Basic); with neither, requests are sent unauthenticated.
    """

    def __init__(
        self,
        base_url: str,
        api_key: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
        if headers is None:
            headers = {}

        # Pick the Authorization header value, if any credentials were given.
        authorization = None
        if api_key:
            authorization = f"Bearer {api_key}"
        elif username and password:
            credentials = f"{username}:{password}".encode()
            authorization = f"Basic {base64.b64encode(credentials).decode()}"
        if authorization is not None:
            headers["Authorization"] = authorization

        self.base_url = base_url
        self.headers = headers

        http_client = HTTPClient(base_url=base_url).with_headers(headers)

        self.workers = WorkerClient(http_client)
        self.models = ModelClient(http_client)
        self.model_instances = ModelInstanceClient(http_client)
        self.users = UserClient(http_client)
|