gpustack 0.1.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. gpustack-0.1.0rc1/PKG-INFO +33 -0
  2. gpustack-0.1.0rc1/gpustack/__init__.py +2 -0
  3. gpustack-0.1.0rc1/gpustack/api/exceptions.py +133 -0
  4. gpustack-0.1.0rc1/gpustack/api/middlewares.py +122 -0
  5. gpustack-0.1.0rc1/gpustack/chat/__init__.py +0 -0
  6. gpustack-0.1.0rc1/gpustack/chat/manager.py +184 -0
  7. gpustack-0.1.0rc1/gpustack/client/__init__.py +4 -0
  8. gpustack-0.1.0rc1/gpustack/client/generated_clientset.py +39 -0
  9. gpustack-0.1.0rc1/gpustack/client/generated_http_client.py +286 -0
  10. gpustack-0.1.0rc1/gpustack/client/generated_model_client.py +69 -0
  11. gpustack-0.1.0rc1/gpustack/client/generated_model_instance_client.py +69 -0
  12. gpustack-0.1.0rc1/gpustack/client/generated_user_client.py +69 -0
  13. gpustack-0.1.0rc1/gpustack/client/generated_worker_client.py +69 -0
  14. gpustack-0.1.0rc1/gpustack/cmd/__init__.py +3 -0
  15. gpustack-0.1.0rc1/gpustack/cmd/chat.py +31 -0
  16. gpustack-0.1.0rc1/gpustack/cmd/start.py +231 -0
  17. gpustack-0.1.0rc1/gpustack/cmd/version.py +24 -0
  18. gpustack-0.1.0rc1/gpustack/codegen/__init__.py +0 -0
  19. gpustack-0.1.0rc1/gpustack/codegen/filters.py +17 -0
  20. gpustack-0.1.0rc1/gpustack/codegen/generate.py +91 -0
  21. gpustack-0.1.0rc1/gpustack/codegen/templates/client.py.jinja +70 -0
  22. gpustack-0.1.0rc1/gpustack/codegen/templates/clientset.py.jinja +35 -0
  23. gpustack-0.1.0rc1/gpustack/codegen/templates/http_client.py.jinja +286 -0
  24. gpustack-0.1.0rc1/gpustack/config/__init__.py +4 -0
  25. gpustack-0.1.0rc1/gpustack/config/config.py +155 -0
  26. gpustack-0.1.0rc1/gpustack/http_proxy/__init__.py +0 -0
  27. gpustack-0.1.0rc1/gpustack/http_proxy/load_balancer.py +17 -0
  28. gpustack-0.1.0rc1/gpustack/http_proxy/strategies.py +35 -0
  29. gpustack-0.1.0rc1/gpustack/logging.py +29 -0
  30. gpustack-0.1.0rc1/gpustack/main.py +44 -0
  31. gpustack-0.1.0rc1/gpustack/migrations/README +1 -0
  32. gpustack-0.1.0rc1/gpustack/migrations/env.py +82 -0
  33. gpustack-0.1.0rc1/gpustack/migrations/script.py.mako +28 -0
  34. gpustack-0.1.0rc1/gpustack/migrations/versions/2024_06_28_1630-4f4ec0a5fcb3_init_tables.py +105 -0
  35. gpustack-0.1.0rc1/gpustack/migrations/versions/2024_06_28_1831-a16a55af6f75_add_instance_name.py +34 -0
  36. gpustack-0.1.0rc1/gpustack/mixins/__init__.py +6 -0
  37. gpustack-0.1.0rc1/gpustack/mixins/active_record.py +323 -0
  38. gpustack-0.1.0rc1/gpustack/mixins/timestamp.py +27 -0
  39. gpustack-0.1.0rc1/gpustack/routes/__init__.py +0 -0
  40. gpustack-0.1.0rc1/gpustack/routes/api_keys.py +86 -0
  41. gpustack-0.1.0rc1/gpustack/routes/auth.py +64 -0
  42. gpustack-0.1.0rc1/gpustack/routes/dashboard.py +269 -0
  43. gpustack-0.1.0rc1/gpustack/routes/gpu_devices.py +42 -0
  44. gpustack-0.1.0rc1/gpustack/routes/model_instances.py +154 -0
  45. gpustack-0.1.0rc1/gpustack/routes/models.py +115 -0
  46. gpustack-0.1.0rc1/gpustack/routes/openai.py +108 -0
  47. gpustack-0.1.0rc1/gpustack/routes/probes.py +14 -0
  48. gpustack-0.1.0rc1/gpustack/routes/routes.py +54 -0
  49. gpustack-0.1.0rc1/gpustack/routes/ui.py +21 -0
  50. gpustack-0.1.0rc1/gpustack/routes/users.py +117 -0
  51. gpustack-0.1.0rc1/gpustack/routes/workers.py +86 -0
  52. gpustack-0.1.0rc1/gpustack/scheduler/calculator.py +171 -0
  53. gpustack-0.1.0rc1/gpustack/scheduler/policy.py +153 -0
  54. gpustack-0.1.0rc1/gpustack/scheduler/queue.py +34 -0
  55. gpustack-0.1.0rc1/gpustack/scheduler/scheduler.py +207 -0
  56. gpustack-0.1.0rc1/gpustack/schemas/__init__.py +76 -0
  57. gpustack-0.1.0rc1/gpustack/schemas/api_keys.py +34 -0
  58. gpustack-0.1.0rc1/gpustack/schemas/common.py +83 -0
  59. gpustack-0.1.0rc1/gpustack/schemas/dashboard.py +73 -0
  60. gpustack-0.1.0rc1/gpustack/schemas/gpu_devices.py +27 -0
  61. gpustack-0.1.0rc1/gpustack/schemas/model_usage.py +25 -0
  62. gpustack-0.1.0rc1/gpustack/schemas/models.py +144 -0
  63. gpustack-0.1.0rc1/gpustack/schemas/stmt.py +20 -0
  64. gpustack-0.1.0rc1/gpustack/schemas/system_load.py +26 -0
  65. gpustack-0.1.0rc1/gpustack/schemas/users.py +66 -0
  66. gpustack-0.1.0rc1/gpustack/schemas/workers.py +140 -0
  67. gpustack-0.1.0rc1/gpustack/security.py +62 -0
  68. gpustack-0.1.0rc1/gpustack/server/__init__.py +0 -0
  69. gpustack-0.1.0rc1/gpustack/server/app.py +22 -0
  70. gpustack-0.1.0rc1/gpustack/server/auth.py +175 -0
  71. gpustack-0.1.0rc1/gpustack/server/bus.py +64 -0
  72. gpustack-0.1.0rc1/gpustack/server/controller.py +72 -0
  73. gpustack-0.1.0rc1/gpustack/server/db.py +59 -0
  74. gpustack-0.1.0rc1/gpustack/server/deps.py +12 -0
  75. gpustack-0.1.0rc1/gpustack/server/server.py +182 -0
  76. gpustack-0.1.0rc1/gpustack/server/system_load.py +113 -0
  77. gpustack-0.1.0rc1/gpustack/server/worker_syncer.py +52 -0
  78. gpustack-0.1.0rc1/gpustack/third_party/fastfetch/fastfetch-linux-aarch64 +0 -0
  79. gpustack-0.1.0rc1/gpustack/third_party/fastfetch/fastfetch-linux-amd64 +0 -0
  80. gpustack-0.1.0rc1/gpustack/third_party/fastfetch/fastfetch-macos-universal +0 -0
  81. gpustack-0.1.0rc1/gpustack/third_party/gguf-parser/gguf-parser-darwin-universal +0 -0
  82. gpustack-0.1.0rc1/gpustack/third_party/gguf-parser/gguf-parser-linux-amd64 +0 -0
  83. gpustack-0.1.0rc1/gpustack/third_party/gguf-parser/gguf-parser-linux-arm64 +0 -0
  84. gpustack-0.1.0rc1/gpustack/third_party/llama-box/llama-box-darwin-amd64-metal +0 -0
  85. gpustack-0.1.0rc1/gpustack/third_party/llama-box/llama-box-darwin-arm64-metal +0 -0
  86. gpustack-0.1.0rc1/gpustack/third_party/llama-box/llama-box-linux-amd64-cuda-12.5 +0 -0
  87. gpustack-0.1.0rc1/gpustack/ui/css/layouts__index.1720086314638.chunk.css +1 -0
  88. gpustack-0.1.0rc1/gpustack/ui/css/p__api-keys__index.1720086314638.chunk.css +1 -0
  89. gpustack-0.1.0rc1/gpustack/ui/css/p__dashboard__index.1720086314638.chunk.css +1 -0
  90. gpustack-0.1.0rc1/gpustack/ui/css/p__llmodels__index.1720086314638.chunk.css +1 -0
  91. gpustack-0.1.0rc1/gpustack/ui/css/p__login__index.1720086314638.chunk.css +1 -0
  92. gpustack-0.1.0rc1/gpustack/ui/css/p__playground__index.1720086314638.chunk.css +1 -0
  93. gpustack-0.1.0rc1/gpustack/ui/css/p__profile__index.1720086314638.chunk.css +1 -0
  94. gpustack-0.1.0rc1/gpustack/ui/css/p__resources__index.1720086314638.chunk.css +1 -0
  95. gpustack-0.1.0rc1/gpustack/ui/css/p__users__index.1720086314638.chunk.css +1 -0
  96. gpustack-0.1.0rc1/gpustack/ui/css/umi.1720086314638.css +1 -0
  97. gpustack-0.1.0rc1/gpustack/ui/css/umi.1720086314638.css.gz +0 -0
  98. gpustack-0.1.0rc1/gpustack/ui/index.html +15 -0
  99. gpustack-0.1.0rc1/gpustack/ui/js/0.1720086314638.chunk.js +1 -0
  100. gpustack-0.1.0rc1/gpustack/ui/js/0.1720086314638.chunk.js.gz +0 -0
  101. gpustack-0.1.0rc1/gpustack/ui/js/242.1720086314638.chunk.js +1 -0
  102. gpustack-0.1.0rc1/gpustack/ui/js/242.1720086314638.chunk.js.gz +0 -0
  103. gpustack-0.1.0rc1/gpustack/ui/js/255.1720086314638.chunk.js +1 -0
  104. gpustack-0.1.0rc1/gpustack/ui/js/255.1720086314638.chunk.js.gz +0 -0
  105. gpustack-0.1.0rc1/gpustack/ui/js/259.1720086314638.chunk.js +1 -0
  106. gpustack-0.1.0rc1/gpustack/ui/js/302.1720086314638.chunk.js +1 -0
  107. gpustack-0.1.0rc1/gpustack/ui/js/347.1720086314638.chunk.js +1 -0
  108. gpustack-0.1.0rc1/gpustack/ui/js/347.1720086314638.chunk.js.gz +0 -0
  109. gpustack-0.1.0rc1/gpustack/ui/js/349.1720086314638.chunk.js +1 -0
  110. gpustack-0.1.0rc1/gpustack/ui/js/349.1720086314638.chunk.js.gz +0 -0
  111. gpustack-0.1.0rc1/gpustack/ui/js/393.1720086314638.chunk.js +1 -0
  112. gpustack-0.1.0rc1/gpustack/ui/js/423.1720086314638.chunk.js +1 -0
  113. gpustack-0.1.0rc1/gpustack/ui/js/423.1720086314638.chunk.js.gz +0 -0
  114. gpustack-0.1.0rc1/gpustack/ui/js/431.1720086314638.chunk.js +1 -0
  115. gpustack-0.1.0rc1/gpustack/ui/js/431.1720086314638.chunk.js.gz +0 -0
  116. gpustack-0.1.0rc1/gpustack/ui/js/522.1720086314638.chunk.js +1 -0
  117. gpustack-0.1.0rc1/gpustack/ui/js/522.1720086314638.chunk.js.gz +0 -0
  118. gpustack-0.1.0rc1/gpustack/ui/js/530.1720086314638.chunk.js +1 -0
  119. gpustack-0.1.0rc1/gpustack/ui/js/582.1720086314638.chunk.js +1 -0
  120. gpustack-0.1.0rc1/gpustack/ui/js/582.1720086314638.chunk.js.gz +0 -0
  121. gpustack-0.1.0rc1/gpustack/ui/js/602.1720086314638.chunk.js +1 -0
  122. gpustack-0.1.0rc1/gpustack/ui/js/602.1720086314638.chunk.js.gz +0 -0
  123. gpustack-0.1.0rc1/gpustack/ui/js/70.1720086314638.chunk.js +1 -0
  124. gpustack-0.1.0rc1/gpustack/ui/js/70.1720086314638.chunk.js.gz +0 -0
  125. gpustack-0.1.0rc1/gpustack/ui/js/731.1720086314638.chunk.js +1 -0
  126. gpustack-0.1.0rc1/gpustack/ui/js/731.1720086314638.chunk.js.gz +0 -0
  127. gpustack-0.1.0rc1/gpustack/ui/js/762.1720086314638.chunk.js +1 -0
  128. gpustack-0.1.0rc1/gpustack/ui/js/762.1720086314638.chunk.js.gz +0 -0
  129. gpustack-0.1.0rc1/gpustack/ui/js/798.1720086314638.chunk.js +1 -0
  130. gpustack-0.1.0rc1/gpustack/ui/js/86.1720086314638.chunk.js +1 -0
  131. gpustack-0.1.0rc1/gpustack/ui/js/86.1720086314638.chunk.js.gz +0 -0
  132. gpustack-0.1.0rc1/gpustack/ui/js/921.1720086314638.chunk.js +1 -0
  133. gpustack-0.1.0rc1/gpustack/ui/js/921.1720086314638.chunk.js.gz +0 -0
  134. gpustack-0.1.0rc1/gpustack/ui/js/927.1720086314638.chunk.js +1 -0
  135. gpustack-0.1.0rc1/gpustack/ui/js/927.1720086314638.chunk.js.gz +0 -0
  136. gpustack-0.1.0rc1/gpustack/ui/js/950.1720086314638.chunk.js +1 -0
  137. gpustack-0.1.0rc1/gpustack/ui/js/981.1720086314638.chunk.js +1 -0
  138. gpustack-0.1.0rc1/gpustack/ui/js/981.1720086314638.chunk.js.gz +0 -0
  139. gpustack-0.1.0rc1/gpustack/ui/js/layouts__index.1720086314638.chunk.js +1 -0
  140. gpustack-0.1.0rc1/gpustack/ui/js/p__404.1720086314638.chunk.js +1 -0
  141. gpustack-0.1.0rc1/gpustack/ui/js/p__api-keys__index.1720086314638.chunk.js +1 -0
  142. gpustack-0.1.0rc1/gpustack/ui/js/p__api-keys__index.1720086314638.chunk.js.gz +0 -0
  143. gpustack-0.1.0rc1/gpustack/ui/js/p__dashboard__index.1720086314638.chunk.js +1 -0
  144. gpustack-0.1.0rc1/gpustack/ui/js/p__dashboard__index.1720086314638.chunk.js.gz +0 -0
  145. gpustack-0.1.0rc1/gpustack/ui/js/p__llmodels__index.1720086314638.chunk.js +1 -0
  146. gpustack-0.1.0rc1/gpustack/ui/js/p__llmodels__index.1720086314638.chunk.js.gz +0 -0
  147. gpustack-0.1.0rc1/gpustack/ui/js/p__login__index.1720086314638.chunk.js +1 -0
  148. gpustack-0.1.0rc1/gpustack/ui/js/p__login__index.1720086314638.chunk.js.gz +0 -0
  149. gpustack-0.1.0rc1/gpustack/ui/js/p__playground__index.1720086314638.chunk.js +1 -0
  150. gpustack-0.1.0rc1/gpustack/ui/js/p__playground__index.1720086314638.chunk.js.gz +0 -0
  151. gpustack-0.1.0rc1/gpustack/ui/js/p__profile__index.1720086314638.chunk.js +1 -0
  152. gpustack-0.1.0rc1/gpustack/ui/js/p__resources__index.1720086314638.chunk.js +1 -0
  153. gpustack-0.1.0rc1/gpustack/ui/js/p__resources__index.1720086314638.chunk.js.gz +0 -0
  154. gpustack-0.1.0rc1/gpustack/ui/js/p__users__index.1720086314638.chunk.js +1 -0
  155. gpustack-0.1.0rc1/gpustack/ui/js/p__users__index.1720086314638.chunk.js.gz +0 -0
  156. gpustack-0.1.0rc1/gpustack/ui/js/umi.1720086314638.js +1 -0
  157. gpustack-0.1.0rc1/gpustack/ui/js/umi.1720086314638.js.gz +0 -0
  158. gpustack-0.1.0rc1/gpustack/ui/static/avatar.d61efc48.png +0 -0
  159. gpustack-0.1.0rc1/gpustack/ui/static/favicon.png +0 -0
  160. gpustack-0.1.0rc1/gpustack/ui/static/gpustack-logo.58d53008.png +0 -0
  161. gpustack-0.1.0rc1/gpustack/utils.py +88 -0
  162. gpustack-0.1.0rc1/gpustack/worker/__init__.py +3 -0
  163. gpustack-0.1.0rc1/gpustack/worker/collector.py +249 -0
  164. gpustack-0.1.0rc1/gpustack/worker/downloaders.py +177 -0
  165. gpustack-0.1.0rc1/gpustack/worker/exporter.py +254 -0
  166. gpustack-0.1.0rc1/gpustack/worker/inference_server.py +206 -0
  167. gpustack-0.1.0rc1/gpustack/worker/logs.py +65 -0
  168. gpustack-0.1.0rc1/gpustack/worker/serve_manager.py +171 -0
  169. gpustack-0.1.0rc1/gpustack/worker/worker.py +112 -0
  170. gpustack-0.1.0rc1/gpustack/worker/worker_manager.py +100 -0
  171. gpustack-0.1.0rc1/pyproject.toml +63 -0
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.1
2
+ Name: gpustack
3
+ Version: 0.1.0rc1
4
+ Summary: GPUStack
5
+ Author: GPUStack Authors
6
+ Author-email: contact@gpustack.ai
7
+ Requires-Python: >=3.11,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Requires-Dist: aiosqlite (>=0.20.0,<0.21.0)
12
+ Requires-Dist: alembic (>=1.13.2,<2.0.0)
13
+ Requires-Dist: apscheduler (>=3.10.4,<4.0.0)
14
+ Requires-Dist: argon2-cffi (>=23.1.0,<24.0.0)
15
+ Requires-Dist: asyncpg (>=0.29.0,<0.30.0)
16
+ Requires-Dist: attrs (>=23.2.0,<24.0.0)
17
+ Requires-Dist: colorama (>=0.4.6,<0.5.0)
18
+ Requires-Dist: dataclasses-json (>=0.6.7,<0.7.0)
19
+ Requires-Dist: fastapi (>=0.111.0,<0.112.0)
20
+ Requires-Dist: httpx[socks] (>=0.27.0,<0.28.0)
21
+ Requires-Dist: huggingface-hub (>=0.23.3,<0.24.0)
22
+ Requires-Dist: inflection (>=0.5.1,<0.6.0)
23
+ Requires-Dist: netifaces (>=0.11.0,<0.12.0)
24
+ Requires-Dist: openai (>=1.31.1,<2.0.0)
25
+ Requires-Dist: prometheus-client (>=0.20.0,<0.21.0)
26
+ Requires-Dist: psutil (>=5.9.8,<6.0.0)
27
+ Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
28
+ Requires-Dist: pyjwt (>=2.8.0,<3.0.0)
29
+ Requires-Dist: python-multipart (>=0.0.9,<0.0.10)
30
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
31
+ Requires-Dist: setproctitle (>=1.3.3,<2.0.0)
32
+ Requires-Dist: sqlalchemy[asyncio] (>=2.0.30,<3.0.0)
33
+ Requires-Dist: sqlmodel (>=0.0.18,<0.0.19)
@@ -0,0 +1,2 @@
1
+ __version__ = '0.1.0rc1'
2
+ __git_commit__ = 'cdab9a9'
@@ -0,0 +1,133 @@
1
+ from fastapi import FastAPI, Request, status
2
+ from fastapi.exceptions import RequestValidationError
3
+ from fastapi.responses import JSONResponse
4
+ import httpx
5
+ from pydantic import BaseModel
6
+
7
+
8
class HTTPException(Exception):
    """Base error carrying an HTTP status code, a short reason and a message.

    Attributes:
        status_code: HTTP status code associated with the error.
        reason: Short identifier of the error kind (e.g. "NotFound").
        message: Human-readable description of the error.
    """

    def __init__(self, status_code: int, reason: str, message: str):
        self.status_code = status_code
        self.reason = reason
        self.message = message

    def __str__(self) -> str:
        # Without this, str(exc) is derived from the constructor arguments:
        # empty when a factory-made class is raised with its default message,
        # and a tuple repr for a direct HTTPException.
        return self.message


def http_exception_factory(status_code: int, reason: str, default_message: str):
    """Create an HTTPException subclass bound to one status code and reason.

    Args:
        status_code: HTTP status code every instance of the new class carries.
        reason: Reason string of the new class; the class is named
            ``reason + "Exception"``.
        default_message: Message used when the class is instantiated without
            an explicit one.

    Returns:
        A new exception class whose constructor accepts a single optional
        ``message`` argument.
    """
    class_name = reason + "Exception"

    def __init__(self, message=default_message):
        # Call the base initializer explicitly. ``super(self.__class__, self)``
        # resolves to this very function again when the generated class is
        # subclassed, which ends in unbounded recursion.
        HTTPException.__init__(self, status_code, reason, message)

    return type(class_name, (HTTPException,), {"__init__": __init__})
26
+
27
+
28
# Concrete exception classes, one per HTTP status the API reports.
AlreadyExistsException = http_exception_factory(
    status_code=status.HTTP_409_CONFLICT,
    reason="AlreadyExists",
    default_message="Already exists",
)
NotFoundException = http_exception_factory(
    status_code=status.HTTP_404_NOT_FOUND,
    reason="NotFound",
    default_message="Not found",
)
UnauthorizedException = http_exception_factory(
    status_code=status.HTTP_401_UNAUTHORIZED,
    reason="Unauthorized",
    default_message="Unauthorized",
)
ForbiddenException = http_exception_factory(
    status_code=status.HTTP_403_FORBIDDEN,
    reason="Forbidden",
    default_message="Forbidden",
)
InvalidException = http_exception_factory(
    status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
    reason="Invalid",
    default_message="Invalid input",
)
BadRequestException = http_exception_factory(
    status_code=status.HTTP_400_BAD_REQUEST,
    reason="BadRequest",
    default_message="Bad request",
)
InternalServerErrorException = http_exception_factory(
    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
    reason="InternalServerError",
    default_message="Internal server error",
)
ServiceUnavailableException = http_exception_factory(
    status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
    reason="ServiceUnavailable",
    default_message="Service unavailable",
)
54
+
55
+
56
def raise_if_response_error(response: httpx.Response):
    """Raise the API exception matching an error HTTP response.

    Responses with a status code below 400 are accepted silently.

    Args:
        response: The HTTP response to inspect.

    Raises:
        HTTPException: Or one of its status-specific subclasses, when the
            response status code is 400 or above.
    """
    if response.status_code < status.HTTP_400_BAD_REQUEST:
        return

    try:
        error = ErrorResponse.model_validate(response.json())
    except ValueError:
        # The body is not JSON or does not have the ErrorResponse shape (for
        # example an HTML error page from an intermediate proxy). Both
        # json.JSONDecodeError and pydantic's ValidationError derive from
        # ValueError. Fall back to what the HTTP layer itself reports so the
        # caller still gets the API exception for the status.
        error = ErrorResponse(
            code=response.status_code,
            reason=response.reason_phrase,
            message=response.text,
        )

    # A 409 is only reported as "already exists" when the server says so;
    # any other conflict falls through to the generic HTTPException below.
    if (
        response.status_code == status.HTTP_409_CONFLICT
        and error.reason == "AlreadyExists"
    ):
        raise AlreadyExistsException(error.message)

    exceptions_by_status = {
        status.HTTP_404_NOT_FOUND: NotFoundException,
        status.HTTP_401_UNAUTHORIZED: UnauthorizedException,
        status.HTTP_403_FORBIDDEN: ForbiddenException,
        status.HTTP_422_UNPROCESSABLE_ENTITY: InvalidException,
        status.HTTP_400_BAD_REQUEST: BadRequestException,
        status.HTTP_500_INTERNAL_SERVER_ERROR: InternalServerErrorException,
        status.HTTP_503_SERVICE_UNAVAILABLE: ServiceUnavailableException,
    }
    exception_class = exceptions_by_status.get(response.status_code)
    if exception_class is not None:
        raise exception_class(error.message)

    raise HTTPException(error.code, error.reason, error.message)
90
+
91
+
92
class ErrorResponse(BaseModel):
    # JSON body used for every error returned by the API.
    code: int  # HTTP status code of the error
    reason: str  # short identifier of the error kind, e.g. "NotFound"
    message: str  # human-readable description


# Maps each error status code to the shared ErrorResponse body model.
error_responses = {
    http_code: {"model": ErrorResponse}
    for http_code in (404, 409, 401, 403, 422, 400, 500, 503)
}
108
+
109
+
110
def register_handlers(app: FastAPI):
    """Install exception handlers that render errors as ErrorResponse JSON.

    Two handlers are registered on ``app``: one for HTTPException and one
    for FastAPI's RequestValidationError (always reported as 422 "Invalid").
    """

    def _error_json(code: int, reason: str, message: str) -> JSONResponse:
        # Single place where the error body is serialized.
        body = ErrorResponse(code=code, reason=reason, message=message)
        return JSONResponse(status_code=code, content=body.model_dump())

    @app.exception_handler(HTTPException)
    async def http_exception_handler(request: Request, exc: HTTPException):
        return _error_json(exc.status_code, exc.reason, exc.message)

    @app.exception_handler(RequestValidationError)
    async def validation_exception_handler(request, exc: RequestValidationError):
        errors = exc.errors()
        # Header line followed by one line per validation error.
        parts = [f"{len(errors)} validation errors:\n"]
        parts.extend(f" {err}\n" for err in errors)
        return _error_json(
            status.HTTP_422_UNPROCESSABLE_ENTITY, "Invalid", "".join(parts)
        )
@@ -0,0 +1,122 @@
1
+ from datetime import date
2
+ import json
3
+ import logging
4
+ import time
5
+ from fastapi import Request, Response
6
+ from fastapi.responses import StreamingResponse
7
+ from jwt import DecodeError, ExpiredSignatureError
8
+ from starlette.middleware.base import BaseHTTPMiddleware
9
+ from openai.types.chat import ChatCompletion, ChatCompletionChunk
10
+ from gpustack.schemas.model_usage import ModelUsage
11
+ from gpustack.schemas.models import Model
12
+ from gpustack.schemas.users import User
13
+ from gpustack.security import JWT_TOKEN_EXPIRE_MINUTES, JWTManager
14
+ from gpustack.server.auth import SESSION_COOKIE_NAME
15
+ from gpustack.server.db import get_engine
16
+ from sqlmodel.ext.asyncio.session import AsyncSession
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class ModelUsageMiddleware(BaseHTTPMiddleware):
    """Record token usage for successful OpenAI-compatible chat completions.

    Only 200 responses of ``/v1-openai/chat/completions`` are inspected; all
    other responses pass through untouched. Usage accounting is best-effort:
    a failure is logged and never alters what the client receives.
    """

    async def dispatch(self, request: Request, call_next):
        response = await call_next(request)
        if (
            request.url.path != "/v1-openai/chat/completions"
            or response.status_code != 200
        ):
            return response

        stream: bool = getattr(request.state, "stream", False)
        if stream:
            return await self.handle_streaming_response(request, response)

        # The body iterator can only be consumed once, so buffer it and build
        # a fresh response from the buffered bytes.
        response_body = b"".join([chunk async for chunk in response.body_iterator])
        try:
            completion_dict = json.loads(response_body)
            chat_completion = ChatCompletion(**completion_dict)
            await self.process_model_usage(request, chat_completion)
        except Exception as e:
            logger.error(f"Error processing model usage: {e}")
        return Response(content=response_body, headers=dict(response.headers))

    async def handle_streaming_response(
        self, request: Request, response: StreamingResponse
    ):
        """Wrap a streaming response so usage is recorded as chunks pass by.

        Every chunk is forwarded to the client before it is inspected. The
        first chunk that carries usage data is recorded; after that (or after
        a failed attempt) chunks are forwarded without inspection.
        """

        async def streaming_generator():
            usage_pending = True
            try:
                async for chunk in response.body_iterator:
                    yield chunk
                    if not usage_pending:
                        continue
                    # Accounting errors are contained here so they cannot cut
                    # the stream short for the client.
                    try:
                        completion_chunk = self._find_usage_chunk(chunk)
                        if completion_chunk is None:
                            continue
                        usage_pending = False
                        await self.process_model_usage(request, completion_chunk)
                    except Exception as e:
                        usage_pending = False
                        logger.error(f"Error processing streaming response: {e}")
            except Exception as e:
                logger.error(f"Error processing streaming response: {e}")

        return StreamingResponse(streaming_generator(), headers=dict(response.headers))

    @staticmethod
    def _find_usage_chunk(chunk: bytes) -> ChatCompletionChunk | None:
        """Return the completion chunk carrying usage data, if ``chunk`` has one."""
        # The marker is pure ASCII, so replacing undecodable bytes (e.g. a
        # multi-byte character split across chunks) cannot hide it.
        data = chunk.decode("utf-8", errors="replace")
        # One network chunk may hold several SSE events; look at each payload
        # instead of assuming the usage event is the last one.
        for payload in data.split('data: '):
            if '"completion_tokens":' in payload:
                return ChatCompletionChunk(**json.loads(payload))
        return None

    async def process_model_usage(
        self, request: Request, chat_completion: ChatCompletion | ChatCompletionChunk
    ):
        """Add the completion's token counts to today's usage row.

        The row is identified by user, model and current date; it is created
        when missing and incremented otherwise. The user and model are read
        from ``request.state``.
        """
        completion_tokens = chat_completion.usage.completion_tokens
        prompt_tokens = chat_completion.usage.prompt_tokens
        user: User = request.state.user
        model: Model = request.state.model
        fields = {
            "user_id": user.id,
            "model_id": model.id,
            "date": date.today(),
        }
        model_usage = ModelUsage(
            **fields,
            completion_token_count=completion_tokens,
            prompt_token_count=prompt_tokens,
            request_count=1,
            operation="chat_completion",
        )
        async with AsyncSession(get_engine()) as session:
            # NOTE(review): read-then-write; concurrent requests for the same
            # user/model/day may race - confirm whether that is acceptable.
            current_model_usage = await ModelUsage.one_by_fields(session, fields)
            if current_model_usage:
                current_model_usage.completion_token_count += completion_tokens
                current_model_usage.prompt_token_count += prompt_tokens
                current_model_usage.request_count += 1
                await current_model_usage.update(session)
            else:
                await ModelUsage.create(session, model_usage)
94
+
95
+
96
class RefreshTokenMiddleware(BaseHTTPMiddleware):
    """Re-issue the session cookie when its JWT is close to expiring."""

    async def dispatch(self, request: Request, call_next):
        response = await call_next(request)

        jwt_manager: JWTManager = request.app.state.jwt_manager
        token = request.cookies.get(SESSION_COOKIE_NAME)
        if not token:
            # No session cookie on the request: nothing to refresh.
            return response

        try:
            payload = jwt_manager.decode_jwt_token(token)
            # Refresh only when less than 5 minutes of validity remain.
            if payload and payload['exp'] - time.time() < 5 * 60:
                lifetime_seconds = JWT_TOKEN_EXPIRE_MINUTES * 60
                response.set_cookie(
                    key=SESSION_COOKIE_NAME,
                    value=jwt_manager.create_jwt_token(username=payload['sub']),
                    httponly=True,
                    max_age=lifetime_seconds,
                    expires=lifetime_seconds,
                )
        except (ExpiredSignatureError, DecodeError):
            # An expired or undecodable token is simply not refreshed.
            pass

        return response
File without changes
@@ -0,0 +1,184 @@
1
+ import os
2
+ import sys
3
+ from typing import List, Optional
4
+
5
+ from colorama import Fore, Style
6
+ from openai import OpenAI
7
+ from pydantic import model_validator
8
+ from pydantic_settings import BaseSettings
9
+ from tqdm import tqdm
10
+
11
+ from gpustack.client.generated_clientset import ClientSet
12
+ from gpustack.schemas.models import (
13
+ ModelCreate,
14
+ ModelInstance,
15
+ ModelInstanceStateEnum,
16
+ SourceEnum,
17
+ )
18
+ from gpustack.server.bus import Event
19
+ from openai.types.chat import (
20
+ ChatCompletionMessageParam,
21
+ ChatCompletionUserMessageParam,
22
+ ChatCompletionAssistantMessageParam,
23
+ )
24
+
25
+
26
class ChatConfig(BaseSettings):
    """Settings for the interactive chat command.

    ``base_url`` and ``api_key`` default to the GPUSTACK_SERVER_URL and
    GPUSTACK_API_KEY environment variables, read when the class is defined.
    """

    debug: bool = False
    model: str
    prompt: Optional[str] = None
    base_url: str = os.getenv("GPUSTACK_SERVER_URL", "http://127.0.0.1")
    api_key: Optional[str] = os.getenv("GPUSTACK_API_KEY")

    @model_validator(mode="after")
    def check_api_key(self):
        """Require an API key for non-local servers; default it to "local" otherwise."""
        if not self.api_key:
            if self.base_url != "http://127.0.0.1":
                raise ValueError(
                    "API key is required. Please set GPUSTACK_API_KEY env var."
                )
            self.api_key = "local"
        # An "after" model validator must hand the validated instance back;
        # without this, model_validate() would yield None instead of the model.
        return self
41
+
42
+
43
def parse_arguments(args) -> ChatConfig:
    """Build a ChatConfig from the ``debug``, ``model`` and ``prompt`` attributes of ``args``."""
    settings = {
        "debug": args.debug,
        "model": args.model,
        "prompt": args.prompt,
    }
    return ChatConfig(**settings)
45
+
46
+
47
def print_completion_result(message):
    """Print ``message`` at the end of the previous terminal line."""
    # ANSI escapes: go up one line, then move far right (1000 columns) so
    # the cursor lands at the end of that line.
    cursor_to_previous_line_end = "\033[F\033[1000C"
    sys.stdout.write(cursor_to_previous_line_end)
    print(message)
51
+
52
+
53
def print_error(message):
    """Print ``message`` in red, resetting the terminal style afterwards."""
    red_text = f"{Fore.RED}{message}{Style.RESET_ALL}"
    print(red_text)
55
+
56
+
57
class ChatManager:
    """Interactive chat session against a model served by a GPUStack server.

    The manager makes sure the requested model exists (creating it from the
    Ollama library when it does not), waits for it to be running, and then
    either answers a single prompt or runs a read-eval-print loop.
    """

    def __init__(self, cfg: ChatConfig) -> None:
        self._model_name = cfg.model
        self._prompt = cfg.prompt
        self._clientset = ClientSet(base_url=cfg.base_url, api_key=cfg.api_key)
        self._openai_client = OpenAI(
            base_url=f"{cfg.base_url}/v1-openai", api_key=cfg.api_key
        )
        # Conversation so far; sent in full with every completion request.
        self._history: List[ChatCompletionMessageParam] = []

    def start(self):
        """Prepare the model, then answer the one-shot prompt or run the REPL."""
        self._ensure_model()

        if self._prompt:
            self.chat_completion(self._prompt)
            return

        while True:
            try:
                user_input = input(">")
            except EOFError:
                # End of input (Ctrl-D or a closed pipe) ends the session
                # like "\q" instead of crashing with a traceback.
                print()
                break

            if user_input in ("\\q", "\\quit"):
                break
            if user_input in ("\\?", "\\h", "\\help"):
                self._print_help()
                continue
            if user_input in ("\\c", "\\clear"):
                self._clear_context()
                continue
            if not user_input.strip():
                continue

            try:
                self.chat_completion(user_input)
            except Exception as e:
                print_error(e)

    @staticmethod
    def _print_help():
        print("Commands:")
        print(" \\q or \\quit - Quit the chat")
        print(" \\c or \\clear - Clear chat context in prompt")
        print(" \\h or \\? or \\help - Print this help message")

    def _clear_context(self):
        self._history = []
        print("Chat context cleared.")

    def _ensure_model(self):
        """Find the model by name, create it if absent, and wait until it runs."""
        models = self._clientset.models.list()
        for model in models.items:
            if model.name == self._model_name:
                self._model = model
                break
        else:
            # No existing model carries the requested name.
            self._create_model()

        self._wait_for_model_ready()

    def _create_model(self):
        """Create the model from the Ollama library under the requested name."""
        model_create = ModelCreate(
            name=self._model_name,
            source=SourceEnum.OLLAMA_LIBRARY,
            ollama_library_model_name=self._model_name,
        )
        created = self._clientset.models.create(model_create=model_create)
        self._model = created

    def _wait_for_model_ready(self):
        """Watch the model's instances until one runs, showing download progress.

        Raises:
            Exception: When a watched instance reports the error state.
        """

        def stop_when_running(event: Event) -> bool:
            # NOTE(review): event.data is validated as a ModelInstance in
            # print_progress, yet its "id" is compared with the *model* id
            # here - confirm this should not be the instance's model id.
            if event.data["id"] == self._model.id and event.data["state"] == "Running":
                return True
            elif event.data["state"] == ModelInstanceStateEnum.error:
                raise Exception(f"Error running model: {event.data['state_message']}")
            return False

        with tqdm(
            total=0,
            desc=f"Preparing {self._model_name} model...",
            bar_format="{desc}",
            leave=False,
        ) as pbar:
            current_progress = 0

            def print_progress(event: Event):
                nonlocal current_progress
                mi = ModelInstance.model_validate(event.data)
                if mi.download_progress is not None:
                    increment = mi.download_progress - current_progress
                    if increment <= 0:
                        return

                    # First progress report: switch from the plain
                    # "Preparing" text to a real 0-100 progress bar.
                    if pbar.total == 0:
                        pbar.total = 100
                        pbar.bar_format = "{l_bar}{bar}{r_bar}"
                        pbar.set_description(f"Downloading {self._model_name} model")
                        pbar.reset()

                    pbar.update(increment)
                    current_progress = mi.download_progress

            self._clientset.model_instances.watch(
                stop_condition=stop_when_running,
                callback=print_progress,
                params={"model_id": self._model.id},
            )

    def chat_completion(self, prompt: str):
        """Send ``prompt`` with the chat history and stream the reply to stdout.

        On success the user prompt and the assistant reply are both kept in
        the history. On failure the prompt is removed again, so a retry does
        not send an unanswered user turn twice.

        Raises:
            Exception: Whatever the OpenAI client raises; re-raised unchanged.
        """
        self._history.append(
            ChatCompletionUserMessageParam(role="user", content=prompt)
        )

        reply_parts = []
        try:
            completion = self._openai_client.chat.completions.create(
                model=self._model_name,
                messages=self._history,
                stream=True,
            )
            for chunk in completion:
                # Some chunks carry no choices at all (e.g. a trailing
                # usage-only chunk); skip them instead of indexing into [].
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content:
                    reply_parts.append(content)
                    print(content, end="", flush=True)
        except Exception:
            if reply_parts:
                # Finish the partially printed line before the error shows.
                print()
            self._history.pop()
            raise

        self._history.append(
            ChatCompletionAssistantMessageParam(
                role="assistant", content="".join(reply_parts)
            )
        )
        print()
@@ -0,0 +1,4 @@
1
+ from .generated_clientset import ClientSet
2
+
3
+
4
+ __all__ = ["ClientSet"]
@@ -0,0 +1,39 @@
1
+ import base64
2
+ from .generated_http_client import HTTPClient
3
+ from typing import Optional
4
+
5
+ from .generated_worker_client import WorkerClient
6
+ from .generated_model_client import ModelClient
7
+ from .generated_model_instance_client import ModelInstanceClient
8
+ from .generated_user_client import UserClient
9
+
10
+
11
class ClientSet:
    """Bundle of the generated resource clients sharing one HTTP client.

    Attributes:
        base_url: Base URL passed to the HTTP client.
        headers: Headers sent with every request, including any
            Authorization header derived from the credentials.
        workers, models, model_instances, users: Per-resource clients.
    """

    def __init__(
        self,
        base_url: str,
        api_key: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        headers: Optional[dict] = None,
    ):
        """Create the clients.

        Args:
            base_url: Base URL of the server.
            api_key: When given, sent as a Bearer token; takes precedence
                over ``username``/``password``.
            username: User name for HTTP Basic authentication.
            password: Password for HTTP Basic authentication.
            headers: Extra headers to send; the mapping is copied and never
                modified.
        """
        # Work on a copy: adding the Authorization header below must not
        # leak into a dict the caller may reuse for another client.
        headers = dict(headers) if headers else {}

        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        elif username and password:
            base64_credentials = base64.b64encode(
                f"{username}:{password}".encode()
            ).decode()
            headers["Authorization"] = f"Basic {base64_credentials}"

        self.base_url = base_url
        self.headers = headers

        http_client = HTTPClient(base_url=base_url).with_headers(headers)

        self.workers = WorkerClient(http_client)
        self.models = ModelClient(http_client)
        self.model_instances = ModelInstanceClient(http_client)
        self.users = UserClient(http_client)