guidellm 0.4.0a21__py3-none-any.whl → 0.4.0a155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guidellm might be problematic. Click here for more details.

Files changed (116)
  1. guidellm/__init__.py +5 -2
  2. guidellm/__main__.py +451 -252
  3. guidellm/backends/__init__.py +33 -0
  4. guidellm/backends/backend.py +110 -0
  5. guidellm/backends/openai.py +355 -0
  6. guidellm/backends/response_handlers.py +455 -0
  7. guidellm/benchmark/__init__.py +53 -39
  8. guidellm/benchmark/benchmarker.py +148 -317
  9. guidellm/benchmark/entrypoints.py +466 -128
  10. guidellm/benchmark/output.py +517 -771
  11. guidellm/benchmark/profile.py +580 -280
  12. guidellm/benchmark/progress.py +568 -549
  13. guidellm/benchmark/scenarios/__init__.py +40 -0
  14. guidellm/benchmark/scenarios/chat.json +6 -0
  15. guidellm/benchmark/scenarios/rag.json +6 -0
  16. guidellm/benchmark/schemas.py +2085 -0
  17. guidellm/data/__init__.py +28 -4
  18. guidellm/data/collators.py +16 -0
  19. guidellm/data/deserializers/__init__.py +53 -0
  20. guidellm/data/deserializers/deserializer.py +109 -0
  21. guidellm/data/deserializers/file.py +222 -0
  22. guidellm/data/deserializers/huggingface.py +94 -0
  23. guidellm/data/deserializers/memory.py +192 -0
  24. guidellm/data/deserializers/synthetic.py +346 -0
  25. guidellm/data/loaders.py +145 -0
  26. guidellm/data/preprocessors/__init__.py +25 -0
  27. guidellm/data/preprocessors/formatters.py +412 -0
  28. guidellm/data/preprocessors/mappers.py +198 -0
  29. guidellm/data/preprocessors/preprocessor.py +29 -0
  30. guidellm/data/processor.py +30 -0
  31. guidellm/data/schemas.py +13 -0
  32. guidellm/data/utils/__init__.py +10 -0
  33. guidellm/data/utils/dataset.py +94 -0
  34. guidellm/data/utils/functions.py +18 -0
  35. guidellm/extras/__init__.py +4 -0
  36. guidellm/extras/audio.py +215 -0
  37. guidellm/extras/vision.py +242 -0
  38. guidellm/logger.py +2 -2
  39. guidellm/mock_server/__init__.py +8 -0
  40. guidellm/mock_server/config.py +84 -0
  41. guidellm/mock_server/handlers/__init__.py +17 -0
  42. guidellm/mock_server/handlers/chat_completions.py +280 -0
  43. guidellm/mock_server/handlers/completions.py +280 -0
  44. guidellm/mock_server/handlers/tokenizer.py +142 -0
  45. guidellm/mock_server/models.py +510 -0
  46. guidellm/mock_server/server.py +168 -0
  47. guidellm/mock_server/utils.py +302 -0
  48. guidellm/preprocess/dataset.py +23 -26
  49. guidellm/presentation/builder.py +2 -2
  50. guidellm/presentation/data_models.py +25 -21
  51. guidellm/presentation/injector.py +2 -3
  52. guidellm/scheduler/__init__.py +65 -26
  53. guidellm/scheduler/constraints.py +1035 -0
  54. guidellm/scheduler/environments.py +252 -0
  55. guidellm/scheduler/scheduler.py +140 -368
  56. guidellm/scheduler/schemas.py +272 -0
  57. guidellm/scheduler/strategies.py +519 -0
  58. guidellm/scheduler/worker.py +391 -420
  59. guidellm/scheduler/worker_group.py +707 -0
  60. guidellm/schemas/__init__.py +31 -0
  61. guidellm/schemas/info.py +159 -0
  62. guidellm/schemas/request.py +216 -0
  63. guidellm/schemas/response.py +119 -0
  64. guidellm/schemas/stats.py +228 -0
  65. guidellm/{config.py → settings.py} +32 -21
  66. guidellm/utils/__init__.py +95 -8
  67. guidellm/utils/auto_importer.py +98 -0
  68. guidellm/utils/cli.py +46 -2
  69. guidellm/utils/console.py +183 -0
  70. guidellm/utils/encoding.py +778 -0
  71. guidellm/utils/functions.py +134 -0
  72. guidellm/utils/hf_datasets.py +1 -2
  73. guidellm/utils/hf_transformers.py +4 -4
  74. guidellm/utils/imports.py +9 -0
  75. guidellm/utils/messaging.py +1118 -0
  76. guidellm/utils/mixins.py +115 -0
  77. guidellm/utils/pydantic_utils.py +411 -0
  78. guidellm/utils/random.py +3 -4
  79. guidellm/utils/registry.py +220 -0
  80. guidellm/utils/singleton.py +133 -0
  81. guidellm/{objects → utils}/statistics.py +341 -247
  82. guidellm/utils/synchronous.py +159 -0
  83. guidellm/utils/text.py +163 -50
  84. guidellm/utils/typing.py +41 -0
  85. guidellm/version.py +1 -1
  86. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
  87. guidellm-0.4.0a155.dist-info/RECORD +96 -0
  88. guidellm/backend/__init__.py +0 -23
  89. guidellm/backend/backend.py +0 -259
  90. guidellm/backend/openai.py +0 -705
  91. guidellm/backend/response.py +0 -136
  92. guidellm/benchmark/aggregator.py +0 -760
  93. guidellm/benchmark/benchmark.py +0 -837
  94. guidellm/benchmark/scenario.py +0 -104
  95. guidellm/data/prideandprejudice.txt.gz +0 -0
  96. guidellm/dataset/__init__.py +0 -22
  97. guidellm/dataset/creator.py +0 -213
  98. guidellm/dataset/entrypoints.py +0 -42
  99. guidellm/dataset/file.py +0 -92
  100. guidellm/dataset/hf_datasets.py +0 -62
  101. guidellm/dataset/in_memory.py +0 -132
  102. guidellm/dataset/synthetic.py +0 -287
  103. guidellm/objects/__init__.py +0 -18
  104. guidellm/objects/pydantic.py +0 -89
  105. guidellm/request/__init__.py +0 -18
  106. guidellm/request/loader.py +0 -284
  107. guidellm/request/request.py +0 -79
  108. guidellm/request/types.py +0 -10
  109. guidellm/scheduler/queues.py +0 -25
  110. guidellm/scheduler/result.py +0 -155
  111. guidellm/scheduler/strategy.py +0 -495
  112. guidellm-0.4.0a21.dist-info/RECORD +0 -62
  113. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
  114. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
  115. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
  116. {guidellm-0.4.0a21.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,96 @@
1
+ guidellm/__init__.py,sha256=1zl-PT9IZJvDfdLSMviPLzhVE3_ZXpizmc9s7UWa6kQ,1206
2
+ guidellm/__main__.py,sha256=bzKBbZP4qXx9u5QhYZTp6tWqqf5NpcGfTxAUV3giKbA,20512
3
+ guidellm/logger.py,sha256=6qGOeff8hOJF6p57Zietq6qr64N7E40CJSQSQcUFgKc,2912
4
+ guidellm/settings.py,sha256=C4miDtWaI5lJ4NBXxfuUitt5-6_FCzZPzM1Bjie9XoA,7283
5
+ guidellm/version.py,sha256=NIzyWA7lNdSpf2MtPJuOjvW5h6E9nGDea2G4nGFDbgY,127
6
+ guidellm/backends/__init__.py,sha256=Ou-SHPHLoHYfRFkaf_LocNxUUHI_DzmXXRGITyCasac,1101
7
+ guidellm/backends/backend.py,sha256=Qz7z3s6rWwdYGVC-CbPvSFKWSsPiJsSFKBFyfvBG6rY,3385
8
+ guidellm/backends/openai.py,sha256=0At-DMt3_kD0Qa788j_HJ1BcFiUh-SFReoYyaTy8k1g,13007
9
+ guidellm/backends/response_handlers.py,sha256=jwoAGzy-BXxYzMroq08dEUA5ooWoscRpGdU5I4cCEUw,17136
10
+ guidellm/benchmark/__init__.py,sha256=E9g3x0Peopsapw6Pkk9AAthUv3tIOaH59Ai6_92xnzM,2337
11
+ guidellm/benchmark/benchmarker.py,sha256=I3pgAUQSnp1TFrDDGqZ5El-ZQRYyLQer4cfEyI16NVQ,6444
12
+ guidellm/benchmark/entrypoints.py,sha256=hbuluEL4Hlj5IOgaagANyzWauMp8kxIv83UytXQYLDI,17938
13
+ guidellm/benchmark/output.py,sha256=dLlZFCOaAvNZmdsMGzXFdPFKIvM_H63uurxWh4dj69U,27243
14
+ guidellm/benchmark/profile.py,sha256=RvQdmVLNLU-V8U8xIXm3vH8tY7Hp_4jNPQe5dombj8g,24007
15
+ guidellm/benchmark/progress.py,sha256=oZqZZ_vInmifBNd490ZTgcCjaGy2_slViEABSWDJgHI,25976
16
+ guidellm/benchmark/schemas.py,sha256=pULZ0F0HbubtRDQBrU9XhJqY0me2GK8IdAXjcqLaRk0,81246
17
+ guidellm/benchmark/scenarios/__init__.py,sha256=SmaYf8hfByJU4LVJ7pZKNxJPYBObl7UKpoaJEmLPdTI,1276
18
+ guidellm/benchmark/scenarios/chat.json,sha256=4H_ByPCv_9azHn6iTxCY3FfpoUtlbShDPdNyzDwHJVQ,226
19
+ guidellm/benchmark/scenarios/rag.json,sha256=BIpifJoAtWgB3NRRYK51ZuCH4Zvh1OeBFanB7vcxS-E,231
20
+ guidellm/data/__init__.py,sha256=0-7B9vBgY6QHT4svxnBhGN4DoA4jE_9HZxOgndyOiUM,743
21
+ guidellm/data/collators.py,sha256=j4OLGqwbt4sNnTqE8iSbe11qmgJBHnyWjipNeEz1SCk,445
22
+ guidellm/data/loaders.py,sha256=OcZh2-Mlt4ZQ6xAI5tpRp3THiYMBGT0DZEQrYupjBdA,5105
23
+ guidellm/data/processor.py,sha256=8IzCiAr3n-6RAFZrr5dMfsh-7hpK2xF9s_xSQVRK0Mo,807
24
+ guidellm/data/schemas.py,sha256=6KUv2OPW_f369SzEhyPBXPDMGoQtJzEPJalBCpp3f84,280
25
+ guidellm/data/deserializers/__init__.py,sha256=NhfbgebOun2FgWTNPydto2f3LNSTlOqdaxeFpDvQUhY,1608
26
+ guidellm/data/deserializers/deserializer.py,sha256=tNmMFUbCsIFnaWJ6vEXrie0jAUKlVA4Gyl7hhnetiEk,3805
27
+ guidellm/data/deserializers/file.py,sha256=6XVc8MrHnHAc06hVXb_X0TH2DAL9S-oIXcK2HPMj9SI,7501
28
+ guidellm/data/deserializers/huggingface.py,sha256=p4eN_jjuM_ChtyLcQy4QrTEW-tpsf3mIfYm3GGK-1JQ,2908
29
+ guidellm/data/deserializers/memory.py,sha256=4u--QmbxyKgU2asNFo-a7DjKLSlH0ggSb6T8axVsevM,6621
30
+ guidellm/data/deserializers/synthetic.py,sha256=tFs3Z6_ZwY-UdScJWXbswAZ31fVW8p9ISuOQM1qCvZM,12069
31
+ guidellm/data/preprocessors/__init__.py,sha256=khp1-m5EqJ6I40qFAYVv71LncrEXzKBmRocxQG5-ZuE,757
32
+ guidellm/data/preprocessors/formatters.py,sha256=DV_-29rFuSqV7yyQETJ92FBPG9yrkOacEE1nhxXRVyc,14764
33
+ guidellm/data/preprocessors/mappers.py,sha256=nn2zXkabgv0NVT4Iods-cJ3UGsdCpCsqDi81b7K9M_k,6769
34
+ guidellm/data/preprocessors/preprocessor.py,sha256=7_9qezg820_JqEPizoIlGtMxZgEox17V2MxUHfkz5J8,747
35
+ guidellm/data/utils/__init__.py,sha256=lsVIrDXiZgLXdGDeNqm4y5Ilai6jiMOpxVhyFap5ocA,186
36
+ guidellm/data/utils/dataset.py,sha256=8VO7n_6F4ARSXitvzOCngtO-WokNfFb25lA-mijS7UE,2325
37
+ guidellm/data/utils/functions.py,sha256=cuNCTzhiqFUCBpvwwVMgBqQORGz4q1XS3FUfXdxw-gQ,390
38
+ guidellm/extras/__init__.py,sha256=bNtt6CNDhwMM5XlL1q74j_df-1xoXavTShB05LjDYMw,96
39
+ guidellm/extras/audio.py,sha256=ECDK5IFFBhfae1UQrOGGQCE_7wSCuTySo-TThpm4WfU,6421
40
+ guidellm/extras/vision.py,sha256=hU8e7ryUnMZOT6_utR9GKhayvCXYPljeSwCx8S4-nIQ,7691
41
+ guidellm/mock_server/__init__.py,sha256=oRvGpE8a2U9CUdGnza4GDbShT96NfjOW-cAoh0xDR84,183
42
+ guidellm/mock_server/config.py,sha256=t67sJjFV1aO0YMVrRJEm5ysFe1SzYCm-XVSlgmALPdA,2988
43
+ guidellm/mock_server/models.py,sha256=7CPhbQNSLhRJ4XlAXRIHn0_9yQ8IrGWgoImK-8oUHnk,18946
44
+ guidellm/mock_server/server.py,sha256=Nsc6eADcPRMlMmVZhVA1XML9AYfhR75T90YAH50va1Q,6178
45
+ guidellm/mock_server/utils.py,sha256=NYhLj2dJ4EfC2UQIhgzhr_LInAUi_lYqth7A5QK2Djw,10336
46
+ guidellm/mock_server/handlers/__init__.py,sha256=GX2KD41Uc3H-b338mI4Cf1tK-TZcpTKqKdZH9CPgy8Y,698
47
+ guidellm/mock_server/handlers/chat_completions.py,sha256=xrC0OnPDa5V393u2QmUCIWXVgzqc72YiiyxSMwyZv18,10386
48
+ guidellm/mock_server/handlers/completions.py,sha256=BGTI9tJ1PrldT-Nzz_e7KjEtPFpc0NmLvr3nF-tEYKk,10024
49
+ guidellm/mock_server/handlers/tokenizer.py,sha256=OJAILmsk1tvYfHmdP6iuTf8Fg2gDm2_JyPZH-U-pxFE,5243
50
+ guidellm/preprocess/__init__.py,sha256=6mRs1atYwYkdX4txez_bEVk-_nCDsNt5Wo20eWZ24jA,112
51
+ guidellm/preprocess/dataset.py,sha256=msPfS_--kgEtREgoAVvmyxvUN4aonu6hN9YbS41FJuI,12082
52
+ guidellm/presentation/__init__.py,sha256=tkkHf8ZB_rH-rqnAGY21NuOw2GQq3WCGix_whMqeUSs,483
53
+ guidellm/presentation/builder.py,sha256=eSKsUUx7RbmsqgMRKxcaMuYqquanzS5moc8Uv9TI3Z8,905
54
+ guidellm/presentation/data_models.py,sha256=1CjeHaevj2r4oHLcBQeDY0BQ9VXTol46KULr3F-ps48,7424
55
+ guidellm/presentation/injector.py,sha256=mDo0hvrh4NE2c4RZK5GoegtzWVcvw_zEpvm5sRy7xGE,1834
56
+ guidellm/scheduler/__init__.py,sha256=dj-RZDd5B6H6mt7dPAYGDayqa2k6ngqwfKgRUeCkwDw,2511
57
+ guidellm/scheduler/constraints.py,sha256=a_cDJtmREU1rpbJ1UYxsdH9xW5fnOYd_uLj3VbfStb0,39590
58
+ guidellm/scheduler/environments.py,sha256=W_kp2dICO7Z-NWoji9MwgDu0HjPDhUJNGXyll_LL45Y,8829
59
+ guidellm/scheduler/scheduler.py,sha256=7y3PMAmkNwEYQt-I5ZPkilZZIaw1ad3Hs7z6ZgtQVBw,6986
60
+ guidellm/scheduler/schemas.py,sha256=iVxT0GaDK8q8ruuCL5D4046WrCf7X4v2KeZWfP8gdng,9461
61
+ guidellm/scheduler/strategies.py,sha256=LfYogQCubT3eCKKEUQBWcsBh19kg8J5_9NZz77YAlGc,17796
62
+ guidellm/scheduler/worker.py,sha256=4-RBw1znsR_Fv4O4agT1XEHETp7Fz9TOaOMBRIBm7-U,17157
63
+ guidellm/scheduler/worker_group.py,sha256=czOxgbWJ0BKBqa0HOdqt09-n-xR4VUkiatJHsOCbJio,28597
64
+ guidellm/schemas/__init__.py,sha256=4odN5dEqgRQaxsPpYLnyls0JAlDoEhbWnbYTnPZspN8,879
65
+ guidellm/schemas/info.py,sha256=A_LVqpQteCQvf6XXPi02m1pYX2-vOtCEswLLTZfa9_Y,5678
66
+ guidellm/schemas/request.py,sha256=K25Ph56nyhFMA9pzzX52uo4dG0K_Agb7paUvKsafAaI,7567
67
+ guidellm/schemas/response.py,sha256=ROesx1rDI7g1jRoGbZjmGROazBxcT-3NVwQTIhwI2O4,4578
68
+ guidellm/schemas/stats.py,sha256=4FPdMtoAVv-vQMLTaWYgIcqof7z6_nLHxYGV_lD1L9g,7507
69
+ guidellm/utils/__init__.py,sha256=XGBV3fdETLihLn97_Sd0KM1B4hneoe3d1Oh0nMKObv8,3040
70
+ guidellm/utils/auto_importer.py,sha256=rkraMx815TasixoFn0bwtp--7V7TxuEvfZUVFB8V5L0,3658
71
+ guidellm/utils/cli.py,sha256=kw7A0HSTZaZDdAElHczo1WLCcL9DVlt13HG3a9mu_00,3545
72
+ guidellm/utils/colors.py,sha256=D0IGz8A346-Pt5qgnP3S5uV-VgngJoXbfToVCOna41k,175
73
+ guidellm/utils/console.py,sha256=IC9vZ0PpwW9SxReZA3BGXyNRK20tdV0FDNuUmbMwUlE,4382
74
+ guidellm/utils/default_group.py,sha256=iZ47bwRcUCxkX04Zdg0qpmqKtFg4P7lt5_hpw1CnKkA,4167
75
+ guidellm/utils/dict.py,sha256=oogh34_NznFEn1L6NKY2RDVBm7TUK9LOZfMc-rquNw8,673
76
+ guidellm/utils/encoding.py,sha256=hf__oDU-iQYKg_FDqYptyV4BxVC6RseIn3OBgaZXBzE,27821
77
+ guidellm/utils/functions.py,sha256=Q4fLePI12yboNb6mx9p5oYC92PPGUZ2o1lBQT690-ro,4316
78
+ guidellm/utils/hf_datasets.py,sha256=M4uESvC08SzOekQUzUFRcnS49qBYJCAubElqURucU8w,992
79
+ guidellm/utils/hf_transformers.py,sha256=EXOctGUB-ZkwS1yrhIpSOK9IGJTaxXXZO0kUR1qXhp4,992
80
+ guidellm/utils/imports.py,sha256=Ch7TCnsvvLW-2ExAERxj2DUG1Dthl4KefYj1bYSAlTs,179
81
+ guidellm/utils/messaging.py,sha256=OLGC6aunhulC0-aKj6aI5VKlRoXQETulcO-XGqjlQg8,45566
82
+ guidellm/utils/mixins.py,sha256=i48rD2FVm-2qXKq7ENCTnvYZgr0IXOrVSJpkh-8-a_Q,4202
83
+ guidellm/utils/pydantic_utils.py,sha256=sP9mYVZyQpj4OoCGdJTwyKY_vasDYsKSesafZ5RJNBw,14742
84
+ guidellm/utils/random.py,sha256=rDy1lpJ9vYMM59DYgKeHT8IG_I7fnjoHjNfD8QIF03k,1273
85
+ guidellm/utils/registry.py,sha256=1yS3_4s4Zkvq0L2bnouTIwxhUT2nZwJ021LcVuRooaE,7644
86
+ guidellm/utils/singleton.py,sha256=yjpUPAtRRvOh63Ubg9ivjQjLdwsExOQM6U1nSQvZuTc,5026
87
+ guidellm/utils/statistics.py,sha256=KzUYm4fVNVtDd6FRCRBnqYmFcea-9n0JKCAZyqeZLM8,40006
88
+ guidellm/utils/synchronous.py,sha256=rRkWwbDf1ty607KUhDKsqV4HcdKU5o0-1s5hwdG-Hak,5209
89
+ guidellm/utils/text.py,sha256=0K8yUEB4gzztevxzuiMXossSoHhvzcHoKqRhQYQdOrg,11644
90
+ guidellm/utils/typing.py,sha256=jt0o7SRbDhnvrifR3l4hN8oL3uJNxl8aMnvaoABb-MU,1235
91
+ guidellm-0.4.0a155.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
92
+ guidellm-0.4.0a155.dist-info/METADATA,sha256=AEyyL1EHQ5t9SiABaf8IN5VxX3nB710-2pguCKQXw4o,21923
93
+ guidellm-0.4.0a155.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
94
+ guidellm-0.4.0a155.dist-info/entry_points.txt,sha256=DzLFEg47fF7qY1b-9laPz9jg0KSKJ1_D9TbF93kLz_E,51
95
+ guidellm-0.4.0a155.dist-info/top_level.txt,sha256=EXRGjnvFtL6MeZTe0tnHRMYcEWUW3vEqoG2zO7vFOtk,9
96
+ guidellm-0.4.0a155.dist-info/RECORD,,
@@ -1,23 +0,0 @@
1
- from .backend import (
2
- Backend,
3
- BackendType,
4
- )
5
- from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
6
- from .response import (
7
- RequestArgs,
8
- ResponseSummary,
9
- StreamingResponseType,
10
- StreamingTextResponse,
11
- )
12
-
13
- __all__ = [
14
- "CHAT_COMPLETIONS_PATH",
15
- "TEXT_COMPLETIONS_PATH",
16
- "Backend",
17
- "BackendType",
18
- "OpenAIHTTPBackend",
19
- "RequestArgs",
20
- "ResponseSummary",
21
- "StreamingResponseType",
22
- "StreamingTextResponse",
23
- ]
@@ -1,259 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from collections.abc import AsyncGenerator
3
- from pathlib import Path
4
- from typing import Any, Literal, Optional, Union
5
-
6
- from loguru import logger
7
- from PIL import Image
8
-
9
- from guidellm.backend.response import ResponseSummary, StreamingTextResponse
10
- from guidellm.config import settings
11
-
12
- __all__ = [
13
- "Backend",
14
- "BackendType",
15
- ]
16
-
17
-
18
- BackendType = Literal["openai_http"]
19
-
20
-
21
- class Backend(ABC):
22
- """
23
- Abstract base class for generative AI backends.
24
-
25
- This class provides a common interface for creating and interacting with different
26
- generative AI backends. Subclasses should implement the abstract methods to
27
- define specific backend behavior.
28
-
29
- :cvar _registry: A registration dictionary that maps BackendType to backend classes.
30
- :param type_: The type of the backend.
31
- """
32
-
33
- _registry: dict[BackendType, "type[Backend]"] = {}
34
-
35
- @classmethod
36
- def register(cls, backend_type: BackendType):
37
- """
38
- A decorator to register a backend class in the backend registry.
39
-
40
- :param backend_type: The type of backend to register.
41
- :type backend_type: BackendType
42
- :return: The decorated backend class.
43
- :rtype: Type[Backend]
44
- """
45
- if backend_type in cls._registry:
46
- raise ValueError(f"Backend type already registered: {backend_type}")
47
-
48
- if not issubclass(cls, Backend):
49
- raise TypeError("Only subclasses of Backend can be registered")
50
-
51
- def inner_wrapper(wrapped_class: type["Backend"]):
52
- cls._registry[backend_type] = wrapped_class
53
- logger.info("Registered backend type: {}", backend_type)
54
- return wrapped_class
55
-
56
- return inner_wrapper
57
-
58
- @classmethod
59
- def create(cls, type_: BackendType, **kwargs) -> "Backend":
60
- """
61
- Factory method to create a backend instance based on the backend type.
62
-
63
- :param type_: The type of backend to create.
64
- :type type_: BackendType
65
- :param kwargs: Additional arguments for backend initialization.
66
- :return: An instance of a subclass of Backend.
67
- :rtype: Backend
68
- :raises ValueError: If the backend type is not registered.
69
- """
70
-
71
- logger.info("Creating backend of type {}", type_)
72
-
73
- if type_ not in cls._registry:
74
- err = ValueError(f"Unsupported backend type: {type_}")
75
- logger.error("{}", err)
76
- raise err
77
-
78
- return Backend._registry[type_](**kwargs)
79
-
80
- def __init__(self, type_: BackendType):
81
- self._type = type_
82
-
83
- @property
84
- def type_(self) -> BackendType:
85
- """
86
- :return: The type of the backend.
87
- """
88
- return self._type
89
-
90
- @property
91
- @abstractmethod
92
- def target(self) -> str:
93
- """
94
- :return: The target location for the backend.
95
- """
96
- ...
97
-
98
- @property
99
- @abstractmethod
100
- def model(self) -> Optional[str]:
101
- """
102
- :return: The model used for the backend requests.
103
- """
104
- ...
105
-
106
- @property
107
- @abstractmethod
108
- def info(self) -> dict[str, Any]:
109
- """
110
- :return: The information about the backend.
111
- """
112
- ...
113
-
114
- @abstractmethod
115
- async def reset(self) -> None:
116
- """
117
- Reset the connection object. This is useful for backends that
118
- reuse connections or have state that needs to be cleared.
119
- """
120
- ...
121
-
122
- async def validate(self):
123
- """
124
- Handle final setup and validate the backend is ready for use.
125
- If not successful, raises the appropriate exception.
126
- """
127
- logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
128
- await self.check_setup()
129
- models = await self.available_models()
130
- if not models:
131
- raise ValueError("No models available for the backend")
132
-
133
- # Use the preferred route defined in the global settings when performing the
134
- # validation request. This avoids calling an unavailable endpoint (ie
135
- # /v1/completions) when the deployment only supports the chat completions
136
- # endpoint.
137
- if settings.preferred_route == "chat_completions":
138
- async for _ in self.chat_completions( # type: ignore[attr-defined]
139
- content="Test connection", output_token_count=1
140
- ):
141
- pass
142
- else:
143
- async for _ in self.text_completions( # type: ignore[attr-defined]
144
- prompt="Test connection", output_token_count=1
145
- ):
146
- pass
147
-
148
- await self.reset()
149
-
150
- @abstractmethod
151
- async def check_setup(self):
152
- """
153
- Check the setup for the backend.
154
- If unsuccessful, raises the appropriate exception.
155
-
156
- :raises ValueError: If the setup check fails.
157
- """
158
- ...
159
-
160
- @abstractmethod
161
- async def prepare_multiprocessing(self):
162
- """
163
- Prepare the backend for use in a multiprocessing environment.
164
- This is useful for backends that have instance state that can not
165
- be shared across processes and should be cleared out and re-initialized
166
- for each new process.
167
- """
168
- ...
169
-
170
- @abstractmethod
171
- async def available_models(self) -> list[str]:
172
- """
173
- Get the list of available models for the backend.
174
-
175
- :return: The list of available models.
176
- :rtype: List[str]
177
- """
178
- ...
179
-
180
- @abstractmethod
181
- async def text_completions(
182
- self,
183
- prompt: Union[str, list[str]],
184
- request_id: Optional[str] = None,
185
- prompt_token_count: Optional[int] = None,
186
- output_token_count: Optional[int] = None,
187
- **kwargs,
188
- ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
189
- """
190
- Generate text only completions for the given prompt.
191
- Does not support multiple modalities, complicated chat interfaces,
192
- or chat templates. Specifically, it requests with only the prompt.
193
-
194
- :param prompt: The prompt (or list of prompts) to generate a completion for.
195
- If a list is supplied, these are concatenated and run through the model
196
- for a single prompt.
197
- :param request_id: The unique identifier for the request, if any.
198
- Added to logging statements and the response for tracking purposes.
199
- :param prompt_token_count: The number of tokens measured in the prompt, if any.
200
- Returned in the response stats for later analysis, if applicable.
201
- :param output_token_count: If supplied, the number of tokens to enforce
202
- generation of for the output for this request.
203
- :param kwargs: Additional keyword arguments to pass with the request.
204
- :return: An async generator that yields a StreamingTextResponse for start,
205
- a StreamingTextResponse for each received iteration,
206
- and a ResponseSummary for the final response.
207
- """
208
- ...
209
-
210
- @abstractmethod
211
- async def chat_completions(
212
- self,
213
- content: Union[
214
- str,
215
- list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
216
- Any,
217
- ],
218
- request_id: Optional[str] = None,
219
- prompt_token_count: Optional[int] = None,
220
- output_token_count: Optional[int] = None,
221
- raw_content: bool = False,
222
- **kwargs,
223
- ) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
224
- """
225
- Generate chat completions for the given content.
226
- Supports multiple modalities, complicated chat interfaces, and chat templates.
227
- Specifically, it requests with the content, which can be any combination of
228
- text, images, and audio provided the target model supports it,
229
- and returns the output text. Additionally, any chat templates
230
- for the model are applied within the backend.
231
-
232
- :param content: The content (or list of content) to generate a completion for.
233
- This supports any combination of text, images, and audio (model dependent).
234
- Supported text only request examples:
235
- content="Sample prompt", content=["Sample prompt", "Second prompt"],
236
- content=[{"type": "text", "value": "Sample prompt"}.
237
- Supported text and image request examples:
238
- content=["Describe the image", PIL.Image.open("image.jpg")],
239
- content=["Describe the image", Path("image.jpg")],
240
- content=["Describe the image", {"type": "image_url",
241
- "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}].
242
- Supported text and audio request examples:
243
- content=["Transcribe the audio", Path("audio.wav")],
244
- content=["Transcribe the audio", {"type": "input_audio",
245
- "input_audio": {"data": f"{base64_bytes}", "format": "wav}].
246
- Additionally, if raw_content=True then the content is passed directly to the
247
- backend without any processing.
248
- :param request_id: The unique identifier for the request, if any.
249
- Added to logging statements and the response for tracking purposes.
250
- :param prompt_token_count: The number of tokens measured in the prompt, if any.
251
- Returned in the response stats for later analysis, if applicable.
252
- :param output_token_count: If supplied, the number of tokens to enforce
253
- generation of for the output for this request.
254
- :param kwargs: Additional keyword arguments to pass with the request.
255
- :return: An async generator that yields a StreamingTextResponse for start,
256
- a StreamingTextResponse for each received iteration,
257
- and a ResponseSummary for the final response.
258
- """
259
- ...