guidellm 0.4.0a18__py3-none-any.whl → 0.4.0a155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guidellm might be problematic. Click here for more details.
- guidellm/__init__.py +5 -2
- guidellm/__main__.py +451 -252
- guidellm/backends/__init__.py +33 -0
- guidellm/backends/backend.py +110 -0
- guidellm/backends/openai.py +355 -0
- guidellm/backends/response_handlers.py +455 -0
- guidellm/benchmark/__init__.py +53 -39
- guidellm/benchmark/benchmarker.py +148 -317
- guidellm/benchmark/entrypoints.py +466 -128
- guidellm/benchmark/output.py +517 -771
- guidellm/benchmark/profile.py +580 -280
- guidellm/benchmark/progress.py +568 -549
- guidellm/benchmark/scenarios/__init__.py +40 -0
- guidellm/benchmark/scenarios/chat.json +6 -0
- guidellm/benchmark/scenarios/rag.json +6 -0
- guidellm/benchmark/schemas.py +2085 -0
- guidellm/data/__init__.py +28 -4
- guidellm/data/collators.py +16 -0
- guidellm/data/deserializers/__init__.py +53 -0
- guidellm/data/deserializers/deserializer.py +109 -0
- guidellm/data/deserializers/file.py +222 -0
- guidellm/data/deserializers/huggingface.py +94 -0
- guidellm/data/deserializers/memory.py +192 -0
- guidellm/data/deserializers/synthetic.py +346 -0
- guidellm/data/loaders.py +145 -0
- guidellm/data/preprocessors/__init__.py +25 -0
- guidellm/data/preprocessors/formatters.py +412 -0
- guidellm/data/preprocessors/mappers.py +198 -0
- guidellm/data/preprocessors/preprocessor.py +29 -0
- guidellm/data/processor.py +30 -0
- guidellm/data/schemas.py +13 -0
- guidellm/data/utils/__init__.py +10 -0
- guidellm/data/utils/dataset.py +94 -0
- guidellm/data/utils/functions.py +18 -0
- guidellm/extras/__init__.py +4 -0
- guidellm/extras/audio.py +215 -0
- guidellm/extras/vision.py +242 -0
- guidellm/logger.py +2 -2
- guidellm/mock_server/__init__.py +8 -0
- guidellm/mock_server/config.py +84 -0
- guidellm/mock_server/handlers/__init__.py +17 -0
- guidellm/mock_server/handlers/chat_completions.py +280 -0
- guidellm/mock_server/handlers/completions.py +280 -0
- guidellm/mock_server/handlers/tokenizer.py +142 -0
- guidellm/mock_server/models.py +510 -0
- guidellm/mock_server/server.py +168 -0
- guidellm/mock_server/utils.py +302 -0
- guidellm/preprocess/dataset.py +23 -26
- guidellm/presentation/builder.py +2 -2
- guidellm/presentation/data_models.py +25 -21
- guidellm/presentation/injector.py +2 -3
- guidellm/scheduler/__init__.py +65 -26
- guidellm/scheduler/constraints.py +1035 -0
- guidellm/scheduler/environments.py +252 -0
- guidellm/scheduler/scheduler.py +140 -368
- guidellm/scheduler/schemas.py +272 -0
- guidellm/scheduler/strategies.py +519 -0
- guidellm/scheduler/worker.py +391 -420
- guidellm/scheduler/worker_group.py +707 -0
- guidellm/schemas/__init__.py +31 -0
- guidellm/schemas/info.py +159 -0
- guidellm/schemas/request.py +216 -0
- guidellm/schemas/response.py +119 -0
- guidellm/schemas/stats.py +228 -0
- guidellm/{config.py → settings.py} +32 -21
- guidellm/utils/__init__.py +95 -8
- guidellm/utils/auto_importer.py +98 -0
- guidellm/utils/cli.py +46 -2
- guidellm/utils/console.py +183 -0
- guidellm/utils/encoding.py +778 -0
- guidellm/utils/functions.py +134 -0
- guidellm/utils/hf_datasets.py +1 -2
- guidellm/utils/hf_transformers.py +4 -4
- guidellm/utils/imports.py +9 -0
- guidellm/utils/messaging.py +1118 -0
- guidellm/utils/mixins.py +115 -0
- guidellm/utils/pydantic_utils.py +411 -0
- guidellm/utils/random.py +3 -4
- guidellm/utils/registry.py +220 -0
- guidellm/utils/singleton.py +133 -0
- guidellm/{objects → utils}/statistics.py +341 -247
- guidellm/utils/synchronous.py +159 -0
- guidellm/utils/text.py +163 -50
- guidellm/utils/typing.py +41 -0
- guidellm/version.py +1 -1
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/METADATA +33 -10
- guidellm-0.4.0a155.dist-info/RECORD +96 -0
- guidellm/backend/__init__.py +0 -23
- guidellm/backend/backend.py +0 -259
- guidellm/backend/openai.py +0 -705
- guidellm/backend/response.py +0 -136
- guidellm/benchmark/aggregator.py +0 -760
- guidellm/benchmark/benchmark.py +0 -837
- guidellm/benchmark/scenario.py +0 -104
- guidellm/data/prideandprejudice.txt.gz +0 -0
- guidellm/dataset/__init__.py +0 -22
- guidellm/dataset/creator.py +0 -213
- guidellm/dataset/entrypoints.py +0 -42
- guidellm/dataset/file.py +0 -92
- guidellm/dataset/hf_datasets.py +0 -62
- guidellm/dataset/in_memory.py +0 -132
- guidellm/dataset/synthetic.py +0 -287
- guidellm/objects/__init__.py +0 -18
- guidellm/objects/pydantic.py +0 -89
- guidellm/request/__init__.py +0 -18
- guidellm/request/loader.py +0 -284
- guidellm/request/request.py +0 -79
- guidellm/request/types.py +0 -10
- guidellm/scheduler/queues.py +0 -25
- guidellm/scheduler/result.py +0 -155
- guidellm/scheduler/strategy.py +0 -495
- guidellm-0.4.0a18.dist-info/RECORD +0 -62
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/WHEEL +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/entry_points.txt +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/licenses/LICENSE +0 -0
- {guidellm-0.4.0a18.dist-info → guidellm-0.4.0a155.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
guidellm/__init__.py,sha256=1zl-PT9IZJvDfdLSMviPLzhVE3_ZXpizmc9s7UWa6kQ,1206
|
|
2
|
+
guidellm/__main__.py,sha256=bzKBbZP4qXx9u5QhYZTp6tWqqf5NpcGfTxAUV3giKbA,20512
|
|
3
|
+
guidellm/logger.py,sha256=6qGOeff8hOJF6p57Zietq6qr64N7E40CJSQSQcUFgKc,2912
|
|
4
|
+
guidellm/settings.py,sha256=C4miDtWaI5lJ4NBXxfuUitt5-6_FCzZPzM1Bjie9XoA,7283
|
|
5
|
+
guidellm/version.py,sha256=NIzyWA7lNdSpf2MtPJuOjvW5h6E9nGDea2G4nGFDbgY,127
|
|
6
|
+
guidellm/backends/__init__.py,sha256=Ou-SHPHLoHYfRFkaf_LocNxUUHI_DzmXXRGITyCasac,1101
|
|
7
|
+
guidellm/backends/backend.py,sha256=Qz7z3s6rWwdYGVC-CbPvSFKWSsPiJsSFKBFyfvBG6rY,3385
|
|
8
|
+
guidellm/backends/openai.py,sha256=0At-DMt3_kD0Qa788j_HJ1BcFiUh-SFReoYyaTy8k1g,13007
|
|
9
|
+
guidellm/backends/response_handlers.py,sha256=jwoAGzy-BXxYzMroq08dEUA5ooWoscRpGdU5I4cCEUw,17136
|
|
10
|
+
guidellm/benchmark/__init__.py,sha256=E9g3x0Peopsapw6Pkk9AAthUv3tIOaH59Ai6_92xnzM,2337
|
|
11
|
+
guidellm/benchmark/benchmarker.py,sha256=I3pgAUQSnp1TFrDDGqZ5El-ZQRYyLQer4cfEyI16NVQ,6444
|
|
12
|
+
guidellm/benchmark/entrypoints.py,sha256=hbuluEL4Hlj5IOgaagANyzWauMp8kxIv83UytXQYLDI,17938
|
|
13
|
+
guidellm/benchmark/output.py,sha256=dLlZFCOaAvNZmdsMGzXFdPFKIvM_H63uurxWh4dj69U,27243
|
|
14
|
+
guidellm/benchmark/profile.py,sha256=RvQdmVLNLU-V8U8xIXm3vH8tY7Hp_4jNPQe5dombj8g,24007
|
|
15
|
+
guidellm/benchmark/progress.py,sha256=oZqZZ_vInmifBNd490ZTgcCjaGy2_slViEABSWDJgHI,25976
|
|
16
|
+
guidellm/benchmark/schemas.py,sha256=pULZ0F0HbubtRDQBrU9XhJqY0me2GK8IdAXjcqLaRk0,81246
|
|
17
|
+
guidellm/benchmark/scenarios/__init__.py,sha256=SmaYf8hfByJU4LVJ7pZKNxJPYBObl7UKpoaJEmLPdTI,1276
|
|
18
|
+
guidellm/benchmark/scenarios/chat.json,sha256=4H_ByPCv_9azHn6iTxCY3FfpoUtlbShDPdNyzDwHJVQ,226
|
|
19
|
+
guidellm/benchmark/scenarios/rag.json,sha256=BIpifJoAtWgB3NRRYK51ZuCH4Zvh1OeBFanB7vcxS-E,231
|
|
20
|
+
guidellm/data/__init__.py,sha256=0-7B9vBgY6QHT4svxnBhGN4DoA4jE_9HZxOgndyOiUM,743
|
|
21
|
+
guidellm/data/collators.py,sha256=j4OLGqwbt4sNnTqE8iSbe11qmgJBHnyWjipNeEz1SCk,445
|
|
22
|
+
guidellm/data/loaders.py,sha256=OcZh2-Mlt4ZQ6xAI5tpRp3THiYMBGT0DZEQrYupjBdA,5105
|
|
23
|
+
guidellm/data/processor.py,sha256=8IzCiAr3n-6RAFZrr5dMfsh-7hpK2xF9s_xSQVRK0Mo,807
|
|
24
|
+
guidellm/data/schemas.py,sha256=6KUv2OPW_f369SzEhyPBXPDMGoQtJzEPJalBCpp3f84,280
|
|
25
|
+
guidellm/data/deserializers/__init__.py,sha256=NhfbgebOun2FgWTNPydto2f3LNSTlOqdaxeFpDvQUhY,1608
|
|
26
|
+
guidellm/data/deserializers/deserializer.py,sha256=tNmMFUbCsIFnaWJ6vEXrie0jAUKlVA4Gyl7hhnetiEk,3805
|
|
27
|
+
guidellm/data/deserializers/file.py,sha256=6XVc8MrHnHAc06hVXb_X0TH2DAL9S-oIXcK2HPMj9SI,7501
|
|
28
|
+
guidellm/data/deserializers/huggingface.py,sha256=p4eN_jjuM_ChtyLcQy4QrTEW-tpsf3mIfYm3GGK-1JQ,2908
|
|
29
|
+
guidellm/data/deserializers/memory.py,sha256=4u--QmbxyKgU2asNFo-a7DjKLSlH0ggSb6T8axVsevM,6621
|
|
30
|
+
guidellm/data/deserializers/synthetic.py,sha256=tFs3Z6_ZwY-UdScJWXbswAZ31fVW8p9ISuOQM1qCvZM,12069
|
|
31
|
+
guidellm/data/preprocessors/__init__.py,sha256=khp1-m5EqJ6I40qFAYVv71LncrEXzKBmRocxQG5-ZuE,757
|
|
32
|
+
guidellm/data/preprocessors/formatters.py,sha256=DV_-29rFuSqV7yyQETJ92FBPG9yrkOacEE1nhxXRVyc,14764
|
|
33
|
+
guidellm/data/preprocessors/mappers.py,sha256=nn2zXkabgv0NVT4Iods-cJ3UGsdCpCsqDi81b7K9M_k,6769
|
|
34
|
+
guidellm/data/preprocessors/preprocessor.py,sha256=7_9qezg820_JqEPizoIlGtMxZgEox17V2MxUHfkz5J8,747
|
|
35
|
+
guidellm/data/utils/__init__.py,sha256=lsVIrDXiZgLXdGDeNqm4y5Ilai6jiMOpxVhyFap5ocA,186
|
|
36
|
+
guidellm/data/utils/dataset.py,sha256=8VO7n_6F4ARSXitvzOCngtO-WokNfFb25lA-mijS7UE,2325
|
|
37
|
+
guidellm/data/utils/functions.py,sha256=cuNCTzhiqFUCBpvwwVMgBqQORGz4q1XS3FUfXdxw-gQ,390
|
|
38
|
+
guidellm/extras/__init__.py,sha256=bNtt6CNDhwMM5XlL1q74j_df-1xoXavTShB05LjDYMw,96
|
|
39
|
+
guidellm/extras/audio.py,sha256=ECDK5IFFBhfae1UQrOGGQCE_7wSCuTySo-TThpm4WfU,6421
|
|
40
|
+
guidellm/extras/vision.py,sha256=hU8e7ryUnMZOT6_utR9GKhayvCXYPljeSwCx8S4-nIQ,7691
|
|
41
|
+
guidellm/mock_server/__init__.py,sha256=oRvGpE8a2U9CUdGnza4GDbShT96NfjOW-cAoh0xDR84,183
|
|
42
|
+
guidellm/mock_server/config.py,sha256=t67sJjFV1aO0YMVrRJEm5ysFe1SzYCm-XVSlgmALPdA,2988
|
|
43
|
+
guidellm/mock_server/models.py,sha256=7CPhbQNSLhRJ4XlAXRIHn0_9yQ8IrGWgoImK-8oUHnk,18946
|
|
44
|
+
guidellm/mock_server/server.py,sha256=Nsc6eADcPRMlMmVZhVA1XML9AYfhR75T90YAH50va1Q,6178
|
|
45
|
+
guidellm/mock_server/utils.py,sha256=NYhLj2dJ4EfC2UQIhgzhr_LInAUi_lYqth7A5QK2Djw,10336
|
|
46
|
+
guidellm/mock_server/handlers/__init__.py,sha256=GX2KD41Uc3H-b338mI4Cf1tK-TZcpTKqKdZH9CPgy8Y,698
|
|
47
|
+
guidellm/mock_server/handlers/chat_completions.py,sha256=xrC0OnPDa5V393u2QmUCIWXVgzqc72YiiyxSMwyZv18,10386
|
|
48
|
+
guidellm/mock_server/handlers/completions.py,sha256=BGTI9tJ1PrldT-Nzz_e7KjEtPFpc0NmLvr3nF-tEYKk,10024
|
|
49
|
+
guidellm/mock_server/handlers/tokenizer.py,sha256=OJAILmsk1tvYfHmdP6iuTf8Fg2gDm2_JyPZH-U-pxFE,5243
|
|
50
|
+
guidellm/preprocess/__init__.py,sha256=6mRs1atYwYkdX4txez_bEVk-_nCDsNt5Wo20eWZ24jA,112
|
|
51
|
+
guidellm/preprocess/dataset.py,sha256=msPfS_--kgEtREgoAVvmyxvUN4aonu6hN9YbS41FJuI,12082
|
|
52
|
+
guidellm/presentation/__init__.py,sha256=tkkHf8ZB_rH-rqnAGY21NuOw2GQq3WCGix_whMqeUSs,483
|
|
53
|
+
guidellm/presentation/builder.py,sha256=eSKsUUx7RbmsqgMRKxcaMuYqquanzS5moc8Uv9TI3Z8,905
|
|
54
|
+
guidellm/presentation/data_models.py,sha256=1CjeHaevj2r4oHLcBQeDY0BQ9VXTol46KULr3F-ps48,7424
|
|
55
|
+
guidellm/presentation/injector.py,sha256=mDo0hvrh4NE2c4RZK5GoegtzWVcvw_zEpvm5sRy7xGE,1834
|
|
56
|
+
guidellm/scheduler/__init__.py,sha256=dj-RZDd5B6H6mt7dPAYGDayqa2k6ngqwfKgRUeCkwDw,2511
|
|
57
|
+
guidellm/scheduler/constraints.py,sha256=a_cDJtmREU1rpbJ1UYxsdH9xW5fnOYd_uLj3VbfStb0,39590
|
|
58
|
+
guidellm/scheduler/environments.py,sha256=W_kp2dICO7Z-NWoji9MwgDu0HjPDhUJNGXyll_LL45Y,8829
|
|
59
|
+
guidellm/scheduler/scheduler.py,sha256=7y3PMAmkNwEYQt-I5ZPkilZZIaw1ad3Hs7z6ZgtQVBw,6986
|
|
60
|
+
guidellm/scheduler/schemas.py,sha256=iVxT0GaDK8q8ruuCL5D4046WrCf7X4v2KeZWfP8gdng,9461
|
|
61
|
+
guidellm/scheduler/strategies.py,sha256=LfYogQCubT3eCKKEUQBWcsBh19kg8J5_9NZz77YAlGc,17796
|
|
62
|
+
guidellm/scheduler/worker.py,sha256=4-RBw1znsR_Fv4O4agT1XEHETp7Fz9TOaOMBRIBm7-U,17157
|
|
63
|
+
guidellm/scheduler/worker_group.py,sha256=czOxgbWJ0BKBqa0HOdqt09-n-xR4VUkiatJHsOCbJio,28597
|
|
64
|
+
guidellm/schemas/__init__.py,sha256=4odN5dEqgRQaxsPpYLnyls0JAlDoEhbWnbYTnPZspN8,879
|
|
65
|
+
guidellm/schemas/info.py,sha256=A_LVqpQteCQvf6XXPi02m1pYX2-vOtCEswLLTZfa9_Y,5678
|
|
66
|
+
guidellm/schemas/request.py,sha256=K25Ph56nyhFMA9pzzX52uo4dG0K_Agb7paUvKsafAaI,7567
|
|
67
|
+
guidellm/schemas/response.py,sha256=ROesx1rDI7g1jRoGbZjmGROazBxcT-3NVwQTIhwI2O4,4578
|
|
68
|
+
guidellm/schemas/stats.py,sha256=4FPdMtoAVv-vQMLTaWYgIcqof7z6_nLHxYGV_lD1L9g,7507
|
|
69
|
+
guidellm/utils/__init__.py,sha256=XGBV3fdETLihLn97_Sd0KM1B4hneoe3d1Oh0nMKObv8,3040
|
|
70
|
+
guidellm/utils/auto_importer.py,sha256=rkraMx815TasixoFn0bwtp--7V7TxuEvfZUVFB8V5L0,3658
|
|
71
|
+
guidellm/utils/cli.py,sha256=kw7A0HSTZaZDdAElHczo1WLCcL9DVlt13HG3a9mu_00,3545
|
|
72
|
+
guidellm/utils/colors.py,sha256=D0IGz8A346-Pt5qgnP3S5uV-VgngJoXbfToVCOna41k,175
|
|
73
|
+
guidellm/utils/console.py,sha256=IC9vZ0PpwW9SxReZA3BGXyNRK20tdV0FDNuUmbMwUlE,4382
|
|
74
|
+
guidellm/utils/default_group.py,sha256=iZ47bwRcUCxkX04Zdg0qpmqKtFg4P7lt5_hpw1CnKkA,4167
|
|
75
|
+
guidellm/utils/dict.py,sha256=oogh34_NznFEn1L6NKY2RDVBm7TUK9LOZfMc-rquNw8,673
|
|
76
|
+
guidellm/utils/encoding.py,sha256=hf__oDU-iQYKg_FDqYptyV4BxVC6RseIn3OBgaZXBzE,27821
|
|
77
|
+
guidellm/utils/functions.py,sha256=Q4fLePI12yboNb6mx9p5oYC92PPGUZ2o1lBQT690-ro,4316
|
|
78
|
+
guidellm/utils/hf_datasets.py,sha256=M4uESvC08SzOekQUzUFRcnS49qBYJCAubElqURucU8w,992
|
|
79
|
+
guidellm/utils/hf_transformers.py,sha256=EXOctGUB-ZkwS1yrhIpSOK9IGJTaxXXZO0kUR1qXhp4,992
|
|
80
|
+
guidellm/utils/imports.py,sha256=Ch7TCnsvvLW-2ExAERxj2DUG1Dthl4KefYj1bYSAlTs,179
|
|
81
|
+
guidellm/utils/messaging.py,sha256=OLGC6aunhulC0-aKj6aI5VKlRoXQETulcO-XGqjlQg8,45566
|
|
82
|
+
guidellm/utils/mixins.py,sha256=i48rD2FVm-2qXKq7ENCTnvYZgr0IXOrVSJpkh-8-a_Q,4202
|
|
83
|
+
guidellm/utils/pydantic_utils.py,sha256=sP9mYVZyQpj4OoCGdJTwyKY_vasDYsKSesafZ5RJNBw,14742
|
|
84
|
+
guidellm/utils/random.py,sha256=rDy1lpJ9vYMM59DYgKeHT8IG_I7fnjoHjNfD8QIF03k,1273
|
|
85
|
+
guidellm/utils/registry.py,sha256=1yS3_4s4Zkvq0L2bnouTIwxhUT2nZwJ021LcVuRooaE,7644
|
|
86
|
+
guidellm/utils/singleton.py,sha256=yjpUPAtRRvOh63Ubg9ivjQjLdwsExOQM6U1nSQvZuTc,5026
|
|
87
|
+
guidellm/utils/statistics.py,sha256=KzUYm4fVNVtDd6FRCRBnqYmFcea-9n0JKCAZyqeZLM8,40006
|
|
88
|
+
guidellm/utils/synchronous.py,sha256=rRkWwbDf1ty607KUhDKsqV4HcdKU5o0-1s5hwdG-Hak,5209
|
|
89
|
+
guidellm/utils/text.py,sha256=0K8yUEB4gzztevxzuiMXossSoHhvzcHoKqRhQYQdOrg,11644
|
|
90
|
+
guidellm/utils/typing.py,sha256=jt0o7SRbDhnvrifR3l4hN8oL3uJNxl8aMnvaoABb-MU,1235
|
|
91
|
+
guidellm-0.4.0a155.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
92
|
+
guidellm-0.4.0a155.dist-info/METADATA,sha256=AEyyL1EHQ5t9SiABaf8IN5VxX3nB710-2pguCKQXw4o,21923
|
|
93
|
+
guidellm-0.4.0a155.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
94
|
+
guidellm-0.4.0a155.dist-info/entry_points.txt,sha256=DzLFEg47fF7qY1b-9laPz9jg0KSKJ1_D9TbF93kLz_E,51
|
|
95
|
+
guidellm-0.4.0a155.dist-info/top_level.txt,sha256=EXRGjnvFtL6MeZTe0tnHRMYcEWUW3vEqoG2zO7vFOtk,9
|
|
96
|
+
guidellm-0.4.0a155.dist-info/RECORD,,
|
guidellm/backend/__init__.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
from .backend import (
|
|
2
|
-
Backend,
|
|
3
|
-
BackendType,
|
|
4
|
-
)
|
|
5
|
-
from .openai import CHAT_COMPLETIONS_PATH, TEXT_COMPLETIONS_PATH, OpenAIHTTPBackend
|
|
6
|
-
from .response import (
|
|
7
|
-
RequestArgs,
|
|
8
|
-
ResponseSummary,
|
|
9
|
-
StreamingResponseType,
|
|
10
|
-
StreamingTextResponse,
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
__all__ = [
|
|
14
|
-
"CHAT_COMPLETIONS_PATH",
|
|
15
|
-
"TEXT_COMPLETIONS_PATH",
|
|
16
|
-
"Backend",
|
|
17
|
-
"BackendType",
|
|
18
|
-
"OpenAIHTTPBackend",
|
|
19
|
-
"RequestArgs",
|
|
20
|
-
"ResponseSummary",
|
|
21
|
-
"StreamingResponseType",
|
|
22
|
-
"StreamingTextResponse",
|
|
23
|
-
]
|
guidellm/backend/backend.py
DELETED
|
@@ -1,259 +0,0 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from collections.abc import AsyncGenerator
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import Any, Literal, Optional, Union
|
|
5
|
-
|
|
6
|
-
from loguru import logger
|
|
7
|
-
from PIL import Image
|
|
8
|
-
|
|
9
|
-
from guidellm.backend.response import ResponseSummary, StreamingTextResponse
|
|
10
|
-
from guidellm.config import settings
|
|
11
|
-
|
|
12
|
-
__all__ = [
|
|
13
|
-
"Backend",
|
|
14
|
-
"BackendType",
|
|
15
|
-
]
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
BackendType = Literal["openai_http"]
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class Backend(ABC):
|
|
22
|
-
"""
|
|
23
|
-
Abstract base class for generative AI backends.
|
|
24
|
-
|
|
25
|
-
This class provides a common interface for creating and interacting with different
|
|
26
|
-
generative AI backends. Subclasses should implement the abstract methods to
|
|
27
|
-
define specific backend behavior.
|
|
28
|
-
|
|
29
|
-
:cvar _registry: A registration dictionary that maps BackendType to backend classes.
|
|
30
|
-
:param type_: The type of the backend.
|
|
31
|
-
"""
|
|
32
|
-
|
|
33
|
-
_registry: dict[BackendType, "type[Backend]"] = {}
|
|
34
|
-
|
|
35
|
-
@classmethod
|
|
36
|
-
def register(cls, backend_type: BackendType):
|
|
37
|
-
"""
|
|
38
|
-
A decorator to register a backend class in the backend registry.
|
|
39
|
-
|
|
40
|
-
:param backend_type: The type of backend to register.
|
|
41
|
-
:type backend_type: BackendType
|
|
42
|
-
:return: The decorated backend class.
|
|
43
|
-
:rtype: Type[Backend]
|
|
44
|
-
"""
|
|
45
|
-
if backend_type in cls._registry:
|
|
46
|
-
raise ValueError(f"Backend type already registered: {backend_type}")
|
|
47
|
-
|
|
48
|
-
if not issubclass(cls, Backend):
|
|
49
|
-
raise TypeError("Only subclasses of Backend can be registered")
|
|
50
|
-
|
|
51
|
-
def inner_wrapper(wrapped_class: type["Backend"]):
|
|
52
|
-
cls._registry[backend_type] = wrapped_class
|
|
53
|
-
logger.info("Registered backend type: {}", backend_type)
|
|
54
|
-
return wrapped_class
|
|
55
|
-
|
|
56
|
-
return inner_wrapper
|
|
57
|
-
|
|
58
|
-
@classmethod
|
|
59
|
-
def create(cls, type_: BackendType, **kwargs) -> "Backend":
|
|
60
|
-
"""
|
|
61
|
-
Factory method to create a backend instance based on the backend type.
|
|
62
|
-
|
|
63
|
-
:param type_: The type of backend to create.
|
|
64
|
-
:type type_: BackendType
|
|
65
|
-
:param kwargs: Additional arguments for backend initialization.
|
|
66
|
-
:return: An instance of a subclass of Backend.
|
|
67
|
-
:rtype: Backend
|
|
68
|
-
:raises ValueError: If the backend type is not registered.
|
|
69
|
-
"""
|
|
70
|
-
|
|
71
|
-
logger.info("Creating backend of type {}", type_)
|
|
72
|
-
|
|
73
|
-
if type_ not in cls._registry:
|
|
74
|
-
err = ValueError(f"Unsupported backend type: {type_}")
|
|
75
|
-
logger.error("{}", err)
|
|
76
|
-
raise err
|
|
77
|
-
|
|
78
|
-
return Backend._registry[type_](**kwargs)
|
|
79
|
-
|
|
80
|
-
def __init__(self, type_: BackendType):
|
|
81
|
-
self._type = type_
|
|
82
|
-
|
|
83
|
-
@property
|
|
84
|
-
def type_(self) -> BackendType:
|
|
85
|
-
"""
|
|
86
|
-
:return: The type of the backend.
|
|
87
|
-
"""
|
|
88
|
-
return self._type
|
|
89
|
-
|
|
90
|
-
@property
|
|
91
|
-
@abstractmethod
|
|
92
|
-
def target(self) -> str:
|
|
93
|
-
"""
|
|
94
|
-
:return: The target location for the backend.
|
|
95
|
-
"""
|
|
96
|
-
...
|
|
97
|
-
|
|
98
|
-
@property
|
|
99
|
-
@abstractmethod
|
|
100
|
-
def model(self) -> Optional[str]:
|
|
101
|
-
"""
|
|
102
|
-
:return: The model used for the backend requests.
|
|
103
|
-
"""
|
|
104
|
-
...
|
|
105
|
-
|
|
106
|
-
@property
|
|
107
|
-
@abstractmethod
|
|
108
|
-
def info(self) -> dict[str, Any]:
|
|
109
|
-
"""
|
|
110
|
-
:return: The information about the backend.
|
|
111
|
-
"""
|
|
112
|
-
...
|
|
113
|
-
|
|
114
|
-
@abstractmethod
|
|
115
|
-
async def reset(self) -> None:
|
|
116
|
-
"""
|
|
117
|
-
Reset the connection object. This is useful for backends that
|
|
118
|
-
reuse connections or have state that needs to be cleared.
|
|
119
|
-
"""
|
|
120
|
-
...
|
|
121
|
-
|
|
122
|
-
async def validate(self):
|
|
123
|
-
"""
|
|
124
|
-
Handle final setup and validate the backend is ready for use.
|
|
125
|
-
If not successful, raises the appropriate exception.
|
|
126
|
-
"""
|
|
127
|
-
logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
|
|
128
|
-
await self.check_setup()
|
|
129
|
-
models = await self.available_models()
|
|
130
|
-
if not models:
|
|
131
|
-
raise ValueError("No models available for the backend")
|
|
132
|
-
|
|
133
|
-
# Use the preferred route defined in the global settings when performing the
|
|
134
|
-
# validation request. This avoids calling an unavailable endpoint (ie
|
|
135
|
-
# /v1/completions) when the deployment only supports the chat completions
|
|
136
|
-
# endpoint.
|
|
137
|
-
if settings.preferred_route == "chat_completions":
|
|
138
|
-
async for _ in self.chat_completions( # type: ignore[attr-defined]
|
|
139
|
-
content="Test connection", output_token_count=1
|
|
140
|
-
):
|
|
141
|
-
pass
|
|
142
|
-
else:
|
|
143
|
-
async for _ in self.text_completions( # type: ignore[attr-defined]
|
|
144
|
-
prompt="Test connection", output_token_count=1
|
|
145
|
-
):
|
|
146
|
-
pass
|
|
147
|
-
|
|
148
|
-
await self.reset()
|
|
149
|
-
|
|
150
|
-
@abstractmethod
|
|
151
|
-
async def check_setup(self):
|
|
152
|
-
"""
|
|
153
|
-
Check the setup for the backend.
|
|
154
|
-
If unsuccessful, raises the appropriate exception.
|
|
155
|
-
|
|
156
|
-
:raises ValueError: If the setup check fails.
|
|
157
|
-
"""
|
|
158
|
-
...
|
|
159
|
-
|
|
160
|
-
@abstractmethod
|
|
161
|
-
async def prepare_multiprocessing(self):
|
|
162
|
-
"""
|
|
163
|
-
Prepare the backend for use in a multiprocessing environment.
|
|
164
|
-
This is useful for backends that have instance state that can not
|
|
165
|
-
be shared across processes and should be cleared out and re-initialized
|
|
166
|
-
for each new process.
|
|
167
|
-
"""
|
|
168
|
-
...
|
|
169
|
-
|
|
170
|
-
@abstractmethod
|
|
171
|
-
async def available_models(self) -> list[str]:
|
|
172
|
-
"""
|
|
173
|
-
Get the list of available models for the backend.
|
|
174
|
-
|
|
175
|
-
:return: The list of available models.
|
|
176
|
-
:rtype: List[str]
|
|
177
|
-
"""
|
|
178
|
-
...
|
|
179
|
-
|
|
180
|
-
@abstractmethod
|
|
181
|
-
async def text_completions(
|
|
182
|
-
self,
|
|
183
|
-
prompt: Union[str, list[str]],
|
|
184
|
-
request_id: Optional[str] = None,
|
|
185
|
-
prompt_token_count: Optional[int] = None,
|
|
186
|
-
output_token_count: Optional[int] = None,
|
|
187
|
-
**kwargs,
|
|
188
|
-
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
|
|
189
|
-
"""
|
|
190
|
-
Generate text only completions for the given prompt.
|
|
191
|
-
Does not support multiple modalities, complicated chat interfaces,
|
|
192
|
-
or chat templates. Specifically, it requests with only the prompt.
|
|
193
|
-
|
|
194
|
-
:param prompt: The prompt (or list of prompts) to generate a completion for.
|
|
195
|
-
If a list is supplied, these are concatenated and run through the model
|
|
196
|
-
for a single prompt.
|
|
197
|
-
:param request_id: The unique identifier for the request, if any.
|
|
198
|
-
Added to logging statements and the response for tracking purposes.
|
|
199
|
-
:param prompt_token_count: The number of tokens measured in the prompt, if any.
|
|
200
|
-
Returned in the response stats for later analysis, if applicable.
|
|
201
|
-
:param output_token_count: If supplied, the number of tokens to enforce
|
|
202
|
-
generation of for the output for this request.
|
|
203
|
-
:param kwargs: Additional keyword arguments to pass with the request.
|
|
204
|
-
:return: An async generator that yields a StreamingTextResponse for start,
|
|
205
|
-
a StreamingTextResponse for each received iteration,
|
|
206
|
-
and a ResponseSummary for the final response.
|
|
207
|
-
"""
|
|
208
|
-
...
|
|
209
|
-
|
|
210
|
-
@abstractmethod
|
|
211
|
-
async def chat_completions(
|
|
212
|
-
self,
|
|
213
|
-
content: Union[
|
|
214
|
-
str,
|
|
215
|
-
list[Union[str, dict[str, Union[str, dict[str, str]]], Path, Image.Image]],
|
|
216
|
-
Any,
|
|
217
|
-
],
|
|
218
|
-
request_id: Optional[str] = None,
|
|
219
|
-
prompt_token_count: Optional[int] = None,
|
|
220
|
-
output_token_count: Optional[int] = None,
|
|
221
|
-
raw_content: bool = False,
|
|
222
|
-
**kwargs,
|
|
223
|
-
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
|
|
224
|
-
"""
|
|
225
|
-
Generate chat completions for the given content.
|
|
226
|
-
Supports multiple modalities, complicated chat interfaces, and chat templates.
|
|
227
|
-
Specifically, it requests with the content, which can be any combination of
|
|
228
|
-
text, images, and audio provided the target model supports it,
|
|
229
|
-
and returns the output text. Additionally, any chat templates
|
|
230
|
-
for the model are applied within the backend.
|
|
231
|
-
|
|
232
|
-
:param content: The content (or list of content) to generate a completion for.
|
|
233
|
-
This supports any combination of text, images, and audio (model dependent).
|
|
234
|
-
Supported text only request examples:
|
|
235
|
-
content="Sample prompt", content=["Sample prompt", "Second prompt"],
|
|
236
|
-
content=[{"type": "text", "value": "Sample prompt"}.
|
|
237
|
-
Supported text and image request examples:
|
|
238
|
-
content=["Describe the image", PIL.Image.open("image.jpg")],
|
|
239
|
-
content=["Describe the image", Path("image.jpg")],
|
|
240
|
-
content=["Describe the image", {"type": "image_url",
|
|
241
|
-
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}].
|
|
242
|
-
Supported text and audio request examples:
|
|
243
|
-
content=["Transcribe the audio", Path("audio.wav")],
|
|
244
|
-
content=["Transcribe the audio", {"type": "input_audio",
|
|
245
|
-
"input_audio": {"data": f"{base64_bytes}", "format": "wav}].
|
|
246
|
-
Additionally, if raw_content=True then the content is passed directly to the
|
|
247
|
-
backend without any processing.
|
|
248
|
-
:param request_id: The unique identifier for the request, if any.
|
|
249
|
-
Added to logging statements and the response for tracking purposes.
|
|
250
|
-
:param prompt_token_count: The number of tokens measured in the prompt, if any.
|
|
251
|
-
Returned in the response stats for later analysis, if applicable.
|
|
252
|
-
:param output_token_count: If supplied, the number of tokens to enforce
|
|
253
|
-
generation of for the output for this request.
|
|
254
|
-
:param kwargs: Additional keyword arguments to pass with the request.
|
|
255
|
-
:return: An async generator that yields a StreamingTextResponse for start,
|
|
256
|
-
a StreamingTextResponse for each received iteration,
|
|
257
|
-
and a ResponseSummary for the final response.
|
|
258
|
-
"""
|
|
259
|
-
...
|