sglang-0.1.14-py3-none-any.whl → sglang-0.1.15-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (56)
  1. sglang/__init__.py +55 -2
  2. sglang/api.py +3 -5
  3. sglang/backend/anthropic.py +18 -4
  4. sglang/backend/openai.py +2 -1
  5. sglang/backend/runtime_endpoint.py +18 -5
  6. sglang/backend/vertexai.py +1 -0
  7. sglang/global_config.py +1 -0
  8. sglang/lang/chat_template.py +74 -0
  9. sglang/lang/interpreter.py +40 -16
  10. sglang/lang/tracer.py +6 -4
  11. sglang/launch_server.py +2 -1
  12. sglang/srt/constrained/fsm_cache.py +1 -0
  13. sglang/srt/constrained/jump_forward.py +1 -0
  14. sglang/srt/conversation.py +2 -2
  15. sglang/srt/hf_transformers_utils.py +2 -1
  16. sglang/srt/layers/context_flashattention_nopad.py +1 -0
  17. sglang/srt/layers/extend_attention.py +1 -0
  18. sglang/srt/layers/logits_processor.py +114 -54
  19. sglang/srt/layers/radix_attention.py +2 -1
  20. sglang/srt/layers/token_attention.py +1 -0
  21. sglang/srt/managers/detokenizer_manager.py +5 -1
  22. sglang/srt/managers/io_struct.py +12 -0
  23. sglang/srt/managers/router/infer_batch.py +70 -33
  24. sglang/srt/managers/router/manager.py +7 -2
  25. sglang/srt/managers/router/model_rpc.py +116 -73
  26. sglang/srt/managers/router/model_runner.py +111 -167
  27. sglang/srt/managers/router/radix_cache.py +46 -38
  28. sglang/srt/managers/tokenizer_manager.py +56 -11
  29. sglang/srt/memory_pool.py +5 -14
  30. sglang/srt/model_config.py +7 -0
  31. sglang/srt/models/commandr.py +376 -0
  32. sglang/srt/models/dbrx.py +413 -0
  33. sglang/srt/models/dbrx_config.py +281 -0
  34. sglang/srt/models/gemma.py +22 -20
  35. sglang/srt/models/llama2.py +23 -21
  36. sglang/srt/models/llava.py +12 -10
  37. sglang/srt/models/mixtral.py +27 -25
  38. sglang/srt/models/qwen.py +23 -21
  39. sglang/srt/models/qwen2.py +23 -21
  40. sglang/srt/models/stablelm.py +20 -21
  41. sglang/srt/models/yivl.py +6 -5
  42. sglang/srt/openai_api_adapter.py +356 -0
  43. sglang/srt/{managers/openai_protocol.py → openai_protocol.py} +36 -20
  44. sglang/srt/sampling_params.py +2 -0
  45. sglang/srt/server.py +68 -447
  46. sglang/srt/server_args.py +76 -49
  47. sglang/srt/utils.py +88 -32
  48. sglang/srt/weight_utils.py +402 -0
  49. sglang/test/test_programs.py +8 -7
  50. sglang/test/test_utils.py +195 -7
  51. {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/METADATA +12 -14
  52. sglang-0.1.15.dist-info/RECORD +69 -0
  53. sglang-0.1.14.dist-info/RECORD +0 -64
  54. {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/LICENSE +0 -0
  55. {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/WHEEL +0 -0
  56. {sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/top_level.txt +0 -0
sglang/test/test_utils.py CHANGED
@@ -1,13 +1,20 @@
 """Common utilities for testing and benchmarking"""
 
+import asyncio
+from functools import partial
+
 import numpy as np
 import requests
+
 from sglang.backend.openai import OpenAI
 from sglang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.global_config import global_config
+from sglang.srt.utils import get_exception_traceback
 
 
-def call_generate_lightllm(prompt, temperature, max_tokens, stop, url):
+def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
+    assert url is not None
+
     data = {
         "inputs": prompt,
         "parameters": {
@@ -22,7 +29,9 @@ def call_generate_lightllm(prompt, temperature, max_tokens, stop, url):
     return pred
 
 
-def call_generate_vllm(prompt, temperature, max_tokens, stop, url, n=1):
+def call_generate_vllm(prompt, temperature, max_tokens, stop=None, n=1, url=None):
+    assert url is not None
+
     data = {
         "prompt": prompt,
         "temperature": temperature,
@@ -40,8 +49,10 @@ def call_generate_vllm(prompt, temperature, max_tokens, stop, url, n=1):
 
 
 def call_generate_outlines(
-    prompt, temperature, max_tokens, url, stop=[], regex=None, n=1
+    prompt, temperature, max_tokens, stop=[], regex=None, n=1, url=None
 ):
+    assert url is not None
+
     data = {
         "prompt": prompt,
         "temperature": temperature,
@@ -59,7 +70,9 @@ def call_generate_outlines(
     return pred
 
 
-def call_generate_srt_raw(prompt, temperature, max_tokens, stop, url):
+def call_generate_srt_raw(prompt, temperature, max_tokens, stop=None, url=None):
+    assert url is not None
+
     data = {
         "text": prompt,
         "sampling_params": {
@@ -75,7 +88,71 @@ def call_generate_srt_raw(prompt, temperature, max_tokens, stop, url):
     return pred
 
 
-def call_select_lightllm(context, choices, url):
+def call_generate_guidance(
+    prompt, temperature, max_tokens, stop=None, n=1, regex=None, model=None
+):
+    assert model is not None
+    from guidance import gen
+
+    rets = []
+    for _ in range(n):
+        out = (
+            model
+            + prompt
+            + gen(
+                name="answer",
+                max_tokens=max_tokens,
+                temperature=temperature,
+                stop=stop,
+                regex=regex,
+            )
+        )
+        rets.append(out["answer"])
+    return rets if n > 1 else rets[0]
+
+
+async def call_generate_lmql(
+    prompt, temperature, max_tokens, stop=None, n=1, max_len=4096, model=None, **kwargs
+):
+    assert model is not None
+    import lmql
+
+    if stop != None:
+
+        @lmql.query(model=model)
+        async def program(question, max_tokens, stop):
+            '''lmql
+            """{question}[ANSWER]""" where len(TOKENS(ANSWER)) < max_tokens and STOPS_AT(ANSWER, stop)
+            return ANSWER
+            '''
+
+    else:
+
+        @lmql.query(model=model)
+        async def program(question, max_tokens):
+            '''lmql
+            """{question}[ANSWER]""" where len(TOKENS(ANSWER)) < max_tokens
+            return ANSWER
+            '''
+
+    tasks = [
+        program(
+            question=prompt,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            stop=stop,
+            max_len=max_len,
+            **kwargs,
+        )
+        for _ in range(n)
+    ]
+    rets = await asyncio.gather(*tasks)
+    return rets if n > 1 else rets[0]
+
+
+def call_select_lightllm(context, choices, url=None):
+    assert url is not None
+
     scores = []
     for i in range(len(choices)):
         data = {
@@ -90,7 +167,9 @@ def call_select_lightllm(context, choices, url):
     return np.argmax(scores)
 
 
-def call_select_vllm(context, choices, url):
+def call_select_vllm(context, choices, url=None):
+    assert url is not None
+
     scores = []
     for i in range(len(choices)):
         data = {
@@ -112,6 +191,31 @@ def call_select_vllm(context, choices, url):
     """
 
 
+def call_select_guidance(context, choices, model=None):
+    assert model is not None
+    from guidance import select
+
+    out = model + context + select(choices, name="answer")
+    return choices.index(out["answer"])
+
+
+async def call_select_lmql(context, choices, temperature=0, max_len=4096, model=None):
+    assert model is not None
+    import lmql
+
+    @lmql.query(model=model)
+    async def program(ctx, choices):
+        '''lmql
+        """{ctx}[ANSWER]""" where ANSWER in set(choices)
+        return ANSWER
+        '''
+
+    answer = await program(
+        ctx=context, choices=choices, temperature=temperature, max_len=max_len
+    )
+    return choices.index(answer)
+
+
 def add_common_other_args_and_parse(parser):
     parser.add_argument("--parallel", type=int, default=64)
     parser.add_argument("--host", type=str, default="http://127.0.0.1")
@@ -120,8 +224,17 @@ def add_common_other_args_and_parse(parser):
         "--backend",
         type=str,
         required=True,
-        choices=["vllm", "lightllm", "guidance", "lmql", "srt-raw", "llama.cpp"],
+        choices=[
+            "vllm",
+            "outlines",
+            "lightllm",
+            "guidance",
+            "lmql",
+            "srt-raw",
+            "llama.cpp",
+        ],
     )
+    parser.add_argument("--n-ctx", type=int, default=4096)
     parser.add_argument(
         "--model-path", type=str, default="meta-llama/Llama-2-7b-chat-hf"
    )
@@ -131,6 +244,7 @@ def add_common_other_args_and_parse(parser):
     if args.port is None:
         default_port = {
             "vllm": 21000,
+            "outlines": 21000,
             "lightllm": 22000,
             "lmql": 23000,
             "srt-raw": 30000,
@@ -160,3 +274,77 @@ def select_sglang_backend(args):
     else:
         raise ValueError(f"Invalid backend: {args.backend}")
     return backend
+
+
+def _get_call_generate(args):
+    if args.backend == "lightllm":
+        return partial(call_generate_lightllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "vllm":
+        return partial(call_generate_vllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "srt-raw":
+        return partial(call_generate_srt_raw, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "outlines":
+        return partial(call_generate_outlines, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "guidance":
+        from guidance import models
+
+        model = models.LlamaCpp(args.model_path, n_gpu_layers=-1, n_ctx=args.n_ctx)
+        call_generate = partial(call_generate_guidance, model=model)
+        call_generate("Hello,", 1.0, 8, ".")
+        return call_generate
+    elif args.backend == "lmql":
+        import lmql
+
+        model = lmql.model(args.model_path, endpoint=f"{args.host}:{args.port}")
+        return partial(call_generate_lmql, model=model)
+    else:
+        raise ValueError(f"Invalid backend: {args.backend}")
+
+
+def _get_call_select(args):
+    if args.backend == "lightllm":
+        return partial(call_select_lightllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "vllm":
+        return partial(call_select_vllm, url=f"{args.host}:{args.port}/generate")
+    elif args.backend == "guidance":
+        from guidance import models
+
+        model = models.LlamaCpp(args.model_path, n_gpu_layers=-1, n_ctx=args.n_ctx)
+        call_select = partial(call_select_guidance, model=model)
+
+        call_select("Hello,", ["world", "earth"])
+        return call_select
+
+    elif args.backend == "lmql":
+        import lmql
+
+        model = lmql.model(args.model_path, endpoint=f"{args.host}:{args.port}")
+        return partial(call_select_lmql, model=model)
+    else:
+        raise ValueError(f"Invalid backend: {args.backend}")
+
+
+def get_call_generate(args):
+    call_generate = _get_call_generate(args)
+
+    def func(*args, **kwargs):
+        try:
+            return call_generate(*args, **kwargs)
+        except Exception:
+            print("Exception in call_generate:\n" + get_exception_traceback())
+            raise
+
+    return func
+
+
+def get_call_select(args):
+    call_select = _get_call_select(args)
+
+    def func(*args, **kwargs):
+        try:
+            return call_select(*args, **kwargs)
+        except Exception:
+            print("Exception in call_select:\n" + get_exception_traceback())
+            raise
+
+    return func
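
Taken together, the new helpers give the benchmark scripts one uniform entry point per backend: `_get_call_generate` binds the right URL or in-process model with `functools.partial`, and `get_call_generate` wraps the result so failures print a traceback via `get_exception_traceback` before re-raising. A minimal driver sketch (hypothetical; the prompt and `main` scaffold are illustrative, and `add_common_other_args_and_parse` is assumed to parse and return the args, as its body above suggests):

```python
# Hypothetical driver for the helpers added above; only the argument names
# (--backend, --host, --port, --n-ctx, --model-path) come from this diff.
import argparse
import asyncio

from sglang.test.test_utils import add_common_other_args_and_parse, get_call_generate


def main():
    parser = argparse.ArgumentParser()
    # Assumed to call parser.parse_args() internally and return the args.
    args = add_common_other_args_and_parse(parser)

    # Picks the matching call_generate_* and wraps it with traceback logging.
    call_generate = get_call_generate(args)

    if args.backend == "lmql":
        # call_generate_lmql is async, so the wrapped callable yields a coroutine.
        answer = asyncio.run(call_generate("What is 2 + 2?", 0.0, 16, None))
    else:
        # Positional order follows the call_generate_* signatures:
        # (prompt, temperature, max_tokens, stop).
        answer = call_generate("What is 2 + 2?", 0.0, 16, None)
    print(answer)


if __name__ == "__main__":
    main()
```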
{sglang-0.1.14.dist-info → sglang-0.1.15.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.1.14
+Version: 0.1.15
 Summary: A structured generation langauge for LLMs.
 License: Apache License
         Version 2.0, January 2004
@@ -212,6 +212,7 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: requests
+Requires-Dist: tqdm
 Provides-Extra: all
 Requires-Dist: sglang[srt] ; extra == 'all'
 Requires-Dist: sglang[openai] ; extra == 'all'
@@ -222,6 +223,7 @@ Requires-Dist: numpy ; extra == 'anthropic'
 Provides-Extra: openai
 Requires-Dist: openai >=1.0 ; extra == 'openai'
 Requires-Dist: numpy ; extra == 'openai'
+Requires-Dist: tiktoken ; extra == 'openai'
 Provides-Extra: srt
 Requires-Dist: aiohttp ; extra == 'srt'
 Requires-Dist: fastapi ; extra == 'srt'
@@ -231,16 +233,12 @@ Requires-Dist: torch ; extra == 'srt'
 Requires-Dist: uvloop ; extra == 'srt'
 Requires-Dist: uvicorn ; extra == 'srt'
 Requires-Dist: zmq ; extra == 'srt'
-Requires-Dist: vllm >=0.3.3 ; extra == 'srt'
+Requires-Dist: vllm >=0.4.2 ; extra == 'srt'
 Requires-Dist: interegular ; extra == 'srt'
-Requires-Dist: lark ; extra == 'srt'
-Requires-Dist: numba ; extra == 'srt'
 Requires-Dist: pydantic ; extra == 'srt'
-Requires-Dist: referencing ; extra == 'srt'
-Requires-Dist: diskcache ; extra == 'srt'
-Requires-Dist: cloudpickle ; extra == 'srt'
 Requires-Dist: pillow ; extra == 'srt'
 Requires-Dist: outlines >=0.0.27 ; extra == 'srt'
+Requires-Dist: packaging ; extra == 'srt'
 
 <div align="center">
 <img src="assets/logo.png" alt="logo" width="400"></img>
@@ -541,7 +539,6 @@ curl http://localhost:30000/generate \
 Learn more about the argument format [here](docs/sampling_params.md).
 
 ### OpenAI Compatible API
-
 In addition, the server supports an experimental OpenAI-compatible API.
 
 ```python
@@ -606,7 +603,7 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
 ```
 python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --mem-fraction-static 0.7
 ```
-- You can turn on [flashinfer](docs/flashinfer.md) to acclerate the inference by using highly optimized CUDA kernels.
+- You can turn on [flashinfer](docs/flashinfer.md) to accelerate the inference by using highly optimized CUDA kernels.
 
 ### Supported Models
 - Llama
@@ -622,10 +619,14 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
   - `python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.6-34b --tokenizer-path liuhaotian/llava-v1.6-34b-tokenizer --port 3000`
 - Yi-VL
   - see [srt_example_yi_vl.py](examples/quick_start/srt_example_yi_vl.py).
-- AWQ/GPTQ quantization
+- StableLM
+- Command-R
+- DBRX
+- AWQ/GPTQ/Marlin quantization
 
-## Benchmark And Performance
+Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/model_support.md).
 
+## Benchmark And Performance
 - Llama-7B on NVIDIA A10G, FP16, Tensor Parallelism=1
   ![llama_7b](assets/llama_7b.jpg)
 
@@ -649,7 +650,4 @@ https://github.com/sgl-project/sglang/issues/157
 }
 ```
 
-[![Paper page](https://huggingface.co/datasets/huggingface/badges/resolve/main/paper-page-md.svg)](https://huggingface.co/papers/2312.07104)
-
-
 We learned from the design and reused some code of the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), [LMQL](https://github.com/eth-sri/lmql).
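
The README hunk above introduces the experimental OpenAI-compatible API, but the diff context ends right at the opening of its example. As a hedged sketch only, not the README's verbatim snippet: with the `openai >=1.0` dependency declared in this METADATA, a client for such a server might look like the following (the `/v1` base URL, dummy API key, and `model` name are assumptions, not taken from this diff):

```python
# Illustrative client for the experimental OpenAI-compatible server;
# the endpoint path and model name are assumptions, not from this diff.
import openai

client = openai.OpenAI(
    base_url="http://localhost:30000/v1",  # assumes the server exposes /v1 routes
    api_key="EMPTY",  # local server; the key is typically unused
)

response = client.chat.completions.create(
    model="default",  # assumed placeholder model name
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=32,
)
print(response.choices[0].message.content)
```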
sglang-0.1.15.dist-info/RECORD ADDED
@@ -0,0 +1,69 @@
+sglang/__init__.py,sha256=Ef_3iE98hM5y45k97dcEXLqlRcSovIvGxEbTebnOre4,1034
+sglang/api.py,sha256=c2MIXPgtkmsgDY7BvXPOYkRaaJJRkCSBjGjvUz2zkkM,4455
+sglang/global_config.py,sha256=TLmmeWsk4mrjNr-ryj0w7irSr8HRekXYrYZON2sABdk,854
+sglang/launch_server.py,sha256=FteIWF2C73RN1qSPkh7cfIURV5rFvfHyKLHGDRUYJIA,294
+sglang/utils.py,sha256=2dUXLMPz9VhhzbIRQABmfZnVW5yz61F3UVtb6yKyevM,6237
+sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/backend/anthropic.py,sha256=gpxYWNRKDiRs1-dUUA53tuBH6TT2mSVgi-J9iOKuNNo,2075
+sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
+sglang/backend/openai.py,sha256=QQS09WHqMpgg70r-uB1LocqxUZ7vhv4R3FHlt7NNaKg,9583
+sglang/backend/runtime_endpoint.py,sha256=ZnQ4DtbNIUr_Me5F6iYwMYsYhom8ZCs6A5kRjWwAANA,8695
+sglang/backend/vertexai.py,sha256=XNkbUzOdLIz-1qP_BBieYIfUXZf6gsfdghlaulNpBM8,4714
+sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sglang/lang/chat_template.py,sha256=-pmALN5tV2upD5kb4RKP7DOvQY1s4nuvRdOcXKclXnw,11260
+sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
+sglang/lang/interpreter.py,sha256=W1uwgTJqeHXrkG3K7mZfH8JX9Oc9poYIwtCWRIH7lhI,27251
+sglang/lang/ir.py,sha256=8Ap-uEUz6K9eNQTOKtMixePuLwRFHFKcN0Z5Yn44nKk,13320
+sglang/lang/tracer.py,sha256=vArGy7RNUP0qzE26kohsIHWRIfB0d88Ph2aiLq_P_fU,8284
+sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
+sglang/srt/conversation.py,sha256=NwTVuQXd3NqPq5WCllaYUgPLG2w2pMMbzIKDQfJMMO0,15491
+sglang/srt/hf_transformers_utils.py,sha256=mwDuBMZcp66U6hZWpiO1KeOmjXXXG9fbX_ZwEqjzzn0,5286
+sglang/srt/memory_pool.py,sha256=5bqI8d5_JURbKwIhv1BwlcIO2IDHewHvIqezPG-b_5M,3284
+sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
+sglang/srt/model_config.py,sha256=Tw13FKY7qA4hJOskl3gmdb_W5gTEoB2m0PEArUiINQU,1546
+sglang/srt/openai_api_adapter.py,sha256=w3zvahyzvCnQd2pphQ6ViRBgHJmyI-TyIul6Q-CBY5Q,13214
+sglang/srt/openai_protocol.py,sha256=87pLM0hxocd5LUvhYopnL61cEKz3iu8TKdJtHbk3C5o,5211
+sglang/srt/sampling_params.py,sha256=dQbVr7JmTJ9JEn_sy3clB56yT9kyr9ldWFZ-GaNXOy0,3023
+sglang/srt/server.py,sha256=SQXIo9XLG0fuW123-UF4VA0Os75I73upQoAzZ_U2su8,9923
+sglang/srt/server_args.py,sha256=ySWe8RA4ukJQTnN4rs4_42XoYcVz1XPfeT8Ps551MlY,9510
+sglang/srt/utils.py,sha256=n8OLrrbdNbA6ow1s2wbJU7a35fHGQmnFfewcgzTBecE,9201
+sglang/srt/weight_utils.py,sha256=TBNP9jWb32gohPLj4-qWRn_Yn64gqWk1ZGLWrv967uU,15930
+sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
+sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
+sglang/srt/constrained/fsm_cache.py,sha256=B9FPtpqzm4jKqciXTbfgNJL44hV2-rUG6-omDECN7iA,902
+sglang/srt/constrained/jump_forward.py,sha256=fUa4AlnGX40gYiWTLuICTJfq4b7wA3AL5dydTqT3jz4,2483
+sglang/srt/layers/context_flashattention_nopad.py,sha256=bENdVltDozccR5mLY_CcYDjqLob28tHA9f2s03D8UFQ,5210
+sglang/srt/layers/extend_attention.py,sha256=5gvRggy6qPLrLvjctoMMsYh1w70mOGxiPjxstHqjqsY,12623
+sglang/srt/layers/logits_processor.py,sha256=Vbkr6ANNfiBGkkNobqjNm1KQTqtuYQWZvmPjhhIWnS8,7267
+sglang/srt/layers/radix_attention.py,sha256=PBucvAdGI27Z1qQOUxUi-YJp-tKGm6LX3L2kp99pOV4,5598
+sglang/srt/layers/token_attention.py,sha256=Wm-Gj0VdmFE8krZeHjDWic9dmVxRvg1WRAIHbbA3M34,8517
+sglang/srt/managers/detokenizer_manager.py,sha256=-zuI2ZLyLD3wf21u8xWZm91JkcZZ57DwUFbFxnP2vFI,3462
+sglang/srt/managers/io_struct.py,sha256=hdCHrBMoZ_4vc2l6mgbGGOW5b8STd4GSlQm-J_BCmw0,3716
+sglang/srt/managers/tokenizer_manager.py,sha256=hgLGkZYWs5enyeJzDjht6hOjSjTEBJSvUrFHNmjszbQ,11900
+sglang/srt/managers/router/infer_batch.py,sha256=CsNErbPt2XxoUxA3MkQeP4Tr3ipNK7eF0_K7IxdEpeY,19920
+sglang/srt/managers/router/manager.py,sha256=iNmLd-0V0aTU-B3FH6YutmcKJVtuhRcTU28EqbU8PII,2683
+sglang/srt/managers/router/model_rpc.py,sha256=8fDGBsqyo8lAFhr4_N6rB3D3we7zTfyjeV36IR1M7Ds,28325
+sglang/srt/managers/router/model_runner.py,sha256=k7YMEvqU3GSIGpaBde2rCoGlWDpVjTOJgO-3xrsz0uI,16545
+sglang/srt/managers/router/radix_cache.py,sha256=ZXSYyUb2e_xHwXDi_c9U6g2-0zmX3c_wX9UWs33F6u4,6685
+sglang/srt/managers/router/scheduler.py,sha256=V-LAnVSzgD2ddy2eXW3jWURCeq9Lv7YxCGk4kHyytfM,2818
+sglang/srt/models/commandr.py,sha256=GHcgyksXAnp4Nlnij1qULpFk0D1iA_lV3SzhLBD6Yus,13599
+sglang/srt/models/dbrx.py,sha256=OK9xmb9f1m-nrO3yFB7bvy7u6ofyobaKU2fsa0oIteQ,14158
+sglang/srt/models/dbrx_config.py,sha256=6EKMCAP1kS4pkQ9Ycr39PeEeTCPG4JhKRm2rtA4jS2s,11071
+sglang/srt/models/gemma.py,sha256=Y4iLdmH4U_oySEk2-UrxqXsW3tsT_vnY0bJFywxdRyU,11630
+sglang/srt/models/llama2.py,sha256=lAYVI5bE1oy_jY0tvSvRSI9wxfalidNtIZc8VXEsaNQ,11648
+sglang/srt/models/llava.py,sha256=ocaWPocml74UoUHaAKE0oWF7Je5Dw_3fXw1c7b53zKk,14941
+sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
+sglang/srt/models/mixtral.py,sha256=jC7LR9NWjeQE9I28TfNeNGy65GdzcH3kxdWfIocpvho,13892
+sglang/srt/models/qwen.py,sha256=eGkWsgYAhXVNkcS9iR8T3pk65UnIdTRjzSnRveYdigQ,9320
+sglang/srt/models/qwen2.py,sha256=nXF5UJlgVFuY5TjDL2nqOy4_R1xn73EYpzHj2mL5odU,11344
+sglang/srt/models/stablelm.py,sha256=d1pP5e-6CtOppWRzUtQar_0ULhGIHDZlXTh9lKMWbv4,10828
+sglang/srt/models/yivl.py,sha256=Aoo_AlGu9PYMDvj6bQj9PX7Ui7-oIe9MArLe5N6FAno,4406
+sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
+sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
+sglang/test/test_programs.py,sha256=-2AoddzOOmXoj3muVUKX6Uih63UNTm3MFg2fcNnsy7Y,11498
+sglang/test/test_utils.py,sha256=9VFNGUMW0LBvmtDEHZ7ponakv5ZVF7B2Lg3xX353DXw,10083
+sglang-0.1.15.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.1.15.dist-info/METADATA,sha256=9pKA1HIo0OFpZz-peDJCVekVjaZvqj55sK3n5Dchd4A,28727
+sglang-0.1.15.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+sglang-0.1.15.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.1.15.dist-info/RECORD,,
sglang-0.1.14.dist-info/RECORD DELETED
@@ -1,64 +0,0 @@
-sglang/__init__.py,sha256=Nxa2M7XCh2-e6I7VrCg7OSBL6BvEW3gyRD14ZdykpRM,96
-sglang/api.py,sha256=0-Eh7c41hWKjPXrzzvLFdLAUVkvmPGJGLAsrG9evDTE,4576
-sglang/global_config.py,sha256=PAX7TWeFcq0HBzNUWyCONAOjqIokWqw8vT7I6sBSKTc,797
-sglang/launch_server.py,sha256=jKPZRDN5bUe8Wgz5eoDkqeePhmKa8DLD4DpXQLT5auo,294
-sglang/utils.py,sha256=2dUXLMPz9VhhzbIRQABmfZnVW5yz61F3UVtb6yKyevM,6237
-sglang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sglang/backend/anthropic.py,sha256=GJ_T1Jg0VOtajgkgczPKt5sjuVYdbAiWd2jXlJRNRmg,1677
-sglang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
-sglang/backend/openai.py,sha256=nPdA88A5GISJTH88svJdww3qHWIHZcGG2NEn0XjMkLU,9578
-sglang/backend/runtime_endpoint.py,sha256=r7dTazselaudlFx8hqk-PQLYDHZhpbAKjyFF1zLuM_E,8022
-sglang/backend/vertexai.py,sha256=BLfWf_tEgoHY9srCufJM5PLe3tql2j0G6ia7cPykxCM,4713
-sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sglang/lang/chat_template.py,sha256=MaCF0fvNky0nJC9OvmAeApeHYgM6Lr03mtRhF0lS31U,8000
-sglang/lang/compiler.py,sha256=wNn_UqV6Sxl22mv-PpzFUtRgiFFV-Y4OYpO4LshEoRM,7527
-sglang/lang/interpreter.py,sha256=ahRxuEJZ7b1Tts2Lr7wViWIqL-Z12T3anvgj0XdvMN8,26666
-sglang/lang/ir.py,sha256=8Ap-uEUz6K9eNQTOKtMixePuLwRFHFKcN0Z5Yn44nKk,13320
-sglang/lang/tracer.py,sha256=pFiSNzPSg0l7ZZIlGqJDLCmQALR-wyo2dFgJP73J4_Y,8260
-sglang/srt/backend_config.py,sha256=UIV6kIU2j-Xh0eoezn1aXcYIy0miftHsWFeAZwqpbGE,227
-sglang/srt/conversation.py,sha256=mTstD-SsXG5p_YhWQUPEWU-vzzDMF4RgQ7KmLkOOC7U,15496
-sglang/srt/hf_transformers_utils.py,sha256=soRyYLoCn7GxgxvonufGFkdFBA3eH5i3Izk_wi7p1l0,5285
-sglang/srt/memory_pool.py,sha256=BMoX2wvicj214mV-xvcr_Iv_Je0qs3zTuzXfQVpV8u4,3609
-sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
-sglang/srt/model_config.py,sha256=ned-odjmKBKBhVPo04FEpus9gJsUWxrFLrLxahLwSaw,1328
-sglang/srt/sampling_params.py,sha256=83Fp-4HWThC20TEh139XcIb_erBqfI7KZg5txdRBq7c,2896
-sglang/srt/server.py,sha256=WLXissKuXQI7JFb2V8D47QSF-PPHnW-JZCiQm4YW0xE,24070
-sglang/srt/server_args.py,sha256=bvbi-Rb_JudqztFFfRsuXBYtUsG9hq4zMFt7X97uDhA,8954
-sglang/srt/utils.py,sha256=IEqpmWx_hl4eXn_KoHM0EPXmxeN2wKkgK7H01_t0x5Q,7355
-sglang/srt/constrained/__init__.py,sha256=BPRNDJnWtzYJ13X4urRS5aE6wFuwAVNBA9qeWIHF8rE,1236
-sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
-sglang/srt/constrained/fsm_cache.py,sha256=20mEgtDXU1Zeoicl5KBQC3arkg-RhRWiYnchJc00m1g,901
-sglang/srt/constrained/jump_forward.py,sha256=Z-pz2Jnvk1CxSEZA65OVq0GryqdiKuOkhhc13v5T6Lo,2482
-sglang/srt/layers/context_flashattention_nopad.py,sha256=TVYQ6IjftWVXORmKpEROMqQxDOnF6n2g0G1Ci4LquYM,5209
-sglang/srt/layers/extend_attention.py,sha256=KGqQOA5mel9qScXMAQP_3Qyhp3BNbiQ7Y_6wi38Lxcs,12622
-sglang/srt/layers/logits_processor.py,sha256=MW2bpqSXyghODMojqeMSYWZhUHuAFPk_gUkyyLw9HkM,4827
-sglang/srt/layers/radix_attention.py,sha256=bqrb8H8K8RbKTr1PzVmpnUxRzMj0H-OWCi1JYZKuRDw,5597
-sglang/srt/layers/token_attention.py,sha256=waOjGsWZlvf6epFhYerRJlAaMwvDTy_Z3uzPaXsVQUU,8516
-sglang/srt/managers/detokenizer_manager.py,sha256=1lPNh_Pe6Pr0v-TzlCBBREbvz4uFWxyw31SmnEZh0s8,3292
-sglang/srt/managers/io_struct.py,sha256=nXJh3CrOvv9MdAfIFoo6SCXuNQTG3KswmRKkwF61Tek,3141
-sglang/srt/managers/openai_protocol.py,sha256=cttqg9iv3de8fhtCqDI4cYoPPZ_gULedMXstV1ok6WA,4563
-sglang/srt/managers/tokenizer_manager.py,sha256=hgsR9AMj6ic9S3-2WiELh7Hnp8Xnb_bzp7kpbjHwHtM,9733
-sglang/srt/managers/router/infer_batch.py,sha256=U-Ckt9ad1WaOQF_dW6Eo9AMIRQoOJQ-Pm-MMXnEmPP8,18399
-sglang/srt/managers/router/manager.py,sha256=TNYs0IrkZGkPvZJViwL7BMUg0VlvzeyTjDMjuvRoMDI,2529
-sglang/srt/managers/router/model_rpc.py,sha256=VlwLNpHZ92bnteQl4PhVKoAXM0C8Y4_2LBBVaffeu3g,26766
-sglang/srt/managers/router/model_runner.py,sha256=-wWv00EbB_UkkLpio6VKGBTagfzxLHfY-eKDDQ0rZQc,18292
-sglang/srt/managers/router/radix_cache.py,sha256=XGUF5mxQTSCzD7GW_ltNP2p5aelEKrMXzdezufJ7NCQ,6484
-sglang/srt/managers/router/scheduler.py,sha256=V-LAnVSzgD2ddy2eXW3jWURCeq9Lv7YxCGk4kHyytfM,2818
-sglang/srt/models/gemma.py,sha256=8XlfHPtVixPYYjz5F9T4DOAuoordWFStmyFFWGfny1k,11582
-sglang/srt/models/llama2.py,sha256=VL4iN8R3wyTNr0bDxxKdLNnVGEvdXF6iGvA768YeakA,11611
-sglang/srt/models/llava.py,sha256=42sn-AgI-6dMaTEU4aEbi4Js5epy0J3JVQoMooUOKt8,14922
-sglang/srt/models/mistral.py,sha256=XSn7fiZqspyWVTYrpVAacAnWdwAybBtyn9-Sh9AvMTM,254
-sglang/srt/models/mixtral.py,sha256=wqIwKfR90ih0gDiTZkFZcQD4PIYpZFD3CmzxRcuKIqw,13915
-sglang/srt/models/qwen.py,sha256=CvdbcF90aI1tJPSQ-3OMUaQGMuaxCGe0y29m5nU_Yj0,9225
-sglang/srt/models/qwen2.py,sha256=myPc0wvgf5ZzJyGhUGN49YjY-tMf4t8Jn_Imjg8D7Mk,11307
-sglang/srt/models/stablelm.py,sha256=vMZUNgwXKPGYr5FcdYHw5g3QifVu9owKqq51_-EBOY0,10817
-sglang/srt/models/yivl.py,sha256=Qvp-zQ93cOZGg3zVyaiQLhRsfXiLrQhxu9TyQP2FMm4,4414
-sglang/test/test_conversation.py,sha256=1zIrXcXiwEliPHgDAsqsQUA7JKzZ5fnQEU-U6L887FU,1592
-sglang/test/test_openai_protocol.py,sha256=eePzoskYR3PqfWczSVZvg8ja63qbT8TFUNEMyzDZpa8,1657
-sglang/test/test_programs.py,sha256=mrLhGuprwvx8ZJ-0Qe28E-iCw5Qv-9T0SAv1Jgo1AJw,11421
-sglang/test/test_utils.py,sha256=6PhTRi8UnR-BRNjit6aGu0M5lO0RebNQwEcDt712hE4,4830
-sglang-0.1.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sglang-0.1.14.dist-info/METADATA,sha256=C5N0VOYRHixdJcsf4dExIvP-Q099kYBMKs_dA4LBXSM,28809
-sglang-0.1.14.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-sglang-0.1.14.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.1.14.dist-info/RECORD,,