sglang 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sglang/srt/server.py CHANGED
@@ -65,9 +65,6 @@ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
65
65
  app = FastAPI()
66
66
  tokenizer_manager = None
67
67
 
68
- # Put some args for easily access
69
- global_server_args_dict = {}
70
-
71
68
 
72
69
  @app.get("/health")
73
70
  async def health() -> Response:
@@ -150,14 +147,6 @@ def available_models():
150
147
  return ModelList(data=model_cards)
151
148
 
152
149
 
153
- def _set_global_server_args(server_args: ServerArgs):
154
- global global_server_args_dict
155
- global_server_args_dict = {
156
- "disable_flashinfer": server_args.disable_flashinfer,
157
- "attention_reduce_in_fp32": server_args.attention_reduce_in_fp32,
158
- }
159
-
160
-
161
150
  def _set_torch_compile_config():
162
151
  # The following configurations are for torch compile optimizations
163
152
  import torch._dynamo.config
@@ -176,6 +165,8 @@ def launch_server(
176
165
  model_overide_args: Optional[dict] = None,
177
166
  pipe_finish_writer: Optional[mp.connection.Connection] = None,
178
167
  ):
168
+ server_args.check_server_args()
169
+
179
170
  """Launch an HTTP server."""
180
171
  global tokenizer_manager
181
172
 
@@ -211,8 +202,6 @@ def launch_server(
211
202
  if server_args.enable_torch_compile:
212
203
  _set_torch_compile_config()
213
204
 
214
- _set_global_server_args(server_args)
215
-
216
205
  # Allocate ports
217
206
  server_args.port, server_args.additional_ports = allocate_init_ports(
218
207
  server_args.port,
@@ -230,8 +219,6 @@ def launch_server(
230
219
 
231
220
  # Handle multi-node tensor parallelism
232
221
  if server_args.nnodes > 1:
233
- assert server_args.dp_size == 1, "Multi-node dp is not supported."
234
-
235
222
  if server_args.node_rank != 0:
236
223
  tp_size_local = server_args.tp_size // server_args.nnodes
237
224
  gpu_ids = [
sglang/srt/server_args.py CHANGED
@@ -28,6 +28,7 @@ class ServerArgs:
28
28
  mem_fraction_static: Optional[float] = None
29
29
  max_prefill_tokens: Optional[int] = None
30
30
  max_running_requests: Optional[int] = None
31
+ max_num_reqs: Optional[int] = None
31
32
  schedule_heuristic: str = "lpm"
32
33
  schedule_conservativeness: float = 1.0
33
34
 
@@ -51,13 +52,14 @@ class ServerArgs:
51
52
 
52
53
  # Optimization/debug options
53
54
  disable_flashinfer: bool = False
55
+ disable_flashinfer_sampling: bool = False
54
56
  disable_radix_cache: bool = False
55
57
  disable_regex_jump_forward: bool = False
56
58
  disable_cuda_graph: bool = False
57
59
  disable_disk_cache: bool = False
58
60
  enable_torch_compile: bool = False
59
- attention_reduce_in_fp32: bool = False
60
61
  enable_p2p_check: bool = False
62
+ attention_reduce_in_fp32: bool = False
61
63
  efficient_weight_load: bool = False
62
64
 
63
65
  # Distributed args
@@ -203,6 +205,12 @@ class ServerArgs:
203
205
  default=ServerArgs.max_running_requests,
204
206
  help="The maximum number of running requests.",
205
207
  )
208
+ parser.add_argument(
209
+ "--max-num-reqs",
210
+ type=int,
211
+ default=None,
212
+ help="The maximum number of requests to serve in the memory pool. If the model have a large context length, you may need to decrease this value to avoid out-of-memory errors.",
213
+ )
206
214
  parser.add_argument(
207
215
  "--schedule-heuristic",
208
216
  type=str,
@@ -296,7 +304,12 @@ class ServerArgs:
296
304
  parser.add_argument(
297
305
  "--disable-flashinfer",
298
306
  action="store_true",
299
- help="Disable flashinfer inference kernels.",
307
+ help="Disable flashinfer attention kernels.",
308
+ )
309
+ parser.add_argument(
310
+ "--disable-flashinfer-sampling",
311
+ action="store_true",
312
+ help="Disable flashinfer sampling kernels.",
300
313
  )
301
314
  parser.add_argument(
302
315
  "--disable-radix-cache",
@@ -324,15 +337,15 @@ class ServerArgs:
324
337
  help="Optimize the model with torch.compile, experimental feature.",
325
338
  )
326
339
  parser.add_argument(
327
- "--attention-reduce-in-fp32",
340
+ "--enable-p2p-check",
328
341
  action="store_true",
329
- help="Cast the intermidiate attention results to fp32 to avoid possible crashes related to fp16."
330
- "This only affects Triton attention kernels",
342
+ help="Enable P2P check for GPU access, otherwise the p2p access is allowed by default.",
331
343
  )
332
344
  parser.add_argument(
333
- "--enable-p2p-check",
345
+ "--attention-reduce-in-fp32",
334
346
  action="store_true",
335
- help="Enable P2P check for GPU access, otherwise the p2p access is allowed by default.",
347
+ help="Cast the intermidiate attention results to fp32 to avoid possible crashes related to fp16."
348
+ "This only affects Triton attention kernels",
336
349
  )
337
350
  parser.add_argument(
338
351
  "--efficient-weight-load",
@@ -357,6 +370,14 @@ class ServerArgs:
357
370
  f"disable_disk_cache={self.disable_disk_cache}, "
358
371
  )
359
372
 
373
+ def check_server_args(self):
374
+ assert (
375
+ self.tp_size % self.nnodes == 0
376
+ ), "tp_size must be divisible by number of nodes"
377
+ assert not (
378
+ self.dp_size > 1 and self.node_rank is not None
379
+ ), "multi-node data parallel is not supported"
380
+
360
381
 
361
382
  @dataclasses.dataclass
362
383
  class PortArgs:
@@ -118,7 +118,11 @@ def test_decode_json_regex():
118
118
  s += "}"
119
119
 
120
120
  ret = decode_json.run()
121
- js_obj = json.loads(ret["json_output"])
121
+ try:
122
+ js_obj = json.loads(ret["json_output"])
123
+ except json.decoder.JSONDecodeError:
124
+ print(ret["json_output"])
125
+ raise
122
126
  assert isinstance(js_obj["name"], str)
123
127
  assert isinstance(js_obj["population"], int)
124
128
 
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.4"
1
+ __version__ = "0.2.6"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -249,7 +249,7 @@ Requires-Dist: outlines >=0.0.44 ; extra == 'srt'
249
249
 
250
250
  --------------------------------------------------------------------------------
251
251
 
252
- | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) |
252
+ | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) | [**Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2ngly9muu-t37XiH87qvD~6rVBTkTEHw) |
253
253
 
254
254
  SGLang is a fast serving framework for large language models and vision language models.
255
255
  It makes your interaction with models faster and more controllable by co-designing the backend runtime and frontend language.
@@ -404,16 +404,17 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
404
404
  ### Run Llama 3.1 405B
405
405
 
406
406
  ```bash
407
- # 2 nodes run 405B fp16
407
+ ## Run 405B (fp8) on a single node
408
+ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
409
+
410
+ ## Run 405B (fp16) on two nodes
408
411
  # Replace `172.16.4.52:20000` with the IP address and port of your own first node. CUDA Graph is disabled temporarily.
412
+
409
413
  # on the first node
410
414
  GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph --mem-frac 0.75
411
415
 
412
416
  # on the second node
413
417
  GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph --mem-frac 0.75
414
-
415
- # single node run 405B fp8
416
- python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
417
418
  ```
418
419
 
419
420
  ### Supported Models
@@ -422,6 +423,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instr
422
423
  - Mistral / Mixtral
423
424
  - Gemma / Gemma 2
424
425
  - Qwen / Qwen 2 / Qwen 2 MoE
426
+ - DeepSeek / DeepSeek 2
425
427
  - LLaVA 1.5 / 1.6
426
428
  - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000`
427
429
  - `python -m sglang.launch_server --model-path liuhaotian/llava-v1.6-vicuna-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --chat-template vicuna_v1.1 --port 30000`
@@ -442,7 +444,7 @@ Instructions for supporting a new model are [here](https://github.com/sgl-projec
442
444
 
443
445
  ### Benchmark Performance
444
446
 
445
- - Benchmark a single static batch. Run the following command without launching a server. The arguments are the same as those for `launch_server.py`.
447
+ - Benchmark a single static batch by running the following command without launching a server. The arguments are the same as those for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory at a batch size that a real server can handle. This is because a real server splits the prefill into several batches/chunks, while this unit test does not.
446
448
  ```
447
449
  python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32
448
450
  ```
@@ -7,11 +7,11 @@ sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
7
7
  sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
8
8
  sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
9
9
  sglang/utils.py,sha256=arJuwOAEX445M2NL9SAOi6jBNu0-cfU04PLAr-hIH3U,8168
10
- sglang/version.py,sha256=SBl2EPFW-ltPvQ7vbVWItyAsz3aKYIpjO7vcfr84GkU,22
10
+ sglang/version.py,sha256=Oz5HbwHMyE87nmwV80AZzpkJPf-wBg7eDuJr_BXZkhU,22
11
11
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
13
13
  sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
14
- sglang/lang/interpreter.py,sha256=27j7H9p7TY4uUfF9f5E17FxK1xCNeNju4aut_PaWCrQ,29693
14
+ sglang/lang/interpreter.py,sha256=fbPrKF_SDpVPsiV2WbmlMfwRA7C9T9_IyVmGnpaXa0A,29687
15
15
  sglang/lang/ir.py,sha256=5VVK2JnbspdysrhcGgkmp_JlAprd2XqqRnS_GfP_XWc,16645
16
16
  sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
17
17
  sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,55 +19,56 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
19
19
  sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
20
20
  sglang/lang/backend/litellm.py,sha256=QsaLRh0KVyuaxRZGAvLOdCCSStIMs-V0XyMX0PR6y0w,2452
21
21
  sglang/lang/backend/openai.py,sha256=-ScfI2TFALB_FTYBur9ab0gNYxK1ogHkhdLxX19t6-Y,14808
22
- sglang/lang/backend/runtime_endpoint.py,sha256=TZ0NV89or5_3MIZZFnc1JXAAjnv7tCfeQmHDla8R0e0,9208
22
+ sglang/lang/backend/runtime_endpoint.py,sha256=6iW1S62KmYyQGiWsHJFhZidK01vlIE55IsYN2tP38WQ,9202
23
23
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
24
24
  sglang/srt/conversation.py,sha256=Il7JJuu4o42k2xdBWVfONNmstTsAM-4idX6AcEOnrXQ,15526
25
25
  sglang/srt/flush_cache.py,sha256=SJsbZnmDhH-gb9ch3hIwnI_nuwaOLlKvlXADyLBGENk,403
26
- sglang/srt/hf_transformers_utils.py,sha256=94mOI93B2xOmXKqfJfEoGxqHgwwlWNbPHgsA47AQJK8,11245
26
+ sglang/srt/hf_transformers_utils.py,sha256=RnyxC1_OmOf-QzdPBziqAUOIQXyRzrb4RNlqFB1ArEc,11354
27
27
  sglang/srt/memory_pool.py,sha256=FhJk5GtYortO3MJIsMMQ-o49agwDHVX1aEQH2LITq6c,3949
28
28
  sglang/srt/mm_utils.py,sha256=OptgAHDX-73Bk4jAdr2BOAJtiEXJNzPrMhaM-dy275c,8889
29
- sglang/srt/model_config.py,sha256=lZu1D-XLVMETHS6FBMoPn8Uowa9QFGe95d3SuWrr2q8,5282
30
- sglang/srt/sampling_params.py,sha256=OI11asr1Bd_E5soDjih614v4flgWxdMZU9HAF0aBafQ,3062
31
- sglang/srt/server.py,sha256=IUed6vnXCx7-xbrpEMAaJZ_aa4UubPAQ5pXvcv-xNoY,14607
32
- sglang/srt/server_args.py,sha256=aF6L35mEB-FU3BL_ooKuCIcOXLhYLxA9-MjpaOTQRCo,13189
29
+ sglang/srt/model_config.py,sha256=9VF7ET0CGKEY-zdiU7kGv8Cg7H_9Q1fmqtI3C0z22S0,5458
30
+ sglang/srt/sampling_params.py,sha256=WjJ_sOhbJVMKIBH8gJWQKhzeK5Ipu9XRNV7soWnLtak,3122
31
+ sglang/srt/server.py,sha256=IKSTgp6FJN6TE9anog47zh9GJYXoyMjEKBNXUZ89Cuk,14197
32
+ sglang/srt/server_args.py,sha256=RfWoipSUURmv5NqT4L_YF9qJ6gOkZ8omRUFC_5fmgts,14043
33
33
  sglang/srt/utils.py,sha256=HvKkGbut8sOxMpGIzYsJ9NEZJg48LOnxyGESaGZmANs,22385
34
34
  sglang/srt/constrained/__init__.py,sha256=5LB3_mDTMW6wcRkFA5J2Rd5HPHHEKRyiELhe4gtlBYM,1472
35
35
  sglang/srt/constrained/base_cache.py,sha256=QQjmFEiT8jlOskJoZobhrDl2TKB-B4b1LPQo9JQCP_w,1405
36
- sglang/srt/constrained/fsm_cache.py,sha256=P4qNDHHxpKpTnYL_8V1R6OFXlUwbM6ZcBdzddpcBgb4,1135
36
+ sglang/srt/constrained/fsm_cache.py,sha256=HlzFs9TXvMFmeZhTpXmJU3UNQ_Kix4Ir-SwpqXGhX8k,2061
37
37
  sglang/srt/constrained/jump_forward.py,sha256=s60jZ7Ue8zaodgQm7gDpN6pSedpvpUck_waJALUMj60,5615
38
38
  sglang/srt/layers/context_flashattention_nopad.py,sha256=7ps_9W_ia9zikL9HqsSUwWHyBVotywosE-dOiPtaGY8,4615
39
39
  sglang/srt/layers/extend_attention.py,sha256=aYAAL9HZJpaSASp-ulMvbmSmyMcqdYUsgVQC-Lbm7_U,12008
40
40
  sglang/srt/layers/fused_moe.py,sha256=uyrbCaIHioq3G00xQUrCo53hYDoHzk5rep3Eji3oQiQ,20258
41
41
  sglang/srt/layers/linear.py,sha256=qLwFkOiRAljzE7LkAkLRdcCdVMk-t7b56jEjwQAuYDM,33953
42
- sglang/srt/layers/logits_processor.py,sha256=KyRYANCiq9Cfu_VPjrIbSBAlqN_clcAgF3JrG9waU5k,9674
43
- sglang/srt/layers/radix_attention.py,sha256=A3J_wOlysjblFXHgehAqRHBQmpYAHLyUovyLFsrMJ7A,6386
44
- sglang/srt/layers/token_attention.py,sha256=EJ4gjbVLfshOZ_vr1iB-Eq8_B-4F26n_wPDj6e1Zrww,7386
42
+ sglang/srt/layers/logits_processor.py,sha256=VjP6T582K64X0mfyPUkhcIEZxsqJNu6ziqR3V82N_jE,10118
43
+ sglang/srt/layers/radix_attention.py,sha256=to6w0kIq6dtaOYJtqIZcqR3t1yf05qBH1LWnFlE-jEQ,6374
44
+ sglang/srt/layers/token_attention.py,sha256=uBtk3I6KeFjBRKRuQoG5BEZtVJsX4p7UOtJoej6ILZI,7411
45
45
  sglang/srt/layers/quantization/__init__.py,sha256=PQFzdPpul98DvywBA6YMBOnrMjtHE1LMlMpJ7FM8J3I,1971
46
46
  sglang/srt/layers/quantization/fp8.py,sha256=jaqgRFnHC--IL8iqB6Qygi-KXYPYBKKqt_j4Rk55_h4,24946
47
47
  sglang/srt/managers/detokenizer_manager.py,sha256=8rN2cdMr61LWy07lingEqLnNy0W5Rebdn14IsTQ9PCs,5049
48
- sglang/srt/managers/io_struct.py,sha256=VHy9wdZ3sfZA7fS6iq8lqbxdHL5WkBZNqxpacyZ8_8c,5483
49
- sglang/srt/managers/tokenizer_manager.py,sha256=SbivhFhZUR9HU9pLTe93MlYprAFAHzOU3KMBA2piQUk,19308
50
- sglang/srt/managers/controller/cuda_graph_runner.py,sha256=0aRqA1_34oJ557Zn8PjpJecex5bBWJdnCmBlcDVvYO0,8509
51
- sglang/srt/managers/controller/infer_batch.py,sha256=SKwCwhnZ_CNlG0mVCEc4X0e4HNjJFke-c8zdWP3TzjQ,34186
48
+ sglang/srt/managers/io_struct.py,sha256=WmBGrWR8R6X2zh2p1FkfPZtJzuGSlNW8cmIDm0EEqMA,5528
49
+ sglang/srt/managers/tokenizer_manager.py,sha256=2it1o4dKd7nFzfZflOw1cT03gFktqC2sVPICbBSR4c0,19594
50
+ sglang/srt/managers/controller/cuda_graph_runner.py,sha256=KEqX4Tc1yEWW52LzzFb4THb-guYIaft2pxxH8rWchSA,8808
51
+ sglang/srt/managers/controller/infer_batch.py,sha256=3DixMdSW0odH5I6p7h8_xtRlHx4q76ArR6YZW8Gkqzg,35888
52
52
  sglang/srt/managers/controller/manager_multi.py,sha256=DT8Y9RF5OyTxlrLEZYz4claNWir3UrVztdOZaVPiA6g,6077
53
53
  sglang/srt/managers/controller/manager_single.py,sha256=2xO_iWK6tWvc0B31nKbe2N3klxwQBJmPTnFhNjzhVSI,4566
54
- sglang/srt/managers/controller/model_runner.py,sha256=4-nBd9_MgIlamjEdLZDepBEykYNR8nL-65Sf1EYsnx0,14371
54
+ sglang/srt/managers/controller/model_runner.py,sha256=9o4xWnfI9-FJU6-S7WfEFlGMjWA2YesAhUKpuq8urhk,14854
55
55
  sglang/srt/managers/controller/radix_cache.py,sha256=tx8LEQpqLxipw9UUVj4D1YQLMMDmWnjDYv8oDlOl-co,8210
56
56
  sglang/srt/managers/controller/schedule_heuristic.py,sha256=SQAGzPS3aB_TPj7rnPBhewwyR6W1sVwW4D3zG3JUY00,2714
57
- sglang/srt/managers/controller/tp_worker.py,sha256=yjz-Xzl0zEy4QSU-EYneZH5vi3oHtBuXTtYe4VuDp2g,30517
57
+ sglang/srt/managers/controller/tp_worker.py,sha256=VYhO3xcJrcDQwonGLWSWKHq4T7BvFmb6-L5LxY3-fhE,30607
58
58
  sglang/srt/model_loader/model_loader.py,sha256=VS8VQL5ITN3akZ9eU_-uHWMan1axLMNG2_O12HzGysA,10132
59
59
  sglang/srt/model_loader/utils.py,sha256=I2PS5HIH5Cg-p7xKO_Cw_foK2vQ61xVc3zQv7CbeGEw,10120
60
60
  sglang/srt/models/chatglm.py,sha256=pH8g2Dj8qQLGPYpWVTb-IONfXsdfmpWi0-IEYNdSi4s,13296
61
61
  sglang/srt/models/commandr.py,sha256=hHsNQWi0X8rNL7_gpcoUxQxdhxtvx5_RVx8u6cLzqYQ,13606
62
62
  sglang/srt/models/dbrx.py,sha256=rRxOusGPu670ommeqXg62AllwB1apzE4yZoWc1fcr2M,14095
63
63
  sglang/srt/models/deepseek.py,sha256=YtoPmv4fKmiH_jsRMSab9Wxq3aOZga9pCPGnkCs3Vvs,15457
64
+ sglang/srt/models/deepseek_v2.py,sha256=1FqLe6tSENFpYgcEkmMr2-M4qksgne2glU3kZhSBB0Q,19527
64
65
  sglang/srt/models/gemma.py,sha256=DweoalfWYhLL-ZWLAO5gl4SCZflWmejVeDG3Vky_WNo,11719
65
66
  sglang/srt/models/gemma2.py,sha256=x3Dua-TVwRm5fJjo5UDekdoWqwt9xYbMuB-ogfXyiT8,15860
66
67
  sglang/srt/models/gpt_bigcode.py,sha256=XHO1naPdXfiKYQRQ6uZe1fN3PBDhKH3-bchsaaZvfE4,9637
67
68
  sglang/srt/models/grok.py,sha256=611zrlIchvFaVfztRdBY7z97oU3KB-anykbOZy1hK6M,27295
68
69
  sglang/srt/models/internlm2.py,sha256=8MNcwxU5Th9IxWa314HqqmbCRlPUFScnfneBDs0riIU,11659
69
70
  sglang/srt/models/llama2.py,sha256=OyAf_lun5aZEsT80WmrIYBF8QXTXRpW8sUlylr4AZIc,14204
70
- sglang/srt/models/llama_classification.py,sha256=foCPvNyP2bTZ0YcRBF-qkmBv-gT24lhLNCXP30Oq4VU,4370
71
+ sglang/srt/models/llama_classification.py,sha256=Z2dvZAdOwCnN-lGFZRcwU0rNreE1gKwLefeWzEH36Uw,4366
71
72
  sglang/srt/models/llava.py,sha256=vBI6EEeOG_9o23Shi9h8k58rxTOHZnSKMmPl3B3Q3uc,17924
72
73
  sglang/srt/models/llavavid.py,sha256=SrNQ-U2wekHvP_up-ZXRkCSros2NzheHpPfXHrp0YBU,13050
73
74
  sglang/srt/models/minicpm.py,sha256=9uE8D-NopAj-sfaKJ7d-0x-PuCTEevQPoHPZvZlwstA,13277
@@ -79,14 +80,14 @@ sglang/srt/models/qwen2.py,sha256=87Tt1Bti-Py3AGudcf7k5ni-OHhtDKPj_Hke44YGw4U,11
79
80
  sglang/srt/models/qwen2_moe.py,sha256=oHNoo45myV5kitkls2GWVzuGt1Q4pRHN2nLlXEltFI8,17581
80
81
  sglang/srt/models/stablelm.py,sha256=Z_XCDSHY_QMz3lZwwkZdIZjEOizZjLYJU9GDi8o08qQ,10802
81
82
  sglang/srt/models/yivl.py,sha256=55KPrQ-dVplI0hh2WCSugjc1luE0J2UAafjZxu_7Xuc,4367
82
- sglang/srt/openai_api/adapter.py,sha256=A0IG9ZKEMkkYCsLrVEspnVWzZHBUbc1vHv747LrF8ew,15920
83
- sglang/srt/openai_api/protocol.py,sha256=j7ifIR2SFQxTwaHAd9ksM096vfffcNltzTH4sg7H0RA,5739
83
+ sglang/srt/openai_api/adapter.py,sha256=DVZ2niAEOgE8GQdYnuvwjrGiFRkAu5YtOB-yxOlF_Eg,15868
84
+ sglang/srt/openai_api/protocol.py,sha256=jTb22jv5caB7k7Ub2ltYEbTtDheZjwwWAAUdvjiLTR0,5741
84
85
  sglang/test/test_conversation.py,sha256=gF_AyOxQgpPQBPnA57-kq-M0p_zFu-rBDMFgAq655Rw,1596
85
86
  sglang/test/test_openai_protocol.py,sha256=DVx3r6hrb8oRqbo5AYIleldxbqMBTtb-gtORM6t_Y1c,1661
86
- sglang/test/test_programs.py,sha256=uefeHUFKT2NJESOujj-CsnPXdw1aQQN2TzUbPCHJjGs,13654
87
+ sglang/test/test_programs.py,sha256=s4WGpTmYP4Yx5g8JYZpbkeF9RN5iUnlKdi8FGAZovTc,13756
87
88
  sglang/test/test_utils.py,sha256=kD_fQe3WroZ9Kc3NBRKPiZOFJ_JD2uEE9XIvPp6AD9Y,11048
88
- sglang-0.2.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
89
- sglang-0.2.4.dist-info/METADATA,sha256=Fr6JX7mNV5-CjsMH-r3sSwdEFOFXbJWqTjlXG0ox08s,31692
90
- sglang-0.2.4.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
91
- sglang-0.2.4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
92
- sglang-0.2.4.dist-info/RECORD,,
89
+ sglang-0.2.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
90
+ sglang-0.2.6.dist-info/METADATA,sha256=g_G_XHbWCNSY9F6RieXV43svnNzq1wonwrArNxX0VNA,32095
91
+ sglang-0.2.6.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
92
+ sglang-0.2.6.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
93
+ sglang-0.2.6.dist-info/RECORD,,
File without changes