sglang 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff shows the changes between two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (50)
  1. sglang/api.py +7 -1
  2. sglang/bench_latency.py +3 -2
  3. sglang/global_config.py +1 -1
  4. sglang/lang/backend/runtime_endpoint.py +60 -49
  5. sglang/lang/interpreter.py +4 -2
  6. sglang/lang/ir.py +13 -4
  7. sglang/srt/constrained/jump_forward.py +13 -2
  8. sglang/srt/layers/activation.py +0 -1
  9. sglang/srt/layers/extend_attention.py +3 -1
  10. sglang/srt/layers/fused_moe/__init__.py +1 -0
  11. sglang/srt/layers/{fused_moe.py → fused_moe/fused_moe.py} +165 -108
  12. sglang/srt/layers/fused_moe/layer.py +587 -0
  13. sglang/srt/layers/logits_processor.py +4 -4
  14. sglang/srt/layers/radix_attention.py +38 -14
  15. sglang/srt/managers/schedule_batch.py +9 -14
  16. sglang/srt/managers/tokenizer_manager.py +1 -1
  17. sglang/srt/managers/tp_worker.py +1 -7
  18. sglang/srt/model_executor/cuda_graph_runner.py +48 -17
  19. sglang/srt/model_executor/forward_batch_info.py +132 -58
  20. sglang/srt/model_executor/model_runner.py +61 -28
  21. sglang/srt/models/chatglm.py +2 -2
  22. sglang/srt/models/commandr.py +1 -1
  23. sglang/srt/models/deepseek.py +2 -2
  24. sglang/srt/models/deepseek_v2.py +7 -6
  25. sglang/srt/models/gemma.py +1 -1
  26. sglang/srt/models/gemma2.py +11 -5
  27. sglang/srt/models/grok.py +50 -396
  28. sglang/srt/models/minicpm.py +2 -2
  29. sglang/srt/models/mixtral.py +56 -254
  30. sglang/srt/models/mixtral_quant.py +1 -4
  31. sglang/srt/models/qwen.py +2 -2
  32. sglang/srt/models/qwen2.py +2 -2
  33. sglang/srt/models/qwen2_moe.py +2 -2
  34. sglang/srt/models/stablelm.py +1 -1
  35. sglang/srt/openai_api/adapter.py +32 -21
  36. sglang/srt/sampling_params.py +0 -4
  37. sglang/srt/server.py +23 -15
  38. sglang/srt/server_args.py +7 -1
  39. sglang/srt/utils.py +1 -2
  40. sglang/test/runners.py +18 -10
  41. sglang/test/test_programs.py +32 -5
  42. sglang/test/test_utils.py +5 -1
  43. sglang/version.py +1 -1
  44. {sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/METADATA +12 -4
  45. {sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/RECORD +48 -48
  46. {sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/WHEEL +1 -1
  47. sglang/srt/model_loader/model_loader.py +0 -292
  48. sglang/srt/model_loader/utils.py +0 -275
  49. {sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/LICENSE +0 -0
  50. {sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/top_level.txt +0 -0
sglang/srt/server.py CHANGED
@@ -288,6 +288,8 @@ def launch_server(
 
     # Launch processes
     tokenizer_manager = TokenizerManager(server_args, port_args, model_overide_args)
+    if server_args.chat_template:
+        load_chat_template_for_openai_api(tokenizer_manager, server_args.chat_template)
     pipe_controller_reader, pipe_controller_writer = mp.Pipe(duplex=False)
     pipe_detoken_reader, pipe_detoken_writer = mp.Pipe(duplex=False)
 
@@ -358,6 +360,7 @@ def _set_envs_and_config(server_args: ServerArgs):
     os.environ["NCCL_CUMEM_ENABLE"] = "0"
     os.environ["NCCL_NVLS_ENABLE"] = "0"
     os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
+    os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"
 
     # Set ulimit
     set_ulimit()
@@ -375,16 +378,11 @@ def _set_envs_and_config(server_args: ServerArgs):
     # FIXME: remove this after https://github.com/triton-lang/triton/pull/4295 is used as a dependency.
     maybe_set_triton_cache_manager()
 
-    # Set global chat template
-    if server_args.chat_template:
-        # TODO: replace this with huggingface transformers template
-        load_chat_template_for_openai_api(server_args.chat_template)
-
     # Check flashinfer version
     if not server_args.disable_flashinfer:
         assert_pkg_version(
             "flashinfer",
-            "0.1.4",
+            "0.1.5",
             "Please uninstall the old version and "
             "reinstall the latest version by following the instructions "
             "at https://docs.flashinfer.ai/installation.html.",
@@ -533,11 +531,18 @@ class Runtime:
         prompt: str,
         sampling_params: Optional[Dict] = None,
     ):
-        json_data = {
-            "text": prompt,
-            "sampling_params": sampling_params,
-            "stream": True,
-        }
+        if self.server_args.skip_tokenizer_init:
+            json_data = {
+                "input_ids": prompt,
+                "sampling_params": sampling_params,
+                "stream": True,
+            }
+        else:
+            json_data = {
+                "text": prompt,
+                "sampling_params": sampling_params,
+                "stream": True,
+            }
         pos = 0
 
         timeout = aiohttp.ClientTimeout(total=3 * 3600)
@@ -549,10 +554,13 @@ class Runtime:
                     if chunk == "data: [DONE]\n\n":
                         break
                    data = json.loads(chunk[5:].strip("\n"))
-                    cur = data["text"][pos:]
-                    if cur:
-                        yield cur
-                    pos += len(cur)
+                    if hasattr(data, "text"):
+                        cur = data["text"][pos:]
+                        if cur:
+                            yield cur
+                        pos += len(cur)
+                    else:
+                        yield data
 
    add_request = async_generate
 
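The async_generate changes above let the Runtime streaming helper send pre-tokenized input as "input_ids" when the server runs with skip_tokenizer_init. A minimal usage sketch, assuming skip_tokenizer_init is a ServerArgs field forwarded by the Runtime constructor (it is referenced in the hunk above); the model path and token IDs are illustrative:

```python
import asyncio

import sglang as sgl


async def main():
    # Kwargs passed to Runtime are forwarded to ServerArgs.
    runtime = sgl.Runtime(
        model_path="meta-llama/Meta-Llama-3.1-8B-Instruct",
        skip_tokenizer_init=True,  # the server then expects token IDs instead of text
    )
    input_ids = [128000, 9906, 1917]  # pre-tokenized prompt (illustrative values)
    # With skip_tokenizer_init set, the request body carries {"input_ids": ...} as in the diff above.
    async for chunk in runtime.async_generate(input_ids, {"max_new_tokens": 16}):
        print(chunk)
    runtime.shutdown()


asyncio.run(main())
```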
sglang/srt/server_args.py CHANGED
@@ -17,9 +17,12 @@ limitations under the License.
 
 import argparse
 import dataclasses
+import logging
 import random
 from typing import List, Optional, Union
 
+logger = logging.getLogger(__name__)
+
 
 @dataclasses.dataclass
 class ServerArgs:
@@ -46,7 +49,7 @@ class ServerArgs:
     max_running_requests: Optional[int] = None
     max_num_reqs: Optional[int] = None
     max_total_tokens: Optional[int] = None
-    chunked_prefill_size: int = -1
+    chunked_prefill_size: int = 8192
     max_prefill_tokens: int = 16384
     schedule_policy: str = "lpm"
     schedule_conservativeness: float = 1.0
@@ -446,6 +449,9 @@ class ServerArgs:
         assert not (
             self.dp_size > 1 and self.node_rank is not None
         ), "multi-node data parallel is not supported"
+        if "gemma-2" in self.model_path.lower():
+            logger.info(f"When using sliding window in gemma-2, turn on flashinfer.")
+            self.disable_flashinfer = False
 
 
 @dataclasses.dataclass
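Two behavioral changes fall out of the server_args.py hunks: chunked_prefill_size now defaults to 8192 instead of -1 (disabled), and gemma-2 model paths force flashinfer back on. A minimal sketch of inspecting the new default, assuming ServerArgs can be instantiated directly with only a model path:

```python
from sglang.srt.server_args import ServerArgs

# model_path is the only required field; everything else keeps the dataclass defaults.
args = ServerArgs(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")
print(args.chunked_prefill_size)  # 8192 in 0.2.13; was -1 (disabled) in 0.2.12

# The old behavior can still be requested explicitly.
args = ServerArgs(
    model_path="meta-llama/Meta-Llama-3.1-8B-Instruct",
    chunked_prefill_size=-1,
)
```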
sglang/srt/utils.py CHANGED
@@ -35,7 +35,6 @@ import torch
 import torch.distributed as dist
 from fastapi.responses import JSONResponse
 from packaging import version as pkg_version
-from starlette.middleware.base import BaseHTTPMiddleware
 from torch.nn.parameter import Parameter
 from triton.runtime.cache import (
     FileCacheManager,
@@ -644,7 +643,7 @@ def set_ulimit(target_soft_limit=65535):
         logger.warn(f"Fail to set RLIMIT_NOFILE: {e}")
 
 
-def is_llama3_405b_fp8(model_config):
+def is_llama3_405b_fp8_head_16(model_config):
     """Return whether the model is meta-llama/Meta-Llama-3.1-405B-FP8 with 16 kv heads."""
     if (
         model_config.hf_config.architectures[0] == "LlamaForCausalLM"
sglang/test/runners.py CHANGED
@@ -15,6 +15,7 @@ limitations under the License.
 
 import json
 import multiprocessing
+import os
 from dataclasses import dataclass
 from typing import List, Union
 
@@ -31,8 +32,14 @@ DEFAULT_PROMPTS = [
     "The capital of the United Kindom is",
     "Today is a sunny day and I like",
     "AI is a field of computer science focused on",
+    "Apple is red. Banana is Yellow. " * 800 + "Apple is",
 ]
 
+dirpath = os.path.dirname(__file__)
+with open(os.path.join(dirpath, "long_prompt.txt"), "r") as f:
+    long_prompt = f.read()
+DEFAULT_PROMPTS.append(long_prompt)
+
 NUM_TOP_LOGPROBS = 5
 
 
@@ -125,16 +132,14 @@ class HFRunner:
             )
 
             logits = self.model.forward(input_ids).logits[0]
-            logprobs = F.log_softmax(
-                logits, dim=-1, dtype=torch.float32
-            ).tolist()
-            # index_of_max = (lambda nums: nums.index(max(nums)))(logprobs[-1])
-            # print("index", index_of_max)
-            logprobs = [
-                sorted(token_logprobs, reverse=True)[:NUM_TOP_LOGPROBS]
-                for token_logprobs in logprobs
-            ]
-            prefill_logprobs.append(logprobs)
+            logprobs = F.log_softmax(logits, dim=-1, dtype=torch.float32)
+            logprobs, top_indices = torch.topk(
+                logprobs, k=NUM_TOP_LOGPROBS, dim=-1
+            )
+            # print("index", top_indices)
+            prefill_logprobs.append(logprobs.tolist())
+            del logits
+            del logprobs
 
         out_queue.put(
             ModelOutput(
@@ -174,6 +179,7 @@ class SRTRunner:
         tp_size=1,
         torch_dtype=torch.float16,
         is_generation_model=None,
+        port=5157,
     ):
         self.is_generation_model = (
             is_generation_model(model_path)
@@ -184,6 +190,8 @@ class SRTRunner:
             model_path=model_path,
             tp_size=tp_size,
             dtype=get_dtype_str(torch_dtype),
+            port=port,
+            mem_fraction_static=0.7,
         )
 
     def forward(
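The HFRunner hunk above replaces a Python-level sort of the full vocabulary with torch.topk when collecting the top prefill logprobs. A standalone sketch of the same pattern on dummy tensors (shapes are illustrative):

```python
import torch
import torch.nn.functional as F

NUM_TOP_LOGPROBS = 5

# Dummy logits standing in for self.model.forward(input_ids).logits[0]
logits = torch.randn(7, 32000)  # (sequence_length, vocab_size)

logprobs = F.log_softmax(logits, dim=-1, dtype=torch.float32)
top_logprobs, top_indices = torch.topk(logprobs, k=NUM_TOP_LOGPROBS, dim=-1)
print(top_logprobs.shape)  # torch.Size([7, 5]): the top-5 logprobs per position
```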
sglang/test/test_programs.py CHANGED
@@ -103,16 +103,19 @@ def test_decode_int():
 def test_decode_json_regex():
     @sgl.function
     def decode_json(s):
-        from sglang.lang.ir import REGEX_FLOAT, REGEX_INT, REGEX_STRING
+        from sglang.lang.ir import REGEX_FLOAT, REGEX_INT, REGEX_STR
 
         s += "Generate a JSON object to describe the basic city information of Paris.\n"
+        s += "Here are the JSON object:\n"
+
+        # NOTE: we recommend using dtype gen or whole regex string to control the output
 
         with s.var_scope("json_output"):
             s += "{\n"
-            s += ' "name": ' + sgl.gen(regex=REGEX_STRING + ",") + "\n"
-            s += ' "population": ' + sgl.gen(regex=REGEX_INT + ",") + "\n"
-            s += ' "area": ' + sgl.gen(regex=REGEX_INT + ",") + "\n"
-            s += ' "latitude": ' + sgl.gen(regex=REGEX_FLOAT) + "\n"
+            s += ' "name": ' + sgl.gen(regex=REGEX_STR) + ",\n"
+            s += ' "population": ' + sgl.gen(regex=REGEX_INT, stop=[" ", "\n"]) + ",\n"
+            s += ' "area": ' + sgl.gen(regex=REGEX_INT, stop=[" ", "\n"]) + ",\n"
+            s += ' "latitude": ' + sgl.gen(regex=REGEX_FLOAT, stop=[" ", "\n"]) + "\n"
             s += "}"
 
     ret = decode_json.run(temperature=0.0)
@@ -359,6 +362,30 @@ def test_regex():
     assert re.match(regex, answer)
 
 
+def test_dtype_gen():
+    @sgl.function
+    def dtype_gen(s):
+        s += "Q: What is the full name of DNS?\n"
+        s += "A: The full nams is " + sgl.gen("str_res", dtype=str, stop="\n") + "\n"
+        s += "Q: Which year was DNS invented?\n"
+        s += "A: " + sgl.gen("int_res", dtype=int) + "\n"
+        s += "Q: What is the value of pi?\n"
+        s += "A: " + sgl.gen("float_res", dtype=float) + "\n"
+        s += "Q: Is the sky blue?\n"
+        s += "A: " + sgl.gen("bool_res", dtype=bool) + "\n"
+
+    state = dtype_gen.run()
+
+    try:
+        state["int_res"] = int(state["int_res"])
+        state["float_res"] = float(state["float_res"])
+        state["bool_res"] = bool(state["bool_res"])
+        # assert state["str_res"].startswith('"') and state["str_res"].endswith('"')
+    except ValueError:
+        print(state)
+        raise
+
+
 def test_completion_speculative():
     @sgl.function(num_api_spec_tokens=64)
     def gen_character_spec(s):
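The new test_dtype_gen above exercises dtype-constrained generation through sgl.gen(..., dtype=...). A minimal sketch of running a similar program outside the test harness, assuming an SGLang server is already serving the model locally (the address is illustrative):

```python
import sglang as sgl


@sgl.function
def year_of_dns(s):
    s += "Q: Which year was DNS invented?\n"
    s += "A: " + sgl.gen("year", dtype=int) + "\n"


# Point the default backend at a running server (address is illustrative).
sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))

state = year_of_dns.run(temperature=0.0)
print(int(state["year"]))  # dtype=int constrains the generated span to an integer
```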
sglang/test/test_utils.py CHANGED
@@ -21,7 +21,11 @@ from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.utils import get_exception_traceback
 
 DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
-DEFAULT_URL_FOR_TEST = "http://127.0.0.1:8157"
+DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
+DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
+DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
+DEFAULT_URL_FOR_E2E_TEST = "http://127.0.0.1:9157"
 
 
 def call_generate_lightllm(prompt, temperature, max_tokens, stop=None, url=None):
sglang/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.2.12"
+__version__ = "0.2.13"
{sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sglang
-Version: 0.2.12
+Version: 0.2.13
 Summary: SGLang is yet another fast serving framework for large language models and vision language models.
 License: Apache License
          Version 2.0, January 2004
@@ -308,7 +308,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
 ### Method 2: From source
 ```
 # Use the last release branch
-git clone -b v0.2.12 https://github.com/sgl-project/sglang.git
+git clone -b v0.2.13 https://github.com/sgl-project/sglang.git
 cd sglang
 
 pip install --upgrade pip
@@ -329,11 +329,19 @@ docker run --gpus all \
     --env "HF_TOKEN=<secret>" \
     --ipc=host \
     lmsysorg/sglang:latest \
-    python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --host 0.0.0.0 --port 30000
+    python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0 --port 30000
 ```
 
+### Method 4: Using docker compose
+
+> This method is recommended if you plan to serve it as a service.
+> A better approach is to use the [k8s-sglang-service.yaml](./docker/k8s-sglang-service.yaml).
+
+1. Copy the [compose.yml](./docker/compose.yaml) to your local machine
+2. Execute the command `docker compose up -d` in your terminal.
+
 ### Common Notes
-- If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server.
+- [FlashInfer](https://github.com/flashinfer-ai/flashinfer) is currently one of the dependencies that must be installed for SGLang. If you are using NVIDIA GPU devices below sm80, such as T4, you can't use SGLang for the time being. We expect to resolve this issue soon, so please stay tuned. If you encounter any FlashInfer-related issues on sm80+ devices (e.g., A100, L40S, H100), consider using Triton's kernel by `--disable-flashinfer --disable-flashinfer-sampling` and raise a issue.
 - If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
 
 ## Backend: SGLang Runtime (SRT)
{sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/RECORD CHANGED
@@ -1,91 +1,91 @@
 sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
-sglang/api.py,sha256=gAY9JhqWXjrYoWnMvR-iiuuY1YSN94We-lc1LH0z3cw,6030
-sglang/bench_latency.py,sha256=E-cfuZSjBGonzKL0LgB0zAqMWpiP3qozB_Ht9dH8qvc,16207
+sglang/api.py,sha256=sRuA17JzayE9SFOhaZFqKFJDb_aRpNlcyKiMA5BzsDk,6258
+sglang/bench_latency.py,sha256=UM5noYvFb6hc7wS82WAFeWTx3u83vkg9pfhyW0KdvY4,16234
 sglang/bench_serving.py,sha256=sS-fawAyzngrOVbPE3N1FBxPojoPd9vj9XQDsWpIYTQ,35798
 sglang/check_env.py,sha256=oU8VmjjPK2SviRhr41cF1953soBu-eTT5E0Hf04zMzo,4974
-sglang/global_config.py,sha256=9JxaFkBKSgep6BVeEl_kx9tuW9PqdijYELyBGTryl6o,1704
+sglang/global_config.py,sha256=nwOjUflwqLQySPUMvk8Hk63TIS6mknh_ODSW3CZ1rJw,1704
 sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
 sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
 sglang/utils.py,sha256=zFYGkC4vOUR3sTv1TmQXcsOLZDtDBR3wnjqnDp3xMIs,8352
-sglang/version.py,sha256=X4KG3FscE5AhbGbcdDDgdDC550CVpxNMwdNLcx6EQ7M,23
+sglang/version.py,sha256=C0atO05M0rfDTTHt02NxNa4jt0eSqXM4AxShEhb2epA,23
 sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
 sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
 sglang/lang/compiler.py,sha256=1Tc6MQs4RsIfrNmmO7PMSUEHIqvNqKOp_HxaYqonwFE,7533
-sglang/lang/interpreter.py,sha256=3RIeSGdKlKTq2Ixg_Tyo0fGEDTvBKS2f9FaJYODBHzA,30102
-sglang/lang/ir.py,sha256=Ow6jXDPIeRd1piAuYjvgyFxfro1G2_-1QwUFfq4Aihs,16842
+sglang/lang/interpreter.py,sha256=8QiLvjUgVJrtzIjS9lCUR01k7BeZWZQsmRAwLMz-cmA,30194
+sglang/lang/ir.py,sha256=WOZdRbONMhhSeD75bvUeQRv4gObxVMtkvzmalRrVdkM,17261
 sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
 sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
 sglang/lang/backend/base_backend.py,sha256=Q5HdiDtyBewQeoYH0kDtBRVL8KFiEPNq9dw7XmauHQ8,1985
 sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
 sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
-sglang/lang/backend/runtime_endpoint.py,sha256=AaBc5yczchX7mkwiKDMyjLjBkJsh2Lubrfd9lvCOlDo,9544
+sglang/lang/backend/runtime_endpoint.py,sha256=SDlp03EuQEK1eGK4_IaFySWgxlp4wCs3EPewZ6O640E,9549
 sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
 sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
 sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
 sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
 sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
-sglang/srt/sampling_params.py,sha256=5V1MhhEvyCWZrCF5VmQxcKNuKVoC4LynY-q4Bx3P3mo,4876
-sglang/srt/server.py,sha256=FvczPB9ojDVLIdC2kic0RLAmOTt0WZrql_BvYzwbeRY,18495
-sglang/srt/server_args.py,sha256=GLuJkgwv-Osmf3IqCvZqfdqIBJjcHkdtoNT0_zq75Kc,16849
-sglang/srt/utils.py,sha256=ReJqGMdquK_cfve269yjpWWQaozTVoEHSLG5P3CKvAg,24102
+sglang/srt/sampling_params.py,sha256=CIrM-OLAjUJ8oSQfhXetjv50BAseexWYOV5Wr6LXYeY,4739
+sglang/srt/server.py,sha256=gSGC6MJLLXsuusizKzTxJaaWiaQjsa-Zm5hxV2fYHb8,18845
+sglang/srt/server_args.py,sha256=YoTVFzt65w1vjypyh0a4FV7BNreVGS49d8uf6TPrM_w,17083
+sglang/srt/utils.py,sha256=MIDD53BT4ukaHO-zmEQZD5l7Xco_gefO0co4FJsMsn4,24053
 sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
 sglang/srt/constrained/base_tool_cache.py,sha256=5sazBMHHDpHMoqOjuY6itCxwTmIFCflIWEDXMtmrPVs,2006
 sglang/srt/constrained/fsm_cache.py,sha256=QTrBFoZCp2FeigtIakz2MCgQLtvQFXgl2lDPQaGtu9M,2784
-sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
-sglang/srt/layers/activation.py,sha256=MXkuGi5caKHEwqUegoEfOk2Omab8OLrxP-sjPj2TVzU,1197
+sglang/srt/constrained/jump_forward.py,sha256=9_HxmXtWjr5S6a5e0cBimbY3ZhiLiJC74V6jIqDXfuo,6575
+sglang/srt/layers/activation.py,sha256=j2zQmY1snfB5DqrYr5KqRUEkMXQn6LVnkeur60FfMCU,1175
 sglang/srt/layers/decode_attention.py,sha256=Vgxd2rWzSZkNFp0bjZRAUAusG4bz6iy3D0CULnN-cdk,8904
-sglang/srt/layers/extend_attention.py,sha256=_LOgzSr-1c2UweHZXADjWHbXOmd2JPm-tUMb1vwTTZI,14197
-sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
+sglang/srt/layers/extend_attention.py,sha256=h4O0R7PJpAVKS3Vx_583zhrFPD0vv6XqzvOcHBI3zoc,14268
 sglang/srt/layers/layernorm.py,sha256=RzN4eESN9S8mw32r2Nxarq7wKFdeG1yhxPmehUMx79s,2073
-sglang/srt/layers/logits_processor.py,sha256=iewPk7VR4jdJeLH6NAO_XqwqM4RhIHdWJzj7-qPRYIw,11362
+sglang/srt/layers/logits_processor.py,sha256=wBgo6IVxWgV4vYRQesnuE2qA8ynB2oFtv0COZSAMIeA,11374
 sglang/srt/layers/pooler.py,sha256=qNMG3Ycvt2yf9mk1Lcs-2K7oPeCuVeDYoHAxkMu9b_Q,1610
 sglang/srt/layers/prefill_attention.py,sha256=y7vdcuX8lMa9Qf_jQYNDvQO9PVCBQSs3hb5LV2DFgpU,5256
-sglang/srt/layers/radix_attention.py,sha256=LpfTizXKXm1oS5oUfh6aowZceHUHqnquvx-GpfyYjdk,7508
+sglang/srt/layers/radix_attention.py,sha256=EA7rc73ZGnle2tQlslF9Ri_VEY07jD0e0cPiKcsqOyA,8473
+sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
+sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
+sglang/srt/layers/fused_moe/layer.py,sha256=ByNlMmmXsckcsjI12rhlg_IH0KvO6zWJoOYuk7i4ogY,20947
 sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
 sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
 sglang/srt/managers/detokenizer_manager.py,sha256=OXufjdCt2ebt-S7MDndjY9Ew16rP4fhualGgj6YEKp0,6295
 sglang/srt/managers/io_struct.py,sha256=Xvfl6DNZ2Ek2S4qlRzpVo3foc-aC-1-N-5odcJ4gdq4,9446
 sglang/srt/managers/policy_scheduler.py,sha256=KRFaZwjCAkPQDX3W8lbzrxYqgOe7LKFDj2BPlcmlnR8,8379
-sglang/srt/managers/schedule_batch.py,sha256=iZ2OwdEn5As7cVGAoe0x97cMCPSS6q_SI_iG79mF8LQ,31111
-sglang/srt/managers/tokenizer_manager.py,sha256=TIIo4YlfdM10LE4JVqv2cO2uDJJtKXDagwzfjMCDU5Q,24858
-sglang/srt/managers/tp_worker.py,sha256=qOx99QL6BIW0aOz7SknWqgflLeNeFYpJsGq0ZsYmYFY,32805
+sglang/srt/managers/schedule_batch.py,sha256=L9kBQZBfsy-2Arzkx4ZjKjNL-zN1BErnv9LqRi3CQNI,30657
+sglang/srt/managers/tokenizer_manager.py,sha256=4cf7JyuMGvLVp6Dv8pWG6c9285O6zuD2Ja0eEePUCNg,24857
+sglang/srt/managers/tp_worker.py,sha256=TPtWHcLM-bh7GGdA7-8c-zdNLFeLxWNnl3iqODKwYWw,32583
 sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
 sglang/srt/mem_cache/chunk_cache.py,sha256=CjZZYlqQzq7mYOiBMLWA5XNb6HIyh5lIMdY-K0OUZEc,2368
 sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
 sglang/srt/mem_cache/memory_pool.py,sha256=eXDCstd5Mvu1CbHt1y9z27Eq60QYwW45FsKbZspu4yw,5310
 sglang/srt/mem_cache/radix_cache.py,sha256=0AVr1BKKDOtTyybUkwxrz6PT8khDx-DpzgN5MgL27IE,10088
-sglang/srt/model_executor/cuda_graph_runner.py,sha256=xQgTTtoMkvYJhYyRJHxPdybmPtfvcODqPLW9btUFt60,10003
-sglang/srt/model_executor/forward_batch_info.py,sha256=B3flTlRNLMa7Km7use1O0Z2YL3-a6rw1BodNKjKV51g,11049
-sglang/srt/model_executor/model_runner.py,sha256=ZlFgqBNuqgWpa-NrjkfTT-_amtea33H9M1tBl-MT_nk,16977
-sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
-sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
-sglang/srt/models/chatglm.py,sha256=7bHU2AFoppINDZm0EdxgtAJe7rwr9OPkhOCfq2qNrIA,13862
-sglang/srt/models/commandr.py,sha256=5BEtIS2uUQJANkkY-6ZeDqlrpUK5yXVYHiztU3vsTKY,14172
+sglang/srt/model_executor/cuda_graph_runner.py,sha256=xvhFptAJKonqnEjeVYaIiKwhEM4NzbSeF9YvC6YqVc8,11364
+sglang/srt/model_executor/forward_batch_info.py,sha256=tcWwiKBU2W2USg19ASRlx-9utvYL6PTO0NPNyK5frJk,14272
+sglang/srt/model_executor/model_runner.py,sha256=QpNzsV1WiH4_1T0klmM6GjivWI-fKLATC5E67C1LSYk,18158
+sglang/srt/models/chatglm.py,sha256=aoEgA2nflcOCIKtZojhUoboqxSP6i5IrrvuDOpzNPnE,13844
+sglang/srt/models/commandr.py,sha256=2rAXRZRb4PkJZ4NWEqP_rIgsjxbdZyHpuoMOarqTWzQ,14163
 sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
-sglang/srt/models/deepseek.py,sha256=E5W4nkH-Ne449rAIwQZgz-FAH2Qqp2r1vNfboyk5wEg,16024
-sglang/srt/models/deepseek_v2.py,sha256=NMcckZb48kVUwAmDA2l8wO19T6DNkJOkKAhHa6utBZM,26968
-sglang/srt/models/gemma.py,sha256=ilfN_NOcz7hpwEJ2y7NW3fBFmFO7YfjhdFDbfzl2qww,12285
-sglang/srt/models/gemma2.py,sha256=ybQOXAPofw_Pv3mBer7dTpH4SlZt6Gf2I462Q3lOIww,16359
+sglang/srt/models/deepseek.py,sha256=7UJgde1EV9ey6d-CKRcEyTKh1_WhZdatpZiltIuqpik,16006
+sglang/srt/models/deepseek_v2.py,sha256=uk--2a1e83H6U9wTx_wd3UvkS3VrSRSkjCOjky0R0uo,27004
+sglang/srt/models/gemma.py,sha256=3orOUznoGt2NxVKO5c8AjD_ue0gWqwb7LnKbhlcS5Vg,12276
+sglang/srt/models/gemma2.py,sha256=IUXKjwO11dpnhevmapS9jz_qPZvzSKrHhYHIXnBR9AU,16475
 sglang/srt/models/gpt_bigcode.py,sha256=OKk9UP67as3T5bePlTRGHTCD-1wqaUEk92AowXPm6dg,10204
-sglang/srt/models/grok.py,sha256=M9rtdXslqYBle5VyZqFVHiJUXq_q_aHbza63xa03zqI,27861
+sglang/srt/models/grok.py,sha256=TrYcCQZhV7f5SUntU4Lo4ZDC8uBi0Vg0SWtyYiZxdqs,14530
 sglang/srt/models/internlm2.py,sha256=6j7JH0p3yib8GZDH8Cmrs-pgwfH3eOlAK6V3Cq64O7w,12202
 sglang/srt/models/llama2.py,sha256=HmzE1I8OnesmrdPY5b56l7okhWH_lRvWAg16K-UwKHg,14300
 sglang/srt/models/llama_classification.py,sha256=Dvzy3PfETiJtnKFOk8qDDLUoZECf_cpSrNeA60PaDo4,4932
 sglang/srt/models/llama_embedding.py,sha256=e2lpZ6GHKrHT1rr7_5gHGoCpfqdOBMusZCz34n62lec,3542
 sglang/srt/models/llava.py,sha256=-ysi192vpBDxNaMS8qaLOhC34lXQyRtbG_0niVaceSo,18436
 sglang/srt/models/llavavid.py,sha256=MX7YpqYh5J4BoOnV7vVAIfoOlBFQXYpp8Kpe7WK0ejk,13562
-sglang/srt/models/minicpm.py,sha256=ea_OyiwVTo6Tg9jNRAwqxETnA6FFeAqlIbiUS-xViEI,13843
+sglang/srt/models/minicpm.py,sha256=ioqCsTCE_oF8xqGF5fm5cK9dclK5Y0EQ1UJfyteIDDo,13825
 sglang/srt/models/mistral.py,sha256=jlrWBVNXbAUziAaIdHAjFcOJnKtn9Bl8rBd65ypJM-I,819
-sglang/srt/models/mixtral.py,sha256=raSLbp6AfWg5_u-f-lYeRejE9koAjbHt8iIHXd3nURM,21397
-sglang/srt/models/mixtral_quant.py,sha256=xYeeatZ9OfwCTas_KbH9nl6lnUT4YqSY7NAxpgLp5LE,14222
-sglang/srt/models/qwen.py,sha256=43ea6gn4wHzAaI3JTDLtl08aEm0vIqgzbVH9M8oeuY0,10006
-sglang/srt/models/qwen2.py,sha256=Hyhks2r4KHpKeb9iHZpnvEVc5klmnrPwcLohqg8j1kw,12284
-sglang/srt/models/qwen2_moe.py,sha256=pTfBivDyzdbcP22_7PdmdPqgx34esH8J98r-EgFA9Uw,17747
-sglang/srt/models/stablelm.py,sha256=yPrdzPEoUD2s_Q3RgOq7BBC7z-UtEaACzabqbDRs2tA,11368
+sglang/srt/models/mixtral.py,sha256=cZK-1kGXQC8ZC0tFNmbAoqWlyrrvv5omumpDdEwzzss,13623
+sglang/srt/models/mixtral_quant.py,sha256=wMACJq78OTWj7HlqPDRNEh8cjrVAjKqJEsOG3CO5xow,14072
+sglang/srt/models/qwen.py,sha256=ssdSgVuhT1Ei0JPa0xwqzrwwPNwkCHRJA4q70hK-Z7E,9988
+sglang/srt/models/qwen2.py,sha256=eeah76x-OYZiy6Bb1SDNVk8m_xXHYuh-P58GXjEFZ4w,12266
+sglang/srt/models/qwen2_moe.py,sha256=-Ijn_H2IGCjQAYA-9teS9IXKTPMBWSkkPp0Nox6MCuQ,17729
+sglang/srt/models/stablelm.py,sha256=30ngpc0Xq3VxzXJlf6svP1oax8Q3krMJkxM8PVKtZWU,11359
 sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
-sglang/srt/openai_api/adapter.py,sha256=fgUAPAcQ_mUJszbpsI_cgv2vzOAS7AKKAJPi2B91aw4,42490
+sglang/srt/openai_api/adapter.py,sha256=C53adcpLGfIUm_B259iWnOCQ3B3VjJbqFseqP8Vo-t8,43064
 sglang/srt/openai_api/protocol.py,sha256=knf-nds0XO2LYg-hPM-Ho1f1y2XZIV_Gvg3xcCKLfgQ,9411
 sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
 sglang/srt/sampling/penaltylib/orchestrator.py,sha256=WkTNeDhj9H9rtp2ZZeX6MS2sdKSGlLboE6FcuKrwUo0,10815
@@ -94,7 +94,7 @@ sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=XJZP0C4NFyXgc
 sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=0PlANTrR959foTA3Nj5qBE7ndaOZgG-9X6LhzlmEUc8,2533
 sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=v9jOgA0-I31WcrhIydiFbpy2ZJPLytFLGM98NRPd2sU,2820
 sglang/test/run_eval.py,sha256=NWxeLWmInBgkCvC9Jr_QzF7GfAiBve3Gf1JQrEOlNlU,3899
-sglang/test/runners.py,sha256=FYLbrWePfTacN5bsbAgMl5RiDI4g_Bsbwh1gXqRwr0Y,7794
+sglang/test/runners.py,sha256=J4XfBSPhZvLiHLrDsHUuIKjX3kzbMrD7fFEPr07SUkU,7975
 sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
 sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
 sglang/test/simple_eval_humaneval.py,sha256=iCtN2LBL6j3nxMDjRJ--m0MCNPAwDo81gJ2whE-2Rt0,5674
@@ -102,11 +102,11 @@ sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeH
 sglang/test/simple_eval_mgsm.py,sha256=wfbqJW9Rkc66vzq2fEMF6jchmoA8mw1OUiGU55cZ2B0,10261
 sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
 sglang/test/test_layernorm.py,sha256=VDdoeqGvebUa-l3rDiid6cC7wZq0Phpbm5fxxD0-cpg,1910
-sglang/test/test_programs.py,sha256=vRhKIriZgSk_Zn8gGviIfiY_suOBA7Ni7P0NaQM2Esk,13894
-sglang/test/test_utils.py,sha256=cO0ZbnfBS_MxyZ6MDyA7DrDVwu3umKRb3WP_dwggPng,14505
+sglang/test/test_programs.py,sha256=V_-Bx3lLkw37P6gDyA7mZCqxlyNMaFLBkRrPMQQQqn4,14909
+sglang/test/test_utils.py,sha256=Fw606sa8sTX6HJ7OCuyDUH8LQr9PvtwBKYnyZj2SLWU,14741
 sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
-sglang-0.2.12.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sglang-0.2.12.dist-info/METADATA,sha256=k4QBFP1vyWHeXgCA9Npoz7Wb8qT9aC8rL7R1QP2J60g,34314
-sglang-0.2.12.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-sglang-0.2.12.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
-sglang-0.2.12.dist-info/RECORD,,
+sglang-0.2.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sglang-0.2.13.dist-info/METADATA,sha256=oy69SBbn-iEZE0JRzPkHuhzRlAjNj6v8twSXrjsOWXs,34892
+sglang-0.2.13.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+sglang-0.2.13.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
+sglang-0.2.13.dist-info/RECORD,,
{sglang-0.2.12.dist-info → sglang-0.2.13.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (72.1.0)
+Generator: setuptools (72.2.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 