sglang 0.2.9.post1__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sglang/test/test_utils.py CHANGED
@@ -1,9 +1,14 @@
1
1
  """Common utilities for testing and benchmarking"""
2
2
 
3
+ import argparse
3
4
  import asyncio
5
+ import multiprocessing
4
6
  import subprocess
7
+ import threading
5
8
  import time
9
+ import unittest
6
10
  from functools import partial
11
+ from typing import Callable, List, Optional
7
12
 
8
13
  import numpy as np
9
14
  import requests
@@ -247,7 +252,7 @@ async def call_select_lmql(context, choices, temperature=0, max_len=4096, model=
247
252
  return choices.index(answer)
248
253
 
249
254
 
250
- def add_common_other_args_and_parse(parser):
255
+ def add_common_other_args_and_parse(parser: argparse.ArgumentParser):
251
256
  parser.add_argument("--parallel", type=int, default=64)
252
257
  parser.add_argument("--host", type=str, default="http://127.0.0.1")
253
258
  parser.add_argument("--port", type=int, default=None)
@@ -286,7 +291,7 @@ def add_common_other_args_and_parse(parser):
286
291
  return args
287
292
 
288
293
 
289
- def add_common_sglang_args_and_parse(parser):
294
+ def add_common_sglang_args_and_parse(parser: argparse.ArgumentParser):
290
295
  parser.add_argument("--parallel", type=int, default=64)
291
296
  parser.add_argument("--host", type=str, default="http://127.0.0.1")
292
297
  parser.add_argument("--port", type=int, default=30000)
@@ -296,7 +301,7 @@ def add_common_sglang_args_and_parse(parser):
296
301
  return args
297
302
 
298
303
 
299
- def select_sglang_backend(args):
304
+ def select_sglang_backend(args: argparse.Namespace):
300
305
  if args.backend.startswith("srt"):
301
306
  if args.backend == "srt-no-parallel":
302
307
  global_config.enable_parallel_decoding = False
@@ -309,7 +314,7 @@ def select_sglang_backend(args):
309
314
  return backend
310
315
 
311
316
 
312
- def _get_call_generate(args):
317
+ def _get_call_generate(args: argparse.Namespace):
313
318
  if args.backend == "lightllm":
314
319
  return partial(call_generate_lightllm, url=f"{args.host}:{args.port}/generate")
315
320
  elif args.backend == "vllm":
@@ -336,7 +341,7 @@ def _get_call_generate(args):
336
341
  raise ValueError(f"Invalid backend: {args.backend}")
337
342
 
338
343
 
339
- def _get_call_select(args):
344
+ def _get_call_select(args: argparse.Namespace):
340
345
  if args.backend == "lightllm":
341
346
  return partial(call_select_lightllm, url=f"{args.host}:{args.port}/generate")
342
347
  elif args.backend == "vllm":
@@ -359,7 +364,7 @@ def _get_call_select(args):
359
364
  raise ValueError(f"Invalid backend: {args.backend}")
360
365
 
361
366
 
362
- def get_call_generate(args):
367
+ def get_call_generate(args: argparse.Namespace):
363
368
  call_generate = _get_call_generate(args)
364
369
 
365
370
  def func(*args, **kwargs):
@@ -372,7 +377,7 @@ def get_call_generate(args):
372
377
  return func
373
378
 
374
379
 
375
- def get_call_select(args):
380
+ def get_call_select(args: argparse.Namespace):
376
381
  call_select = _get_call_select(args)
377
382
 
378
383
  def func(*args, **kwargs):
@@ -385,7 +390,16 @@ def get_call_select(args):
385
390
  return func
386
391
 
387
392
 
388
- def popen_launch_server(model, port, timeout, *args):
393
+ def popen_launch_server(
394
+ model: str,
395
+ base_url: str,
396
+ timeout: float,
397
+ api_key: Optional[str] = None,
398
+ other_args: tuple = (),
399
+ ):
400
+ _, host, port = base_url.split(":")
401
+ host = host[2:]
402
+
389
403
  command = [
390
404
  "python3",
391
405
  "-m",
@@ -393,21 +407,88 @@ def popen_launch_server(model, port, timeout, *args):
393
407
  "--model-path",
394
408
  model,
395
409
  "--host",
396
- "localhost",
410
+ host,
397
411
  "--port",
398
- str(port),
399
- *args,
412
+ port,
413
+ *other_args,
400
414
  ]
415
+ if api_key:
416
+ command += ["--api-key", api_key]
417
+
401
418
  process = subprocess.Popen(command, stdout=None, stderr=None)
402
- base_url = f"http://localhost:{port}/v1"
403
419
 
404
420
  start_time = time.time()
405
421
  while time.time() - start_time < timeout:
406
422
  try:
407
- response = requests.get(f"{base_url}/models")
423
+ headers = {
424
+ "Content-Type": "application/json; charset=utf-8",
425
+ "Authorization": f"Bearer {api_key}",
426
+ }
427
+ response = requests.get(f"{base_url}/v1/models", headers=headers)
408
428
  if response.status_code == 200:
409
429
  return process
410
430
  except requests.RequestException:
411
431
  pass
412
432
  time.sleep(10)
413
433
  raise TimeoutError("Server failed to start within the timeout period.")
434
+
435
+
436
def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: Optional[float] = None,
):
    """Run a function in a worker thread and wait at most `timeout` seconds.

    Args:
        func: Callable to execute.
        args: Positional arguments forwarded to `func`.
        kwargs: Keyword arguments forwarded to `func`; `None` means none.
        timeout: Seconds to wait for completion; `None` waits indefinitely.

    Returns:
        The value returned by `func`.

    Raises:
        TimeoutError: `func` was still running when the timeout elapsed. The
            worker thread is not stopped; it keeps running in the background.
        RuntimeError: `func` finished without producing a result. If `func`
            raised an exception, it is attached as `__cause__`.
    """
    ret_value = []
    errors = []

    def _target_func():
        # Capture the failure so the caller can see why no value was produced,
        # instead of losing it inside the worker thread.
        try:
            ret_value.append(func(*args, **(kwargs or {})))
        except Exception as exc:
            errors.append(exc)

    t = threading.Thread(target=_target_func)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError()

    if not ret_value:
        if errors:
            exc = errors[0]
            raise RuntimeError(f"{type(exc).__name__}: {exc}") from exc
        raise RuntimeError()

    return ret_value[0]
458
+
459
+
460
def run_unittest_files(files: List[str], timeout_per_file: float):
    """Run each unittest file in its own process with a per-file time limit.

    Files are run in order and the run stops at the first failure or timeout.

    Args:
        files: Names of the test files passed to `unittest.main`.
        timeout_per_file: Maximum number of seconds allowed for one file.

    Returns:
        0 if every file finished with exit code 0, otherwise -1 (including
        when a file timed out).
    """
    tic = time.time()
    success = True

    for filename in files:

        def func():
            print(f"\n\nRun {filename}\n\n")
            # unittest.main exits the child process when done (exit=True by
            # default), so the result is reported through the exit code.
            unittest.main(module=None, argv=["", "-vb"] + [filename])

        # NOTE(review): a nested function as the process target presumably
        # relies on the "fork" start method -- confirm on other platforms.
        p = multiprocessing.Process(target=func)

        def run_one_file():
            p.start()
            p.join()

        try:
            run_with_timeout(run_one_file, timeout=timeout_per_file)
        except TimeoutError:
            p.terminate()
            time.sleep(5)
            print(
                f"\nTimeout after {timeout_per_file} seconds when running {filename}\n"
            )
            # A timeout is a failure: report it through the same -1 code as
            # any other failure instead of returning False (which equals 0).
            success = False
            break

        if p.exitcode != 0:
            success = False
            break

    if success:
        print(f"Success. Time elapsed: {time.time() - tic:.2f}s")
    else:
        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")

    return 0 if success else -1
sglang/utils.py CHANGED
@@ -12,6 +12,7 @@ import urllib.request
12
12
  from concurrent.futures import ThreadPoolExecutor
13
13
  from io import BytesIO
14
14
  from json import dumps
15
+ from typing import Union
15
16
 
16
17
  import numpy as np
17
18
  import requests
@@ -25,7 +26,7 @@ def get_exception_traceback():
25
26
  return err_str
26
27
 
27
28
 
28
- def is_same_type(values):
29
+ def is_same_type(values: list):
29
30
  """Return whether the elements in values are of the same type."""
30
31
  if len(values) <= 1:
31
32
  return True
@@ -45,7 +46,7 @@ def read_jsonl(filename: str):
45
46
  return rets
46
47
 
47
48
 
48
- def dump_state_text(filename, states, mode="w"):
49
+ def dump_state_text(filename: str, states: list, mode: str = "w"):
49
50
  """Dump program state in a text file."""
50
51
  from sglang.lang.interpreter import ProgramState
51
52
 
@@ -75,19 +76,13 @@ class HttpResponse:
75
76
  return self.resp.status
76
77
 
77
78
 
78
- def http_request(
79
- url, json=None, stream=False, auth_token=None, api_key=None, verify=None
80
- ):
79
+ def http_request(url, json=None, stream=False, api_key=None, verify=None):
81
80
  """A faster version of requests.post with low-level urllib API."""
82
81
  headers = {"Content-Type": "application/json; charset=utf-8"}
83
82
 
84
- # add the Authorization header if an auth token is provided
85
- if auth_token is not None:
86
- headers["Authorization"] = f"Bearer {auth_token}"
87
-
88
- # add the API Key header if an API key is provided
83
+ # add the Authorization header if an api key is provided
89
84
  if api_key is not None:
90
- headers["X-API-Key"] = api_key
85
+ headers["Authorization"] = f"Bearer {api_key}"
91
86
 
92
87
  if stream:
93
88
  return requests.post(url, json=json, stream=True, headers=headers)
@@ -105,7 +100,7 @@ def http_request(
105
100
  return HttpResponse(e)
106
101
 
107
102
 
108
- def encode_image_base64(image_path):
103
+ def encode_image_base64(image_path: Union[str, bytes]):
109
104
  """Encode an image in base64."""
110
105
  if isinstance(image_path, str):
111
106
  with open(image_path, "rb") as image_file:
@@ -144,7 +139,7 @@ def encode_frame(frame):
144
139
  return frame_bytes
145
140
 
146
141
 
147
- def encode_video_base64(video_path, num_frames=16):
142
+ def encode_video_base64(video_path: str, num_frames: int = 16):
148
143
  import cv2 # pip install opencv-python-headless
149
144
 
150
145
  cap = cv2.VideoCapture(video_path)
@@ -190,7 +185,7 @@ def encode_video_base64(video_path, num_frames=16):
190
185
  return video_base64
191
186
 
192
187
 
193
- def _is_chinese_char(cp):
188
+ def _is_chinese_char(cp: int):
194
189
  """Checks whether CP is the codepoint of a CJK character."""
195
190
  # This defines a "chinese character" as anything in the CJK Unicode block:
196
191
  # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
@@ -215,7 +210,7 @@ def _is_chinese_char(cp):
215
210
  return False
216
211
 
217
212
 
218
- def find_printable_text(text):
213
+ def find_printable_text(text: str):
219
214
  """Returns the longest printable substring of text that contains only entire words."""
220
215
  # Borrowed from https://github.com/huggingface/transformers/blob/061580c82c2db1de9139528243e105953793f7a2/src/transformers/generation/streamers.py#L99
221
216
 
@@ -234,26 +229,7 @@ def find_printable_text(text):
234
229
  return text[: text.rfind(" ") + 1]
235
230
 
236
231
 
237
- def run_with_timeout(func, args=(), kwargs=None, timeout=None):
238
- """Run a function with timeout."""
239
- ret_value = []
240
-
241
- def _target_func():
242
- ret_value.append(func(*args, **(kwargs or {})))
243
-
244
- t = threading.Thread(target=_target_func)
245
- t.start()
246
- t.join(timeout=timeout)
247
- if t.is_alive():
248
- raise TimeoutError()
249
-
250
- if not ret_value:
251
- raise RuntimeError()
252
-
253
- return ret_value[0]
254
-
255
-
256
- def graceful_registry(sub_module_name):
232
+ def graceful_registry(sub_module_name: str):
257
233
  def graceful_shutdown(signum, frame):
258
234
  logger.info(
259
235
  f"{sub_module_name} Received signal to shutdown. Performing graceful shutdown..."
@@ -265,7 +241,9 @@ def graceful_registry(sub_module_name):
265
241
 
266
242
 
267
243
  class LazyImport:
268
- def __init__(self, module_name, class_name):
244
+ """Lazy import to make `import sglang` run faster."""
245
+
246
+ def __init__(self, module_name: str, class_name: str):
269
247
  self.module_name = module_name
270
248
  self.class_name = class_name
271
249
  self._module = None
@@ -276,7 +254,7 @@ class LazyImport:
276
254
  self._module = getattr(module, self.class_name)
277
255
  return self._module
278
256
 
279
- def __getattr__(self, name):
257
+ def __getattr__(self, name: str):
280
258
  module = self._load()
281
259
  return getattr(module, name)
282
260
 
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.9.post1"
1
+ __version__ = "0.2.10"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.2.9.post1
3
+ Version: 0.2.10
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -215,34 +215,35 @@ Requires-Dist: requests
215
215
  Requires-Dist: tqdm
216
216
  Requires-Dist: numpy
217
217
  Provides-Extra: all
218
- Requires-Dist: sglang[srt] ; extra == 'all'
219
- Requires-Dist: sglang[openai] ; extra == 'all'
220
- Requires-Dist: sglang[anthropic] ; extra == 'all'
221
- Requires-Dist: sglang[litellm] ; extra == 'all'
218
+ Requires-Dist: sglang[srt]; extra == "all"
219
+ Requires-Dist: sglang[openai]; extra == "all"
220
+ Requires-Dist: sglang[anthropic]; extra == "all"
221
+ Requires-Dist: sglang[litellm]; extra == "all"
222
222
  Provides-Extra: anthropic
223
- Requires-Dist: anthropic >=0.20.0 ; extra == 'anthropic'
223
+ Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
224
224
  Provides-Extra: litellm
225
- Requires-Dist: litellm >=1.0.0 ; extra == 'litellm'
225
+ Requires-Dist: litellm>=1.0.0; extra == "litellm"
226
226
  Provides-Extra: openai
227
- Requires-Dist: openai >=1.0 ; extra == 'openai'
228
- Requires-Dist: tiktoken ; extra == 'openai'
227
+ Requires-Dist: openai>=1.0; extra == "openai"
228
+ Requires-Dist: tiktoken; extra == "openai"
229
229
  Provides-Extra: srt
230
- Requires-Dist: aiohttp ; extra == 'srt'
231
- Requires-Dist: fastapi ; extra == 'srt'
232
- Requires-Dist: hf-transfer ; extra == 'srt'
233
- Requires-Dist: huggingface-hub ; extra == 'srt'
234
- Requires-Dist: interegular ; extra == 'srt'
235
- Requires-Dist: packaging ; extra == 'srt'
236
- Requires-Dist: pillow ; extra == 'srt'
237
- Requires-Dist: psutil ; extra == 'srt'
238
- Requires-Dist: pydantic ; extra == 'srt'
239
- Requires-Dist: torch ; extra == 'srt'
240
- Requires-Dist: uvicorn ; extra == 'srt'
241
- Requires-Dist: uvloop ; extra == 'srt'
242
- Requires-Dist: zmq ; extra == 'srt'
243
- Requires-Dist: vllm ==0.5.3.post1 ; extra == 'srt'
244
- Requires-Dist: outlines >=0.0.44 ; extra == 'srt'
245
- Requires-Dist: python-multipart ; extra == 'srt'
230
+ Requires-Dist: aiohttp; extra == "srt"
231
+ Requires-Dist: fastapi; extra == "srt"
232
+ Requires-Dist: hf-transfer; extra == "srt"
233
+ Requires-Dist: huggingface-hub; extra == "srt"
234
+ Requires-Dist: interegular; extra == "srt"
235
+ Requires-Dist: jsonlines; extra == "srt"
236
+ Requires-Dist: packaging; extra == "srt"
237
+ Requires-Dist: pillow; extra == "srt"
238
+ Requires-Dist: psutil; extra == "srt"
239
+ Requires-Dist: pydantic; extra == "srt"
240
+ Requires-Dist: python-multipart; extra == "srt"
241
+ Requires-Dist: torch; extra == "srt"
242
+ Requires-Dist: uvicorn; extra == "srt"
243
+ Requires-Dist: uvloop; extra == "srt"
244
+ Requires-Dist: zmq; extra == "srt"
245
+ Requires-Dist: vllm==0.5.3.post1; extra == "srt"
246
+ Requires-Dist: outlines>=0.0.44; extra == "srt"
246
247
 
247
248
  <div align="center">
248
249
  <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400"></img>
@@ -300,8 +301,8 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
300
301
 
301
302
  ### Method 2: From source
302
303
  ```
303
- # Use the stable v0.2.9.post1 branch
304
- git clone -b v0.2.9.post1 https://github.com/sgl-project/sglang.git
304
+ # Use the last release branch
305
+ git clone -b v0.2.10 https://github.com/sgl-project/sglang.git
305
306
  cd sglang
306
307
 
307
308
  pip install --upgrade pip
@@ -453,7 +454,7 @@ Instructions for supporting a new model are [here](https://github.com/sgl-projec
453
454
 
454
455
  ### Benchmark Performance
455
456
 
456
- - Benchmark a single static batch by running the following command without launching a server. The arguments are the same as those for `launch_server.py`. This is not a dynamic batching server, so it may run out of memory for a batch size that can run successfully with a real server. This is because a real server will truncate the prefill into several batches/chunks, while this unit test does not do this.
457
+ - Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, consider using `sglang.bench_serving`.
457
458
  ```
458
459
  python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32
459
460
  ```
@@ -1,13 +1,13 @@
1
1
  sglang/__init__.py,sha256=ECjvAWlxIwKtUIXGchfkoCIbF-iqLjH-Q0o8xHTlVNY,1352
2
2
  sglang/api.py,sha256=s_P8BvGDCQ0PiqOapr2TLFge1NA7QmKqUx6bFQ8Q5GQ,5676
3
- sglang/bench_latency.py,sha256=JPatRvstM3nXb-ViVgtR-TaRrFHpcHzqoDG7BQmRYK8,10539
3
+ sglang/bench_latency.py,sha256=lHk9C3XM1e-UQd6HY2qn-njr2rG5AFQ_sNVD5hcF5Vc,12162
4
4
  sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
5
- sglang/check_env.py,sha256=M4hWWl9IAmrhVATj6nbPlGiZ4FtLI3K9ETL9hFzxh8Q,4138
5
+ sglang/check_env.py,sha256=XlVou81XC20tPFVTuKDSKqDqLQJoO2QvlnReWMf-Ho4,4152
6
6
  sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
7
7
  sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
8
8
  sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
9
- sglang/utils.py,sha256=r0Z7hY_bFFk-b6WeQJir9br-hCW2-p7n5E7Et2WziaQ,8776
10
- sglang/version.py,sha256=xiigmrV7lCZAjfifluudBrG9T_WlhKAusKo0iARTZm0,28
9
+ sglang/utils.py,sha256=C50xm06WWKpKB8kSNs9vO4egJ2QTk_OAA6M13S2cB_A,8369
10
+ sglang/version.py,sha256=waXgc7p-jgGCsUjdVfO_KjlVZblnCvrzf4A0dsBj_lg,23
11
11
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
13
13
  sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
@@ -19,51 +19,51 @@ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtx
19
19
  sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
20
20
  sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
21
21
  sglang/lang/backend/openai.py,sha256=6ww2rwKouWgtmjaCf4hk-kXXJ6bY6n9Xnbm3UTFZvl4,14808
22
- sglang/lang/backend/runtime_endpoint.py,sha256=6iW1S62KmYyQGiWsHJFhZidK01vlIE55IsYN2tP38WQ,9202
22
+ sglang/lang/backend/runtime_endpoint.py,sha256=n78pyBWTCMYmDAS-0yZVFvzQYCiACz8Usj7FTDfdVKE,8763
23
23
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
24
24
  sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
25
- sglang/srt/hf_transformers_utils.py,sha256=Fg-3panb6lsqOhHmAYA0ivkXyBjdnvY5mqvilDv2xF4,11919
25
+ sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
26
26
  sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
27
- sglang/srt/model_config.py,sha256=DO7m84WiT3dzPWmyKz_UXDAHEdqEjq8Lq5wCjzjYMME,6023
27
+ sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
28
28
  sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
29
- sglang/srt/server.py,sha256=cDHUmLqj7MjF-3L9WcfA-4z9dRl55cwF5ygXuncMl-Q,15852
30
- sglang/srt/server_args.py,sha256=wdRlxR-509RfNYuMQoxUAefMwoc5eme6sYwEMyRBHmk,16034
31
- sglang/srt/utils.py,sha256=5wgGe6kI59JAmf8kxLsItulJ4xQaOJHHYaWWd6_WWmo,23384
29
+ sglang/srt/server.py,sha256=ur_fDb-nEmlzz1mSKwWa87XFJdQM1gxFz4cahMcMatA,16028
30
+ sglang/srt/server_args.py,sha256=oUMzSSBrJ5_g0yeBapABUv2MlhDNWEfWLdLVROgqZOU,16305
31
+ sglang/srt/utils.py,sha256=GcRFf3pb5l-Q5TJU4gF-Wp7Ct46l3BO0aMpjlyHXp3I,23766
32
32
  sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
33
33
  sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
34
34
  sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
35
35
  sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
36
36
  sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
37
- sglang/srt/layers/extend_attention.py,sha256=zuNnAdL_wF6BX0Mwn1dgDJvh3YJjYwqa5Fbzp8muOVc,12573
37
+ sglang/srt/layers/extend_attention.py,sha256=V5pm7toSDlzByaV4lGRgXVGWFUPf68chvvahlT2h4mk,14092
38
38
  sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
39
39
  sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
40
40
  sglang/srt/layers/logits_processor.py,sha256=5Cg3h5b4H0EUeOJRst3IOMWL5dniP63A5s15BRkAMmk,11091
41
- sglang/srt/layers/radix_attention.py,sha256=tdA-kdd9LQY1wbw3iYuy-9cikVJYmy3EctwAlUfN-Uo,6945
42
- sglang/srt/layers/token_attention.py,sha256=ylUqUnozJCCohxTGAiiP3sxgUrcXfEVic8-qgcHYDj4,7968
41
+ sglang/srt/layers/radix_attention.py,sha256=cNSQWO74DcXgpAMKSMaHzfpy5IcLORUnWe5gOwATLrw,7466
42
+ sglang/srt/layers/token_attention.py,sha256=pdBORaWQGvDy_Aitcq0XDHk2Rravol-jZZkrsgkXeng,8849
43
43
  sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
44
44
  sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
45
45
  sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
46
46
  sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
47
47
  sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
48
- sglang/srt/managers/io_struct.py,sha256=Rz7Ur9Yw6prDGdy6XjsSiUmVBccS6cef-G_9TW7HA_4,7105
48
+ sglang/srt/managers/io_struct.py,sha256=VK61d6zfnBz5a3IMmwYsa5PNa9jUXPPmED1TdDRQGDs,7345
49
49
  sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
50
- sglang/srt/managers/schedule_batch.py,sha256=LIoVCPNivh0u1dOrrWRgFD6a4ywq3nrG_4dNgCK0kIw,37697
51
- sglang/srt/managers/tokenizer_manager.py,sha256=rtZ44aiZOMHLHkXDhMgj0HDR3gExpeGjWfoCD0PfG_o,20574
50
+ sglang/srt/managers/schedule_batch.py,sha256=yIjiiMcaYYN9iaEOGQZoPUpFviDptMVh9hMwRRnDAco,37896
51
+ sglang/srt/managers/tokenizer_manager.py,sha256=kxkoAa8VbQt9FJPX1fN-7IzAD8RIcIvz3AGR8uEMYjk,21202
52
52
  sglang/srt/managers/tp_worker.py,sha256=JPLneFwcPlmPXZX1QxZHWgcdau8FC8wNuVqfCqsgOkU,35234
53
53
  sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
54
54
  sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
55
55
  sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
56
- sglang/srt/mem_cache/memory_pool.py,sha256=wkhjyYLbAZrl2FB5i4ODkxgMufBuDpe4N0kbXhu6ZO0,4509
56
+ sglang/srt/mem_cache/memory_pool.py,sha256=8N4eHybhtBuwIwYyeNSvrZI90LGgMG8sA3OrXdXZAZs,5496
57
57
  sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
58
58
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=OdmO6R7nHWrRJCtZOxYkt0KNdGoX7Md4knsypwPYjaQ,9365
59
- sglang/srt/model_executor/model_runner.py,sha256=RadluuL2Ou_BTOgo2IrLfMMEX7Z_DRgPL3JVBIr2KaU,16189
59
+ sglang/srt/model_executor/model_runner.py,sha256=tJHlqk_JH3RJDaPAiSljaDI951LUu9AYW679eCKMJXs,17404
60
60
  sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
61
61
  sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
62
62
  sglang/srt/models/chatglm.py,sha256=vYWooqyPmcSFZNjxj_g5I_FgHJlDytbEiz6vyv3JBNM,13856
63
63
  sglang/srt/models/commandr.py,sha256=gaTI77hgitPlcUNyxMEdGu_XZQj2DuAMnh3KbZQ9HFg,14166
64
64
  sglang/srt/models/dbrx.py,sha256=LQu7I2KH-XzY9iBlaK7IQsM1o3kzsuI1vTCspK2C19o,14655
65
65
  sglang/srt/models/deepseek.py,sha256=adr57ZX6aPOBOpmvm7YIvoqo6u0jdrKJPZ8SGcVXAh8,16014
66
- sglang/srt/models/deepseek_v2.py,sha256=9CORl-IroSguYPX3wz_aGe7mFoUE7cQRMs7CgbkBYLk,20087
66
+ sglang/srt/models/deepseek_v2.py,sha256=jaVaQlL1aPCTu8nLcvtAW_rmtvHe6y2CviIOjXzh4q4,26962
67
67
  sglang/srt/models/gemma.py,sha256=PMPI1-WLuLdk6e7u6I9d_LoCkauLkWY3aOP8MFEZ-sI,12279
68
68
  sglang/srt/models/gemma2.py,sha256=kTjZcsptgtYaO8BL_NlygjVSMSloq2Mc4Rf3FKvEhbs,16420
69
69
  sglang/srt/models/gpt_bigcode.py,sha256=U7GmHKywSu12D-EwvuWv3RwHkx6bPawaRIjlFIpQkfs,10194
@@ -82,16 +82,19 @@ sglang/srt/models/qwen2.py,sha256=mXlVd6UTCXY3VdgodFpQnlaY-NYLIbA-SknxdA9R13w,12
82
82
  sglang/srt/models/qwen2_moe.py,sha256=YYdJEezic7GyW-_bXlNIaqBa0C4IHQpz_vuRBLxms4k,18141
83
83
  sglang/srt/models/stablelm.py,sha256=b3d-ZwLQoLjZ6CupnkIq7d-z9tzGSxAyIcgSmZiZxZw,11362
84
84
  sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
85
- sglang/srt/openai_api/adapter.py,sha256=h6TIU0Fu3jU361pye4J12vcDug7UJJRPiBAY_HfFUuE,32599
86
- sglang/srt/openai_api/protocol.py,sha256=JXLnnQ63I-bJv93ICPfP0cBpyomQA5IYE_mkUg5X4Es,8177
87
- sglang/test/run_eval.py,sha256=WvMLSi70G9fhruP8cPLOfDJ9XEKL7yNn2pylx-7tNsQ,3054
88
- sglang/test/simple_eval_common.py,sha256=Qh1-iEXJCKfJmgpAzNSp28fcP1TUJzt3s9i1FjvemHY,12340
89
- sglang/test/simple_eval_humaneval.py,sha256=IW0ZC6D4SXu06IJiMoAY9DK9SMsTOlDPAwu4cfbJco0,5826
85
+ sglang/srt/openai_api/adapter.py,sha256=p2HeYO9Qgl7EERXutwpsQ659NvZhFnkQmTZX5s-x-oI,37444
86
+ sglang/srt/openai_api/protocol.py,sha256=q1MuDUhwSM-8G2uGnWUMeEk87aZxei8lCcaP6VuA8So,8200
87
+ sglang/test/run_eval.py,sha256=kbM6SiosfXj-1uYTFXPWMd7hZDvJZwV-AmdHi_WfP3A,3559
88
+ sglang/test/runners.py,sha256=APXXbrqmUGUqnX7T1Aq8X2NJQkIqtv6B42a2ybdlPjA,7459
89
+ sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
90
+ sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
91
+ sglang/test/simple_eval_humaneval.py,sha256=k50DKoAbXiw-ubrFXHet9B-7tboHU2dQJf5G3C-KKq4,5838
92
+ sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeHM4,2519
90
93
  sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
91
94
  sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
92
- sglang/test/test_utils.py,sha256=PndOL1zdseMrpHTHGmgsHHepxqYBn__eNLrlsSXLy6k,11905
93
- sglang-0.2.9.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
94
- sglang-0.2.9.post1.dist-info/METADATA,sha256=BY728JRrlJkEdmiLJAvi_qfuUY2LDBqcKqqtooU9dxk,33281
95
- sglang-0.2.9.post1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
96
- sglang-0.2.9.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
97
- sglang-0.2.9.post1.dist-info/RECORD,,
95
+ sglang/test/test_utils.py,sha256=p-G6iiT5-Vkg6LMYgvDheomLJ6IYMLsYHCp3tkatiy8,13983
96
+ sglang-0.2.10.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
97
+ sglang-0.2.10.dist-info/METADATA,sha256=Lt9wnP2134unvF88fDj2PfQIf2YaeYJ6xZdfmMAJkoM,33303
98
+ sglang-0.2.10.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
99
+ sglang-0.2.10.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
100
+ sglang-0.2.10.dist-info/RECORD,,