sglang 0.2.9.post1__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. sglang/__init__.py +8 -0
  2. sglang/api.py +10 -2
  3. sglang/bench_latency.py +234 -74
  4. sglang/check_env.py +25 -2
  5. sglang/global_config.py +0 -1
  6. sglang/lang/backend/base_backend.py +3 -1
  7. sglang/lang/backend/openai.py +8 -3
  8. sglang/lang/backend/runtime_endpoint.py +46 -40
  9. sglang/lang/choices.py +164 -0
  10. sglang/lang/interpreter.py +6 -13
  11. sglang/lang/ir.py +11 -2
  12. sglang/srt/hf_transformers_utils.py +2 -2
  13. sglang/srt/layers/extend_attention.py +59 -7
  14. sglang/srt/layers/logits_processor.py +1 -1
  15. sglang/srt/layers/radix_attention.py +24 -14
  16. sglang/srt/layers/token_attention.py +28 -2
  17. sglang/srt/managers/io_struct.py +9 -4
  18. sglang/srt/managers/schedule_batch.py +98 -323
  19. sglang/srt/managers/tokenizer_manager.py +34 -16
  20. sglang/srt/managers/tp_worker.py +20 -22
  21. sglang/srt/mem_cache/memory_pool.py +74 -38
  22. sglang/srt/model_config.py +11 -0
  23. sglang/srt/model_executor/cuda_graph_runner.py +3 -3
  24. sglang/srt/model_executor/forward_batch_info.py +256 -0
  25. sglang/srt/model_executor/model_runner.py +51 -26
  26. sglang/srt/models/chatglm.py +1 -1
  27. sglang/srt/models/commandr.py +1 -1
  28. sglang/srt/models/dbrx.py +1 -1
  29. sglang/srt/models/deepseek.py +1 -1
  30. sglang/srt/models/deepseek_v2.py +199 -17
  31. sglang/srt/models/gemma.py +1 -1
  32. sglang/srt/models/gemma2.py +1 -1
  33. sglang/srt/models/gpt_bigcode.py +1 -1
  34. sglang/srt/models/grok.py +1 -1
  35. sglang/srt/models/internlm2.py +1 -1
  36. sglang/srt/models/llama2.py +1 -1
  37. sglang/srt/models/llama_classification.py +1 -1
  38. sglang/srt/models/llava.py +1 -2
  39. sglang/srt/models/llavavid.py +1 -2
  40. sglang/srt/models/minicpm.py +1 -1
  41. sglang/srt/models/mixtral.py +1 -1
  42. sglang/srt/models/mixtral_quant.py +1 -1
  43. sglang/srt/models/qwen.py +1 -1
  44. sglang/srt/models/qwen2.py +1 -1
  45. sglang/srt/models/qwen2_moe.py +1 -1
  46. sglang/srt/models/stablelm.py +1 -1
  47. sglang/srt/openai_api/adapter.py +151 -29
  48. sglang/srt/openai_api/protocol.py +7 -1
  49. sglang/srt/server.py +111 -84
  50. sglang/srt/server_args.py +12 -2
  51. sglang/srt/utils.py +25 -20
  52. sglang/test/run_eval.py +21 -10
  53. sglang/test/runners.py +237 -0
  54. sglang/test/simple_eval_common.py +12 -12
  55. sglang/test/simple_eval_gpqa.py +92 -0
  56. sglang/test/simple_eval_humaneval.py +5 -5
  57. sglang/test/simple_eval_math.py +72 -0
  58. sglang/test/test_utils.py +95 -14
  59. sglang/utils.py +15 -37
  60. sglang/version.py +1 -1
  61. {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/METADATA +59 -48
  62. sglang-0.2.11.dist-info/RECORD +102 -0
  63. sglang-0.2.9.post1.dist-info/RECORD +0 -97
  64. {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/LICENSE +0 -0
  65. {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/WHEEL +0 -0
  66. {sglang-0.2.9.post1.dist-info → sglang-0.2.11.dist-info}/top_level.txt +0 -0
sglang/utils.py CHANGED
@@ -12,6 +12,7 @@ import urllib.request
12
12
  from concurrent.futures import ThreadPoolExecutor
13
13
  from io import BytesIO
14
14
  from json import dumps
15
+ from typing import Union
15
16
 
16
17
  import numpy as np
17
18
  import requests
@@ -25,7 +26,7 @@ def get_exception_traceback():
25
26
  return err_str
26
27
 
27
28
 
28
- def is_same_type(values):
29
+ def is_same_type(values: list):
29
30
  """Return whether the elements in values are of the same type."""
30
31
  if len(values) <= 1:
31
32
  return True
@@ -45,7 +46,7 @@ def read_jsonl(filename: str):
45
46
  return rets
46
47
 
47
48
 
48
- def dump_state_text(filename, states, mode="w"):
49
+ def dump_state_text(filename: str, states: list, mode: str = "w"):
49
50
  """Dump program state in a text file."""
50
51
  from sglang.lang.interpreter import ProgramState
51
52
 
@@ -75,19 +76,13 @@ class HttpResponse:
75
76
  return self.resp.status
76
77
 
77
78
 
78
- def http_request(
79
- url, json=None, stream=False, auth_token=None, api_key=None, verify=None
80
- ):
79
+ def http_request(url, json=None, stream=False, api_key=None, verify=None):
81
80
  """A faster version of requests.post with low-level urllib API."""
82
81
  headers = {"Content-Type": "application/json; charset=utf-8"}
83
82
 
84
- # add the Authorization header if an auth token is provided
85
- if auth_token is not None:
86
- headers["Authorization"] = f"Bearer {auth_token}"
87
-
88
- # add the API Key header if an API key is provided
83
+ # add the Authorization header if an api key is provided
89
84
  if api_key is not None:
90
- headers["X-API-Key"] = api_key
85
+ headers["Authorization"] = f"Bearer {api_key}"
91
86
 
92
87
  if stream:
93
88
  return requests.post(url, json=json, stream=True, headers=headers)
@@ -105,7 +100,7 @@ def http_request(
105
100
  return HttpResponse(e)
106
101
 
107
102
 
108
- def encode_image_base64(image_path):
103
+ def encode_image_base64(image_path: Union[str, bytes]):
109
104
  """Encode an image in base64."""
110
105
  if isinstance(image_path, str):
111
106
  with open(image_path, "rb") as image_file:
@@ -144,7 +139,7 @@ def encode_frame(frame):
144
139
  return frame_bytes
145
140
 
146
141
 
147
- def encode_video_base64(video_path, num_frames=16):
142
+ def encode_video_base64(video_path: str, num_frames: int = 16):
148
143
  import cv2 # pip install opencv-python-headless
149
144
 
150
145
  cap = cv2.VideoCapture(video_path)
@@ -190,7 +185,7 @@ def encode_video_base64(video_path, num_frames=16):
190
185
  return video_base64
191
186
 
192
187
 
193
- def _is_chinese_char(cp):
188
+ def _is_chinese_char(cp: int):
194
189
  """Checks whether CP is the codepoint of a CJK character."""
195
190
  # This defines a "chinese character" as anything in the CJK Unicode block:
196
191
  # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
@@ -215,7 +210,7 @@ def _is_chinese_char(cp):
215
210
  return False
216
211
 
217
212
 
218
- def find_printable_text(text):
213
+ def find_printable_text(text: str):
219
214
  """Returns the longest printable substring of text that contains only entire words."""
220
215
  # Borrowed from https://github.com/huggingface/transformers/blob/061580c82c2db1de9139528243e105953793f7a2/src/transformers/generation/streamers.py#L99
221
216
 
@@ -234,26 +229,7 @@ def find_printable_text(text):
234
229
  return text[: text.rfind(" ") + 1]
235
230
 
236
231
 
237
- def run_with_timeout(func, args=(), kwargs=None, timeout=None):
238
- """Run a function with timeout."""
239
- ret_value = []
240
-
241
- def _target_func():
242
- ret_value.append(func(*args, **(kwargs or {})))
243
-
244
- t = threading.Thread(target=_target_func)
245
- t.start()
246
- t.join(timeout=timeout)
247
- if t.is_alive():
248
- raise TimeoutError()
249
-
250
- if not ret_value:
251
- raise RuntimeError()
252
-
253
- return ret_value[0]
254
-
255
-
256
- def graceful_registry(sub_module_name):
232
+ def graceful_registry(sub_module_name: str):
257
233
  def graceful_shutdown(signum, frame):
258
234
  logger.info(
259
235
  f"{sub_module_name} Received signal to shutdown. Performing graceful shutdown..."
@@ -265,7 +241,9 @@ def graceful_registry(sub_module_name):
265
241
 
266
242
 
267
243
  class LazyImport:
268
- def __init__(self, module_name, class_name):
244
+ """Lazy import to make `import sglang` run faster."""
245
+
246
+ def __init__(self, module_name: str, class_name: str):
269
247
  self.module_name = module_name
270
248
  self.class_name = class_name
271
249
  self._module = None
@@ -276,7 +254,7 @@ class LazyImport:
276
254
  self._module = getattr(module, self.class_name)
277
255
  return self._module
278
256
 
279
- def __getattr__(self, name):
257
+ def __getattr__(self, name: str):
280
258
  module = self._load()
281
259
  return getattr(module, name)
282
260
 
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.9.post1"
1
+ __version__ = "0.2.11"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.2.9.post1
3
+ Version: 0.2.11
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -215,34 +215,41 @@ Requires-Dist: requests
215
215
  Requires-Dist: tqdm
216
216
  Requires-Dist: numpy
217
217
  Provides-Extra: all
218
- Requires-Dist: sglang[srt] ; extra == 'all'
219
- Requires-Dist: sglang[openai] ; extra == 'all'
220
- Requires-Dist: sglang[anthropic] ; extra == 'all'
221
- Requires-Dist: sglang[litellm] ; extra == 'all'
218
+ Requires-Dist: sglang[srt]; extra == "all"
219
+ Requires-Dist: sglang[openai]; extra == "all"
220
+ Requires-Dist: sglang[anthropic]; extra == "all"
221
+ Requires-Dist: sglang[litellm]; extra == "all"
222
222
  Provides-Extra: anthropic
223
- Requires-Dist: anthropic >=0.20.0 ; extra == 'anthropic'
223
+ Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
224
+ Provides-Extra: dev
225
+ Requires-Dist: sglang[all]; extra == "dev"
226
+ Requires-Dist: sglang[test]; extra == "dev"
224
227
  Provides-Extra: litellm
225
- Requires-Dist: litellm >=1.0.0 ; extra == 'litellm'
228
+ Requires-Dist: litellm>=1.0.0; extra == "litellm"
226
229
  Provides-Extra: openai
227
- Requires-Dist: openai >=1.0 ; extra == 'openai'
228
- Requires-Dist: tiktoken ; extra == 'openai'
230
+ Requires-Dist: openai>=1.0; extra == "openai"
231
+ Requires-Dist: tiktoken; extra == "openai"
229
232
  Provides-Extra: srt
230
- Requires-Dist: aiohttp ; extra == 'srt'
231
- Requires-Dist: fastapi ; extra == 'srt'
232
- Requires-Dist: hf-transfer ; extra == 'srt'
233
- Requires-Dist: huggingface-hub ; extra == 'srt'
234
- Requires-Dist: interegular ; extra == 'srt'
235
- Requires-Dist: packaging ; extra == 'srt'
236
- Requires-Dist: pillow ; extra == 'srt'
237
- Requires-Dist: psutil ; extra == 'srt'
238
- Requires-Dist: pydantic ; extra == 'srt'
239
- Requires-Dist: torch ; extra == 'srt'
240
- Requires-Dist: uvicorn ; extra == 'srt'
241
- Requires-Dist: uvloop ; extra == 'srt'
242
- Requires-Dist: zmq ; extra == 'srt'
243
- Requires-Dist: vllm ==0.5.3.post1 ; extra == 'srt'
244
- Requires-Dist: outlines >=0.0.44 ; extra == 'srt'
245
- Requires-Dist: python-multipart ; extra == 'srt'
233
+ Requires-Dist: aiohttp; extra == "srt"
234
+ Requires-Dist: fastapi; extra == "srt"
235
+ Requires-Dist: hf-transfer; extra == "srt"
236
+ Requires-Dist: huggingface-hub; extra == "srt"
237
+ Requires-Dist: interegular; extra == "srt"
238
+ Requires-Dist: packaging; extra == "srt"
239
+ Requires-Dist: pillow; extra == "srt"
240
+ Requires-Dist: psutil; extra == "srt"
241
+ Requires-Dist: pydantic; extra == "srt"
242
+ Requires-Dist: python-multipart; extra == "srt"
243
+ Requires-Dist: torch; extra == "srt"
244
+ Requires-Dist: uvicorn; extra == "srt"
245
+ Requires-Dist: uvloop; extra == "srt"
246
+ Requires-Dist: zmq; extra == "srt"
247
+ Requires-Dist: vllm==0.5.4; extra == "srt"
248
+ Requires-Dist: outlines>=0.0.44; extra == "srt"
249
+ Provides-Extra: test
250
+ Requires-Dist: jsonlines; extra == "test"
251
+ Requires-Dist: matplotlib; extra == "test"
252
+ Requires-Dist: pandas; extra == "test"
246
253
 
247
254
  <div align="center">
248
255
  <img src="https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png" alt="logo" width="400"></img>
@@ -295,20 +302,20 @@ pip install --upgrade pip
295
302
  pip install "sglang[all]"
296
303
 
297
304
  # Install FlashInfer CUDA kernels
298
- pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
305
+ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
299
306
  ```
300
307
 
301
308
  ### Method 2: From source
302
309
  ```
303
- # Use the stable v0.2.9.post1 branch
304
- git clone -b v0.2.9.post1 https://github.com/sgl-project/sglang.git
310
+ # Use the latest release branch
311
+ git clone -b v0.2.11 https://github.com/sgl-project/sglang.git
305
312
  cd sglang
306
313
 
307
314
  pip install --upgrade pip
308
315
  pip install -e "python[all]"
309
316
 
310
317
  # Install FlashInfer CUDA kernels
311
- pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
318
+ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
312
319
  ```
313
320
 
314
321
  ### Method 3: Using docker
@@ -382,7 +389,7 @@ response = client.chat.completions.create(
382
389
  print(response)
383
390
  ```
384
391
 
385
- It supports streaming, vision, and most features of the Chat/Completions/Models endpoints specified by the [OpenAI API Reference](https://platform.openai.com/docs/api-reference/).
392
+ It supports streaming, vision, and most features of the Chat/Completions/Models/Batch endpoints specified by the [OpenAI API Reference](https://platform.openai.com/docs/api-reference/).
386
393
 
387
394
  ### Additional Server Arguments
388
395
  - Add `--tp 2` to enable tensor parallelism. If it indicates `peer access is not supported between these two devices`, add `--enable-p2p-check` option.
@@ -393,10 +400,14 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
393
400
  ```
394
401
  python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --dp 2 --tp 2
395
402
  ```
396
- - If you see out-of-memory errors during serving, please try to reduce the memory usage of the KV cache pool by setting a smaller value of `--mem-fraction-static`. The default value is `0.9`
403
+ - If you see out-of-memory errors during serving, please try to reduce the memory usage of the KV cache pool by setting a smaller value of `--mem-fraction-static`. The default value is `0.9`.
397
404
  ```
398
405
  python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000 --mem-fraction-static 0.7
399
406
  ```
407
+ - If you see out-of-memory errors during prefill for long prompts on a model that supports long context, consider using chunked prefill.
408
+ ```
409
+ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --port 30000 --chunked-prefill-size 8192
410
+ ```
400
411
  - See [hyperparameter_tuning.md](docs/en/hyperparameter_tuning.md) on tuning hyperparameters for better performance.
401
412
  - Add `--nnodes 2` to run tensor parallelism on multiple nodes. If you have two nodes with two GPUs on each node and want to run TP=4, let `sgl-dev-0` be the hostname of the first node and `50000` be an available port.
402
413
  ```
@@ -410,22 +421,6 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
410
421
  - To enable fp8 quantization, you can add `--quantization fp8` on a fp16 checkpoint or directly load a fp8 checkpoint without specifying any arguments.
411
422
  - To enable experimental torch.compile support, you can add `--enable-torch-compile`. It accelerates small models on small batch sizes.
412
423
 
413
- ### Run Llama 3.1 405B
414
-
415
- ```bash
416
- ## Run 405B (fp8) on a single node
417
- python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
418
-
419
- ## Run 405B (fp16) on two nodes
420
- # replace the `172.16.4.52:20000` with your own first node ip address and port, disable CUDA Graph temporarily
421
-
422
- # on the first node
423
- GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph --mem-frac 0.75
424
-
425
- # on the second
426
- GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph --mem-frac 0.75
427
- ```
428
-
429
424
  ### Supported Models
430
425
 
431
426
  - Llama / Llama 2 / Llama 3 / Llama 3.1
@@ -451,9 +446,25 @@ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/
451
446
 
452
447
  Instructions for supporting a new model are [here](https://github.com/sgl-project/sglang/blob/main/docs/en/model_support.md).
453
448
 
449
+ ### Run Llama 3.1 405B
450
+
451
+ ```bash
452
+ ## Run 405B (fp8) on a single node
453
+ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 --tp 8
454
+
455
+ ## Run 405B (fp16) on two nodes
456
+ # Replace `172.16.4.52:20000` with the IP address and port of your own first node; CUDA Graph is temporarily disabled via `--disable-cuda-graph`
457
+
458
+ # on the first node
459
+ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 0 --disable-cuda-graph --mem-frac 0.75
460
+
461
+ # on the second
462
+ GLOO_SOCKET_IFNAME=eth0 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-405B-Instruct --tp 16 --nccl-init-addr 172.16.4.52:20000 --nnodes 2 --node-rank 1 --disable-cuda-graph --mem-frac 0.75
463
+ ```
464
+
454
465
  ### Benchmark Performance
455
466
 
456
- - Benchmark a single static batch by running the following command without launching a server. The arguments are the same as those for `launch_server.py`. This is not a dynamic batching server, so it may run out of memory for a batch size that can run successfully with a real server. This is because a real server will truncate the prefill into several batches/chunks, while this unit test does not do this.
467
+ - Benchmark a single static batch by running the following command without launching a server. The arguments are the same as for `launch_server.py`. Note that this is not a dynamic batching server, so it may run out of memory for a batch size that a real server can handle. A real server truncates the prefill into several batches, while this unit test does not. For accurate large batch testing, consider using `sglang.bench_serving`.
457
468
  ```
458
469
  python -m sglang.bench_latency --model-path meta-llama/Meta-Llama-3-8B-Instruct --batch 32 --input-len 256 --output-len 32
459
470
  ```
@@ -0,0 +1,102 @@
1
+ sglang/__init__.py,sha256=T8MYdFfKFPZcgFKHMBpOCIlFbhjwmr77Nqm6mdE6bCY,1590
2
+ sglang/api.py,sha256=gAY9JhqWXjrYoWnMvR-iiuuY1YSN94We-lc1LH0z3cw,6030
3
+ sglang/bench_latency.py,sha256=CXvukEW0IeoH2IwN2vuriC0eHBdJsz3lgT7OwwNo_7A,16146
4
+ sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
5
+ sglang/check_env.py,sha256=oU8VmjjPK2SviRhr41cF1953soBu-eTT5E0Hf04zMzo,4974
6
+ sglang/global_config.py,sha256=9JxaFkBKSgep6BVeEl_kx9tuW9PqdijYELyBGTryl6o,1704
7
+ sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
8
+ sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
9
+ sglang/utils.py,sha256=C50xm06WWKpKB8kSNs9vO4egJ2QTk_OAA6M13S2cB_A,8369
10
+ sglang/version.py,sha256=_MLx4ac1juJPWEEiC9kMQISX3x3jFBr507jM2P_hxMg,23
11
+ sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
13
+ sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
14
+ sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
15
+ sglang/lang/interpreter.py,sha256=3RIeSGdKlKTq2Ixg_Tyo0fGEDTvBKS2f9FaJYODBHzA,30102
16
+ sglang/lang/ir.py,sha256=FGWghAfVW9IcxcrVqHiqpf7vmWzuNYoVTMSbBZkYVRk,16839
17
+ sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
18
+ sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
20
+ sglang/lang/backend/base_backend.py,sha256=Q5HdiDtyBewQeoYH0kDtBRVL8KFiEPNq9dw7XmauHQ8,1985
21
+ sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
22
+ sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
23
+ sglang/lang/backend/runtime_endpoint.py,sha256=AaBc5yczchX7mkwiKDMyjLjBkJsh2Lubrfd9lvCOlDo,9544
24
+ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
25
+ sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
26
+ sglang/srt/hf_transformers_utils.py,sha256=Tf_RplcW7llVXsigRvSGqmeAUxBeAL8rPCkzuqWfZ8U,11925
27
+ sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
28
+ sglang/srt/model_config.py,sha256=k4OfRV-szWkFaJMIC40JoJGJ75AfYQ2hf4M1dS1aQ-o,6366
29
+ sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
30
+ sglang/srt/server.py,sha256=hUNnTvH4c1AI2JJzoBUf9TQuTelx-vulcqwkEplw7Gk,16699
31
+ sglang/srt/server_args.py,sha256=SmvnebtDTsvPNDyW6lltuJKC7h8eVdYmurY1ieIMySA,16475
32
+ sglang/srt/utils.py,sha256=GcRFf3pb5l-Q5TJU4gF-Wp7Ct46l3BO0aMpjlyHXp3I,23766
33
+ sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
34
+ sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
35
+ sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
36
+ sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
37
+ sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
38
+ sglang/srt/layers/extend_attention.py,sha256=V5pm7toSDlzByaV4lGRgXVGWFUPf68chvvahlT2h4mk,14092
39
+ sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
40
+ sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
41
+ sglang/srt/layers/logits_processor.py,sha256=wHKB1FjbfY0a7KGw5dCsEhmO4sc7VMy3gYtSPv4oQYM,11097
42
+ sglang/srt/layers/radix_attention.py,sha256=lXwm-qs7hPy_EFV1Zf2pPQ0-drAdrO8V5J4eX0LwLtU,7505
43
+ sglang/srt/layers/token_attention.py,sha256=pdBORaWQGvDy_Aitcq0XDHk2Rravol-jZZkrsgkXeng,8849
44
+ sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
45
+ sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
46
+ sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
47
+ sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
48
+ sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
49
+ sglang/srt/managers/io_struct.py,sha256=VK61d6zfnBz5a3IMmwYsa5PNa9jUXPPmED1TdDRQGDs,7345
50
+ sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
51
+ sglang/srt/managers/schedule_batch.py,sha256=sKQAHRL6VoapGiO7yQV796gW4sVGAgVVBMtmENbKtvg,29641
52
+ sglang/srt/managers/tokenizer_manager.py,sha256=wqb6zQbkHYcSNU14Auuh5519CVMmfbKGBQvn_IwDSAo,21408
53
+ sglang/srt/managers/tp_worker.py,sha256=3sHlN4hxksF22lkOJ8i3X6WSH4_5POy74BfbIAzIDtM,35216
54
+ sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
55
+ sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
56
+ sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
57
+ sglang/srt/mem_cache/memory_pool.py,sha256=oOKtPTgzujo9gHXykSuER7VKqQRuwNKlXyXlaK-3dxo,5280
58
+ sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
59
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=EyI8sMMoVlOjdTT2Y3cfwo1-uQ43QCQ1skx5BNgchjE,9433
60
+ sglang/srt/model_executor/forward_batch_info.py,sha256=P5bGeLsnFbEqgWLI5X5Eg0XFCG1j2oWZOsIAMZNkZW4,9022
61
+ sglang/srt/model_executor/model_runner.py,sha256=yzkJLIM41mhbfgfq87ToskAaA1PS67YzhmoSMbflkZI,17479
62
+ sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
63
+ sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
64
+ sglang/srt/models/chatglm.py,sha256=7bHU2AFoppINDZm0EdxgtAJe7rwr9OPkhOCfq2qNrIA,13862
65
+ sglang/srt/models/commandr.py,sha256=5BEtIS2uUQJANkkY-6ZeDqlrpUK5yXVYHiztU3vsTKY,14172
66
+ sglang/srt/models/dbrx.py,sha256=N_0Ku_p1NCsc29NktUBNqPv7Z33XhYxOZK5xN7nzW4s,14661
67
+ sglang/srt/models/deepseek.py,sha256=E5W4nkH-Ne449rAIwQZgz-FAH2Qqp2r1vNfboyk5wEg,16024
68
+ sglang/srt/models/deepseek_v2.py,sha256=NMcckZb48kVUwAmDA2l8wO19T6DNkJOkKAhHa6utBZM,26968
69
+ sglang/srt/models/gemma.py,sha256=ilfN_NOcz7hpwEJ2y7NW3fBFmFO7YfjhdFDbfzl2qww,12285
70
+ sglang/srt/models/gemma2.py,sha256=D8GZOI1tAbEV9PaBmJSsJRzCmvaK3tGXttIbrMb5yiQ,16426
71
+ sglang/srt/models/gpt_bigcode.py,sha256=OKk9UP67as3T5bePlTRGHTCD-1wqaUEk92AowXPm6dg,10204
72
+ sglang/srt/models/grok.py,sha256=M9rtdXslqYBle5VyZqFVHiJUXq_q_aHbza63xa03zqI,27861
73
+ sglang/srt/models/internlm2.py,sha256=CKWBL0dBvLdaEUeJOUvLUNPb8BLrAZ8_BSf2mfFQhfU,12225
74
+ sglang/srt/models/llama2.py,sha256=3ZEWi0PVCDNjTrVNvLs1ESdyTcZhJlZjaH5uyS46JyM,14288
75
+ sglang/srt/models/llama_classification.py,sha256=Dvzy3PfETiJtnKFOk8qDDLUoZECf_cpSrNeA60PaDo4,4932
76
+ sglang/srt/models/llava.py,sha256=-ysi192vpBDxNaMS8qaLOhC34lXQyRtbG_0niVaceSo,18436
77
+ sglang/srt/models/llavavid.py,sha256=MX7YpqYh5J4BoOnV7vVAIfoOlBFQXYpp8Kpe7WK0ejk,13562
78
+ sglang/srt/models/minicpm.py,sha256=ea_OyiwVTo6Tg9jNRAwqxETnA6FFeAqlIbiUS-xViEI,13843
79
+ sglang/srt/models/mistral.py,sha256=jlrWBVNXbAUziAaIdHAjFcOJnKtn9Bl8rBd65ypJM-I,819
80
+ sglang/srt/models/mixtral.py,sha256=raSLbp6AfWg5_u-f-lYeRejE9koAjbHt8iIHXd3nURM,21397
81
+ sglang/srt/models/mixtral_quant.py,sha256=xYeeatZ9OfwCTas_KbH9nl6lnUT4YqSY7NAxpgLp5LE,14222
82
+ sglang/srt/models/qwen.py,sha256=43ea6gn4wHzAaI3JTDLtl08aEm0vIqgzbVH9M8oeuY0,10006
83
+ sglang/srt/models/qwen2.py,sha256=Hyhks2r4KHpKeb9iHZpnvEVc5klmnrPwcLohqg8j1kw,12284
84
+ sglang/srt/models/qwen2_moe.py,sha256=PZdhEf0DUuGWsld3TyDWlIqSbrrOdqvCD4lAtCPWXeg,18147
85
+ sglang/srt/models/stablelm.py,sha256=yPrdzPEoUD2s_Q3RgOq7BBC7z-UtEaACzabqbDRs2tA,11368
86
+ sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
87
+ sglang/srt/openai_api/adapter.py,sha256=Eq44_hGwHcglCKOc6WqWDxBsgyRqtuC6VR4HB4GLfUY,38193
88
+ sglang/srt/openai_api/protocol.py,sha256=pcRgmDM3Kozh74Aj-qEo8q64BI6hEjrdhYDU4m9srdI,8294
89
+ sglang/test/run_eval.py,sha256=kbM6SiosfXj-1uYTFXPWMd7hZDvJZwV-AmdHi_WfP3A,3559
90
+ sglang/test/runners.py,sha256=APXXbrqmUGUqnX7T1Aq8X2NJQkIqtv6B42a2ybdlPjA,7459
91
+ sglang/test/simple_eval_common.py,sha256=HL1bfgkTAKP7sk-kShg73WTeADhuBD6xSsuLbV_9C3s,12359
92
+ sglang/test/simple_eval_gpqa.py,sha256=CaRAuHdZj0m4mRm4tH9k7cB0kQxe0LHwlz7Vn1qyKps,3189
93
+ sglang/test/simple_eval_humaneval.py,sha256=k50DKoAbXiw-ubrFXHet9B-7tboHU2dQJf5G3C-KKq4,5838
94
+ sglang/test/simple_eval_math.py,sha256=EQblQmtUt-kl558drzhP7c6KhpDNgr1EJhhKx5eeHM4,2519
95
+ sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
96
+ sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
97
+ sglang/test/test_utils.py,sha256=ITQcY3WGV4kLGWEkfU-AeuFX8yGLmq9LEK5jHiuW7Sw,13991
98
+ sglang-0.2.11.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
99
+ sglang-0.2.11.dist-info/METADATA,sha256=gSQA5-Hf9y41ulOKiMeHRu4Nf-c9Nbt6xhmlCGzvhNY,33783
100
+ sglang-0.2.11.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
101
+ sglang-0.2.11.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
102
+ sglang-0.2.11.dist-info/RECORD,,
@@ -1,97 +0,0 @@
1
- sglang/__init__.py,sha256=ECjvAWlxIwKtUIXGchfkoCIbF-iqLjH-Q0o8xHTlVNY,1352
2
- sglang/api.py,sha256=s_P8BvGDCQ0PiqOapr2TLFge1NA7QmKqUx6bFQ8Q5GQ,5676
3
- sglang/bench_latency.py,sha256=JPatRvstM3nXb-ViVgtR-TaRrFHpcHzqoDG7BQmRYK8,10539
4
- sglang/bench_serving.py,sha256=M0YQT6xElpkx-FtmyUe6lhX1DZfVLGh54qd6qfFYquc,34801
5
- sglang/check_env.py,sha256=M4hWWl9IAmrhVATj6nbPlGiZ4FtLI3K9ETL9hFzxh8Q,4138
6
- sglang/global_config.py,sha256=CyhGL7PE-KlMcg7IHWykzImU1y4NQlpeIlh9lHA77uo,1749
7
- sglang/launch_server.py,sha256=Gg8CwNlTCCfg1dF65ZT9ePLxOT9LKtY79GhIPG6PCrU,358
8
- sglang/launch_server_llavavid.py,sha256=40uaazMsavKuk6YXFa5v37kdUpFGuealgJJeph1g8gU,1025
9
- sglang/utils.py,sha256=r0Z7hY_bFFk-b6WeQJir9br-hCW2-p7n5E7Et2WziaQ,8776
10
- sglang/version.py,sha256=xiigmrV7lCZAjfifluudBrG9T_WlhKAusKo0iARTZm0,28
11
- sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- sglang/lang/chat_template.py,sha256=psIlhaDo70twgLrx5Lgln03metLEA3-FZuixeI0Y7Ao,13309
13
- sglang/lang/compiler.py,sha256=UiXUmPR9wBAPtnORrLcyQX8Uh0ZL0nKeV8ZgBozAJPw,7531
14
- sglang/lang/interpreter.py,sha256=_MbvYB0vweCgALklpM2DlofiCXuITCmX_fl8rPPcp5U,30340
15
- sglang/lang/ir.py,sha256=0r-mhA4aO-uuS97Dvkw99ERTcJXfzuV6jJQMmuCwHEg,16615
16
- sglang/lang/tracer.py,sha256=borJmlSJOhg1RUndGRnilnR60eEZz2Y9aU7BpftsOxU,8287
17
- sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
- sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
19
- sglang/lang/backend/base_backend.py,sha256=APiMht4WYECLCOGRPCEUF6lX-an1vjVe2dWoMSgymWY,1831
20
- sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
21
- sglang/lang/backend/openai.py,sha256=6ww2rwKouWgtmjaCf4hk-kXXJ6bY6n9Xnbm3UTFZvl4,14808
22
- sglang/lang/backend/runtime_endpoint.py,sha256=6iW1S62KmYyQGiWsHJFhZidK01vlIE55IsYN2tP38WQ,9202
23
- sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
24
- sglang/srt/conversation.py,sha256=V5YuoeO6-aLqGv0p3J2qx8TnBJbN1oTopYFutNul3GQ,16491
25
- sglang/srt/hf_transformers_utils.py,sha256=Fg-3panb6lsqOhHmAYA0ivkXyBjdnvY5mqvilDv2xF4,11919
26
- sglang/srt/mm_utils.py,sha256=n7_GmbOM_0IWVXovpM34rKIBw0Py9yb_NXSQw27u4OA,9454
27
- sglang/srt/model_config.py,sha256=DO7m84WiT3dzPWmyKz_UXDAHEdqEjq8Lq5wCjzjYMME,6023
28
- sglang/srt/sampling_params.py,sha256=uZFDlTUPnNR5_3IDH-INDeN-tm6LlRkC2KT-B3njxJs,3687
29
- sglang/srt/server.py,sha256=cDHUmLqj7MjF-3L9WcfA-4z9dRl55cwF5ygXuncMl-Q,15852
30
- sglang/srt/server_args.py,sha256=wdRlxR-509RfNYuMQoxUAefMwoc5eme6sYwEMyRBHmk,16034
31
- sglang/srt/utils.py,sha256=5wgGe6kI59JAmf8kxLsItulJ4xQaOJHHYaWWd6_WWmo,23384
32
- sglang/srt/constrained/__init__.py,sha256=NLpZGj9RIx83ejDrM_pfaRtqGgaPq_ggJszPQENUJ2E,2037
33
- sglang/srt/constrained/base_tool_cache.py,sha256=1_m-AivPtWRwUgGiEZBafCrSFUGahK4UM4vgAd8TkMg,2004
34
- sglang/srt/constrained/fsm_cache.py,sha256=GoPBr_9ZdJizF2PKbYoQw2I4ckfrUYwCeMZxB9sY3TM,2639
35
- sglang/srt/constrained/jump_forward.py,sha256=IgZ8D0woy5FLIQvXkE8wZRYejDsfVkjU0sqUlkiv_f4,6193
36
- sglang/srt/layers/context_flashattention_nopad.py,sha256=r_TpHuYAVgq1pN81PiWe1bebtY-p9MBndBaoIE2VXrk,5180
37
- sglang/srt/layers/extend_attention.py,sha256=zuNnAdL_wF6BX0Mwn1dgDJvh3YJjYwqa5Fbzp8muOVc,12573
38
- sglang/srt/layers/fused_moe.py,sha256=KmyXwau2OOZpQimGIQrHptzGNs1trIud5AKEEKXdzPU,20823
39
- sglang/srt/layers/linear.py,sha256=3Se2FRXyqXcd-uvNx2b7s-jolsUTEVeYBMYHmV82wPw,34518
40
- sglang/srt/layers/logits_processor.py,sha256=5Cg3h5b4H0EUeOJRst3IOMWL5dniP63A5s15BRkAMmk,11091
41
- sglang/srt/layers/radix_attention.py,sha256=tdA-kdd9LQY1wbw3iYuy-9cikVJYmy3EctwAlUfN-Uo,6945
42
- sglang/srt/layers/token_attention.py,sha256=ylUqUnozJCCohxTGAiiP3sxgUrcXfEVic8-qgcHYDj4,7968
43
- sglang/srt/layers/quantization/__init__.py,sha256=JMlgE-FWS759lfQ9Uc6mGFqBbTFLlvKeVEFpZLATe14,2536
44
- sglang/srt/layers/quantization/fp8.py,sha256=GQOLeGbrcUfwO-7oClzDda0RXGPHR70ZXUHArZsa174,25511
45
- sglang/srt/managers/controller_multi.py,sha256=LYI-XE9h57DW8Uh4gpd8upsC3p2dd5weKzddEH274jg,6626
46
- sglang/srt/managers/controller_single.py,sha256=CdQ9_XPZdcWF5jArDmVR8K-WZ9_8Gpgk4SwANKxTX-Y,5112
47
- sglang/srt/managers/detokenizer_manager.py,sha256=GXWdW4n2N-otL3zcgdr0t1PcEe2EmQJA8AElntiNV1o,5606
48
- sglang/srt/managers/io_struct.py,sha256=Rz7Ur9Yw6prDGdy6XjsSiUmVBccS6cef-G_9TW7HA_4,7105
49
- sglang/srt/managers/policy_scheduler.py,sha256=ajSB-gCC6VJkXvnKU8FYU3Kgcigozp2pMTwF84Wp14o,3138
50
- sglang/srt/managers/schedule_batch.py,sha256=LIoVCPNivh0u1dOrrWRgFD6a4ywq3nrG_4dNgCK0kIw,37697
51
- sglang/srt/managers/tokenizer_manager.py,sha256=rtZ44aiZOMHLHkXDhMgj0HDR3gExpeGjWfoCD0PfG_o,20574
52
- sglang/srt/managers/tp_worker.py,sha256=JPLneFwcPlmPXZX1QxZHWgcdau8FC8wNuVqfCqsgOkU,35234
53
- sglang/srt/mem_cache/base_cache.py,sha256=czyN8IumXcMQskYOZDV3DzjfD4kdR-qwLVxceDqnOmE,788
54
- sglang/srt/mem_cache/chunk_cache.py,sha256=u1mkGoTI7_31H0i0mhKT7S57StYSsdmsSPqyGubE7lY,1560
55
- sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
56
- sglang/srt/mem_cache/memory_pool.py,sha256=wkhjyYLbAZrl2FB5i4ODkxgMufBuDpe4N0kbXhu6ZO0,4509
57
- sglang/srt/mem_cache/radix_cache.py,sha256=pa5RD4xNKPSuvL55BnC4mimoca5oJRXr4Rg91-sbTcs,8881
58
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=OdmO6R7nHWrRJCtZOxYkt0KNdGoX7Md4knsypwPYjaQ,9365
59
- sglang/srt/model_executor/model_runner.py,sha256=RadluuL2Ou_BTOgo2IrLfMMEX7Z_DRgPL3JVBIr2KaU,16189
60
- sglang/srt/model_loader/model_loader.py,sha256=QmZUhHh1nmWrfYlunfnxMcTsIvip1l6aMIlrXoCED4I,10697
61
- sglang/srt/model_loader/utils.py,sha256=0AoWXX9uV5rKRYXJ4HduSnvdeerytI4ONCLCH6X4XFQ,10675
62
- sglang/srt/models/chatglm.py,sha256=vYWooqyPmcSFZNjxj_g5I_FgHJlDytbEiz6vyv3JBNM,13856
63
- sglang/srt/models/commandr.py,sha256=gaTI77hgitPlcUNyxMEdGu_XZQj2DuAMnh3KbZQ9HFg,14166
64
- sglang/srt/models/dbrx.py,sha256=LQu7I2KH-XzY9iBlaK7IQsM1o3kzsuI1vTCspK2C19o,14655
65
- sglang/srt/models/deepseek.py,sha256=adr57ZX6aPOBOpmvm7YIvoqo6u0jdrKJPZ8SGcVXAh8,16014
66
- sglang/srt/models/deepseek_v2.py,sha256=9CORl-IroSguYPX3wz_aGe7mFoUE7cQRMs7CgbkBYLk,20087
67
- sglang/srt/models/gemma.py,sha256=PMPI1-WLuLdk6e7u6I9d_LoCkauLkWY3aOP8MFEZ-sI,12279
68
- sglang/srt/models/gemma2.py,sha256=kTjZcsptgtYaO8BL_NlygjVSMSloq2Mc4Rf3FKvEhbs,16420
69
- sglang/srt/models/gpt_bigcode.py,sha256=U7GmHKywSu12D-EwvuWv3RwHkx6bPawaRIjlFIpQkfs,10194
70
- sglang/srt/models/grok.py,sha256=NfZdsRVErDIUWFqjhtNf2pqC9G4cRdYHBFpgDq1IZ2A,27855
71
- sglang/srt/models/internlm2.py,sha256=Ld2GUxZeqqqJ2vd4QiX2s1y2AceJLA1nVnUYY88GMQk,12219
72
- sglang/srt/models/llama2.py,sha256=zfOk3OK1_B6s6yuXsZFmNCf07RsfytVD72GunLBt8Cc,14282
73
- sglang/srt/models/llama_classification.py,sha256=4r_orFZqBR3U_yC4bus1K3Z3-ADscYGSzgA82_VDN0g,4926
74
- sglang/srt/models/llava.py,sha256=BJphgyQGdo7uTpJcKGEfWwdpH9GTMDnyiznLSSgmvm8,18476
75
- sglang/srt/models/llavavid.py,sha256=-7vaVqaIfukCvMkNakEPblpwjIHC6ezrAvmpE5RzlUY,13602
76
- sglang/srt/models/minicpm.py,sha256=Mj-dbhfN7li7cTEP-0sV7i5PSYkMGIaYCqRU7eDc-BY,13837
77
- sglang/srt/models/mistral.py,sha256=jlrWBVNXbAUziAaIdHAjFcOJnKtn9Bl8rBd65ypJM-I,819
78
- sglang/srt/models/mixtral.py,sha256=QiswCUdZ4VwMghtrr_vGP_dkzxSCrcUIcBgjlOZh_Ao,21391
79
- sglang/srt/models/mixtral_quant.py,sha256=I1sIdistZHw7GO35qvlteA16DGVtME5rvEVV86v0-7Y,14216
80
- sglang/srt/models/qwen.py,sha256=xAtlWyhMkcfwocRqzZoH01qKbkohXxAf4tnkPh0xtpM,10000
81
- sglang/srt/models/qwen2.py,sha256=mXlVd6UTCXY3VdgodFpQnlaY-NYLIbA-SknxdA9R13w,12278
82
- sglang/srt/models/qwen2_moe.py,sha256=YYdJEezic7GyW-_bXlNIaqBa0C4IHQpz_vuRBLxms4k,18141
83
- sglang/srt/models/stablelm.py,sha256=b3d-ZwLQoLjZ6CupnkIq7d-z9tzGSxAyIcgSmZiZxZw,11362
84
- sglang/srt/models/yivl.py,sha256=p4s_D_m4H2exP4b91Y-CTkq8T-eIG3DJsFy9pB0e7TM,4932
85
- sglang/srt/openai_api/adapter.py,sha256=h6TIU0Fu3jU361pye4J12vcDug7UJJRPiBAY_HfFUuE,32599
86
- sglang/srt/openai_api/protocol.py,sha256=JXLnnQ63I-bJv93ICPfP0cBpyomQA5IYE_mkUg5X4Es,8177
87
- sglang/test/run_eval.py,sha256=WvMLSi70G9fhruP8cPLOfDJ9XEKL7yNn2pylx-7tNsQ,3054
88
- sglang/test/simple_eval_common.py,sha256=Qh1-iEXJCKfJmgpAzNSp28fcP1TUJzt3s9i1FjvemHY,12340
89
- sglang/test/simple_eval_humaneval.py,sha256=IW0ZC6D4SXu06IJiMoAY9DK9SMsTOlDPAwu4cfbJco0,5826
90
- sglang/test/simple_eval_mmlu.py,sha256=KqSSdSu2qfoKQ870ttxev1NJ7c90xv2mvKOQsSODtAw,4326
91
- sglang/test/test_programs.py,sha256=e9_ifoIvuI1Ctkbkz3wfdZLBBSRikby8ywcodBIkf9M,13826
92
- sglang/test/test_utils.py,sha256=PndOL1zdseMrpHTHGmgsHHepxqYBn__eNLrlsSXLy6k,11905
93
- sglang-0.2.9.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
94
- sglang-0.2.9.post1.dist-info/METADATA,sha256=BY728JRrlJkEdmiLJAvi_qfuUY2LDBqcKqqtooU9dxk,33281
95
- sglang-0.2.9.post1.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
96
- sglang-0.2.9.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
97
- sglang-0.2.9.post1.dist-info/RECORD,,