sglang 0.4.3__py3-none-any.whl → 0.4.3.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/lang/backend/openai.py +5 -0
- sglang/lang/chat_template.py +22 -7
- sglang/lang/ir.py +1 -0
- sglang/srt/configs/__init__.py +6 -3
- sglang/srt/configs/model_config.py +2 -0
- sglang/srt/configs/qwen2_5_vl_config.py +1003 -0
- sglang/srt/entrypoints/engine.py +17 -2
- sglang/srt/hf_transformers_utils.py +2 -3
- sglang/srt/layers/attention/flashinfer_backend.py +101 -30
- sglang/srt/layers/moe/fused_moe_triton/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/quantization/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/layers/quantization/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json +26 -0
- sglang/srt/managers/image_processor.py +217 -122
- sglang/srt/managers/schedule_batch.py +1 -0
- sglang/srt/model_executor/forward_batch_info.py +4 -1
- sglang/srt/model_executor/model_runner.py +1 -0
- sglang/srt/models/deepseek_nextn.py +295 -0
- sglang/srt/models/deepseek_v2.py +9 -3
- sglang/srt/models/llava.py +2 -1
- sglang/srt/models/qwen2_5_vl.py +722 -0
- sglang/srt/models/qwen2_vl.py +2 -1
- sglang/srt/openai_api/adapter.py +17 -3
- sglang/srt/server_args.py +6 -3
- sglang/srt/speculative/eagle_worker.py +7 -2
- sglang/srt/speculative/spec_info.py +11 -1
- sglang/utils.py +99 -19
- sglang/version.py +1 -1
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/METADATA +3 -3
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/RECORD +43 -27
- sglang/srt/configs/qwen2vl.py +0 -130
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/LICENSE +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/WHEEL +0 -0
- {sglang-0.4.3.dist-info → sglang-0.4.3.post2.dist-info}/top_level.txt +0 -0
sglang/srt/entrypoints/engine.py
CHANGED
@@ -115,6 +115,9 @@ class Engine:
|
|
115
115
|
sampling_params: Optional[Union[List[Dict], Dict]] = None,
|
116
116
|
# The token ids for text; one can either specify text or input_ids.
|
117
117
|
input_ids: Optional[Union[List[List[int]], List[int]]] = None,
|
118
|
+
# The image input. It can be a file name, a url, or base64 encoded string.
|
119
|
+
# See also python/sglang/srt/utils.py:load_image.
|
120
|
+
image_data: Optional[Union[List[str], str]] = None,
|
118
121
|
return_logprob: Optional[Union[List[bool], bool]] = False,
|
119
122
|
logprob_start_len: Optional[Union[List[int], int]] = None,
|
120
123
|
top_logprobs_num: Optional[Union[List[int], int]] = None,
|
@@ -126,14 +129,20 @@ class Engine:
|
|
126
129
|
The arguments of this function is the same as `sglang/srt/managers/io_struct.py::GenerateReqInput`.
|
127
130
|
Please refer to `GenerateReqInput` for the documentation.
|
128
131
|
"""
|
132
|
+
modalities_list = []
|
133
|
+
if image_data is not None:
|
134
|
+
modalities_list.append("image")
|
135
|
+
|
129
136
|
obj = GenerateReqInput(
|
130
137
|
text=prompt,
|
131
138
|
input_ids=input_ids,
|
132
139
|
sampling_params=sampling_params,
|
140
|
+
image_data=image_data,
|
133
141
|
return_logprob=return_logprob,
|
134
142
|
logprob_start_len=logprob_start_len,
|
135
143
|
top_logprobs_num=top_logprobs_num,
|
136
144
|
lora_path=lora_path,
|
145
|
+
modalities=modalities_list,
|
137
146
|
custom_logit_processor=custom_logit_processor,
|
138
147
|
stream=stream,
|
139
148
|
)
|
@@ -162,6 +171,9 @@ class Engine:
|
|
162
171
|
sampling_params: Optional[Union[List[Dict], Dict]] = None,
|
163
172
|
# The token ids for text; one can either specify text or input_ids.
|
164
173
|
input_ids: Optional[Union[List[List[int]], List[int]]] = None,
|
174
|
+
# The image input. It can be a file name, a url, or base64 encoded string.
|
175
|
+
# See also python/sglang/srt/utils.py:load_image.
|
176
|
+
image_data: Optional[Union[List[str], str]] = None,
|
165
177
|
return_logprob: Optional[Union[List[bool], bool]] = False,
|
166
178
|
logprob_start_len: Optional[Union[List[int], int]] = None,
|
167
179
|
top_logprobs_num: Optional[Union[List[int], int]] = None,
|
@@ -177,6 +189,7 @@ class Engine:
|
|
177
189
|
text=prompt,
|
178
190
|
input_ids=input_ids,
|
179
191
|
sampling_params=sampling_params,
|
192
|
+
image_data=image_data,
|
180
193
|
return_logprob=return_logprob,
|
181
194
|
logprob_start_len=logprob_start_len,
|
182
195
|
top_logprobs_num=top_logprobs_num,
|
@@ -317,7 +330,7 @@ def _set_envs_and_config(server_args: ServerArgs):
|
|
317
330
|
if server_args.attention_backend == "flashinfer":
|
318
331
|
assert_pkg_version(
|
319
332
|
"flashinfer_python",
|
320
|
-
"0.2.1.
|
333
|
+
"0.2.1.post2",
|
321
334
|
"Please uninstall the old version and "
|
322
335
|
"reinstall the latest version by following the instructions "
|
323
336
|
"at https://docs.flashinfer.ai/installation.html.",
|
@@ -425,7 +438,9 @@ def _launch_subprocesses(server_args: ServerArgs) -> Tuple[TokenizerManager, Dic
|
|
425
438
|
# Launch tokenizer process
|
426
439
|
tokenizer_manager = TokenizerManager(server_args, port_args)
|
427
440
|
if server_args.chat_template:
|
428
|
-
load_chat_template_for_openai_api(
|
441
|
+
load_chat_template_for_openai_api(
|
442
|
+
tokenizer_manager, server_args.chat_template, server_args.model_path
|
443
|
+
)
|
429
444
|
|
430
445
|
# Wait for the model to finish loading
|
431
446
|
scheduler_infos = []
|
@@ -30,16 +30,15 @@ from transformers import (
|
|
30
30
|
)
|
31
31
|
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
|
32
32
|
|
33
|
-
from sglang.srt.configs import ChatGLMConfig, DbrxConfig, ExaoneConfig,
|
33
|
+
from sglang.srt.configs import ChatGLMConfig, DbrxConfig, ExaoneConfig, Qwen2_5_VLConfig
|
34
34
|
|
35
35
|
_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
|
36
36
|
ChatGLMConfig.model_type: ChatGLMConfig,
|
37
37
|
DbrxConfig.model_type: DbrxConfig,
|
38
38
|
ExaoneConfig.model_type: ExaoneConfig,
|
39
|
-
|
39
|
+
Qwen2_5_VLConfig.model_type: Qwen2_5_VLConfig,
|
40
40
|
}
|
41
41
|
|
42
|
-
|
43
42
|
for name, cls in _CONFIG_REGISTRY.items():
|
44
43
|
with contextlib.suppress(ValueError):
|
45
44
|
AutoConfig.register(name, cls)
|
@@ -54,7 +54,9 @@ class DecodeMetadata:
|
|
54
54
|
|
55
55
|
@dataclass
|
56
56
|
class PrefillMetadata:
|
57
|
-
prefill_wrappers: List[
|
57
|
+
prefill_wrappers: List[
|
58
|
+
Union[BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper]
|
59
|
+
]
|
58
60
|
use_ragged: bool
|
59
61
|
extend_no_prefix: bool
|
60
62
|
|
@@ -160,16 +162,36 @@ class FlashInferAttnBackend(AttentionBackend):
|
|
160
162
|
self.decode_wrappers = []
|
161
163
|
for _ in range(self.num_wrappers):
|
162
164
|
if not skip_prefill:
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
165
|
+
if (
|
166
|
+
self.enable_flashinfer_mla
|
167
|
+
and not global_server_args_dict["disable_radix_cache"]
|
168
|
+
):
|
169
|
+
# use mla paged prefill
|
170
|
+
self.prefill_wrappers_paged.append(
|
171
|
+
BatchMLAPagedAttentionWrapper(
|
172
|
+
self.workspace_buffer,
|
173
|
+
backend="fa2",
|
174
|
+
)
|
175
|
+
)
|
176
|
+
self.prefill_wrappers_verify.append(
|
177
|
+
BatchMLAPagedAttentionWrapper(
|
178
|
+
self.workspace_buffer,
|
179
|
+
backend="fa2",
|
180
|
+
)
|
181
|
+
)
|
182
|
+
else:
|
183
|
+
self.prefill_wrappers_paged.append(
|
184
|
+
BatchPrefillWithPagedKVCacheWrapper(
|
185
|
+
self.workspace_buffer,
|
186
|
+
"NHD",
|
187
|
+
backend="fa2",
|
188
|
+
)
|
189
|
+
)
|
190
|
+
self.prefill_wrappers_verify.append(
|
191
|
+
BatchPrefillWithPagedKVCacheWrapper(
|
192
|
+
self.workspace_buffer, "NHD"
|
193
|
+
)
|
168
194
|
)
|
169
|
-
)
|
170
|
-
self.prefill_wrappers_verify.append(
|
171
|
-
BatchPrefillWithPagedKVCacheWrapper(self.workspace_buffer, "NHD")
|
172
|
-
)
|
173
195
|
if self.enable_flashinfer_mla:
|
174
196
|
self.decode_wrappers.append(
|
175
197
|
BatchMLAPagedAttentionWrapper(self.workspace_buffer, backend="fa2")
|
@@ -237,7 +259,10 @@ class FlashInferAttnBackend(AttentionBackend):
|
|
237
259
|
else:
|
238
260
|
prefix_lens = forward_batch.extend_prefix_lens
|
239
261
|
|
240
|
-
if self.is_multimodal
|
262
|
+
if self.is_multimodal or (
|
263
|
+
self.enable_flashinfer_mla
|
264
|
+
and not global_server_args_dict["disable_radix_cache"]
|
265
|
+
):
|
241
266
|
use_ragged = False
|
242
267
|
extend_no_prefix = False
|
243
268
|
else:
|
@@ -419,23 +444,43 @@ class FlashInferAttnBackend(AttentionBackend):
|
|
419
444
|
|
420
445
|
logits_soft_cap = layer.logit_cap
|
421
446
|
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
447
|
+
if global_server_args_dict["disable_radix_cache"]:
|
448
|
+
# use mla ragged prefill
|
449
|
+
o, _ = self.prefill_wrapper_ragged.forward_return_lse(
|
450
|
+
q.view(-1, layer.tp_q_head_num, layer.head_dim),
|
451
|
+
k.view(-1, layer.tp_k_head_num, layer.head_dim),
|
452
|
+
v.view(-1, layer.tp_v_head_num, layer.v_head_dim),
|
453
|
+
causal=True,
|
454
|
+
sm_scale=layer.scaling,
|
455
|
+
logits_soft_cap=logits_soft_cap,
|
456
|
+
)
|
432
457
|
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
458
|
+
if save_kv_cache:
|
459
|
+
forward_batch.token_to_kv_pool.set_kv_buffer(
|
460
|
+
layer,
|
461
|
+
cache_loc,
|
462
|
+
k,
|
463
|
+
v,
|
464
|
+
)
|
465
|
+
else:
|
466
|
+
# use mla paged prefill
|
467
|
+
prefill_wrapper_paged = self.forward_metadata.prefill_wrappers[
|
468
|
+
self._get_wrapper_idx(layer)
|
469
|
+
]
|
470
|
+
if k is not None:
|
471
|
+
assert v is not None
|
472
|
+
if save_kv_cache:
|
473
|
+
forward_batch.token_to_kv_pool.set_kv_buffer(
|
474
|
+
layer, cache_loc, k, v
|
475
|
+
)
|
476
|
+
qall = q.view(-1, layer.tp_q_head_num, layer.head_dim)
|
477
|
+
k_buf = forward_batch.token_to_kv_pool.get_key_buffer(layer.layer_id)
|
478
|
+
|
479
|
+
o = prefill_wrapper_paged.run(
|
480
|
+
qall[:, :, : layer.v_head_dim],
|
481
|
+
qall[:, :, layer.v_head_dim :],
|
482
|
+
k_buf[:, :, : layer.v_head_dim],
|
483
|
+
k_buf[:, :, layer.v_head_dim :],
|
439
484
|
)
|
440
485
|
|
441
486
|
return o.view(-1, layer.tp_q_head_num * layer.v_head_dim)
|
@@ -800,7 +845,9 @@ class FlashInferIndicesUpdaterPrefill:
|
|
800
845
|
seq_lens: torch.Tensor,
|
801
846
|
seq_lens_sum: int,
|
802
847
|
prefix_lens: torch.Tensor,
|
803
|
-
prefill_wrappers: List[
|
848
|
+
prefill_wrappers: List[
|
849
|
+
Union[BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper]
|
850
|
+
],
|
804
851
|
use_ragged: bool,
|
805
852
|
encoder_lens: Optional[torch.Tensor],
|
806
853
|
spec_info: Optional[SpecInfo],
|
@@ -814,7 +861,9 @@ class FlashInferIndicesUpdaterPrefill:
|
|
814
861
|
seq_lens: torch.Tensor,
|
815
862
|
seq_lens_sum: int,
|
816
863
|
prefix_lens: torch.Tensor,
|
817
|
-
prefill_wrappers: List[
|
864
|
+
prefill_wrappers: List[
|
865
|
+
Union[BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper]
|
866
|
+
],
|
818
867
|
use_ragged: bool,
|
819
868
|
encoder_lens: Optional[torch.Tensor],
|
820
869
|
spec_info: Optional[SpecInfo],
|
@@ -923,7 +972,9 @@ class FlashInferIndicesUpdaterPrefill:
|
|
923
972
|
def call_begin_forward(
|
924
973
|
self,
|
925
974
|
wrapper_ragged: BatchPrefillWithRaggedKVCacheWrapper,
|
926
|
-
wrapper_paged:
|
975
|
+
wrapper_paged: Union[
|
976
|
+
BatchPrefillWithPagedKVCacheWrapper, BatchMLAPagedAttentionWrapper
|
977
|
+
],
|
927
978
|
req_pool_indices: torch.Tensor,
|
928
979
|
paged_kernel_lens: torch.Tensor,
|
929
980
|
paged_kernel_lens_sum: int,
|
@@ -1004,6 +1055,26 @@ class FlashInferIndicesUpdaterPrefill:
|
|
1004
1055
|
custom_mask=custom_mask,
|
1005
1056
|
non_blocking=True,
|
1006
1057
|
)
|
1058
|
+
elif (
|
1059
|
+
global_config.enable_flashinfer_mla
|
1060
|
+
and not global_server_args_dict["disable_radix_cache"]
|
1061
|
+
):
|
1062
|
+
# mla paged prefill
|
1063
|
+
kv_len_arr = kv_indptr[1:] - kv_indptr[:-1]
|
1064
|
+
wrapper_paged.plan(
|
1065
|
+
qo_indptr,
|
1066
|
+
kv_indptr,
|
1067
|
+
kv_indices,
|
1068
|
+
kv_len_arr,
|
1069
|
+
self.num_qo_heads,
|
1070
|
+
512,
|
1071
|
+
64,
|
1072
|
+
1,
|
1073
|
+
True,
|
1074
|
+
1 / math.sqrt(192),
|
1075
|
+
self.data_type,
|
1076
|
+
self.data_type,
|
1077
|
+
)
|
1007
1078
|
|
1008
1079
|
|
1009
1080
|
class FlashInferMultiStepDraftBackend:
|
@@ -0,0 +1,146 @@
|
|
1
|
+
{
|
2
|
+
"1": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 64,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 16,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"2": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 64,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 16,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4": {
|
19
|
+
"BLOCK_SIZE_M": 16,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 16,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
},
|
26
|
+
"8": {
|
27
|
+
"BLOCK_SIZE_M": 64,
|
28
|
+
"BLOCK_SIZE_N": 64,
|
29
|
+
"BLOCK_SIZE_K": 128,
|
30
|
+
"GROUP_SIZE_M": 32,
|
31
|
+
"num_warps": 4,
|
32
|
+
"num_stages": 3
|
33
|
+
},
|
34
|
+
"16": {
|
35
|
+
"BLOCK_SIZE_M": 64,
|
36
|
+
"BLOCK_SIZE_N": 128,
|
37
|
+
"BLOCK_SIZE_K": 128,
|
38
|
+
"GROUP_SIZE_M": 16,
|
39
|
+
"num_warps": 4,
|
40
|
+
"num_stages": 3
|
41
|
+
},
|
42
|
+
"24": {
|
43
|
+
"BLOCK_SIZE_M": 64,
|
44
|
+
"BLOCK_SIZE_N": 64,
|
45
|
+
"BLOCK_SIZE_K": 128,
|
46
|
+
"GROUP_SIZE_M": 1,
|
47
|
+
"num_warps": 4,
|
48
|
+
"num_stages": 3
|
49
|
+
},
|
50
|
+
"32": {
|
51
|
+
"BLOCK_SIZE_M": 64,
|
52
|
+
"BLOCK_SIZE_N": 64,
|
53
|
+
"BLOCK_SIZE_K": 128,
|
54
|
+
"GROUP_SIZE_M": 64,
|
55
|
+
"num_warps": 4,
|
56
|
+
"num_stages": 3
|
57
|
+
},
|
58
|
+
"48": {
|
59
|
+
"BLOCK_SIZE_M": 64,
|
60
|
+
"BLOCK_SIZE_N": 64,
|
61
|
+
"BLOCK_SIZE_K": 128,
|
62
|
+
"GROUP_SIZE_M": 1,
|
63
|
+
"num_warps": 4,
|
64
|
+
"num_stages": 3
|
65
|
+
},
|
66
|
+
"64": {
|
67
|
+
"BLOCK_SIZE_M": 64,
|
68
|
+
"BLOCK_SIZE_N": 128,
|
69
|
+
"BLOCK_SIZE_K": 128,
|
70
|
+
"GROUP_SIZE_M": 16,
|
71
|
+
"num_warps": 4,
|
72
|
+
"num_stages": 3
|
73
|
+
},
|
74
|
+
"96": {
|
75
|
+
"BLOCK_SIZE_M": 64,
|
76
|
+
"BLOCK_SIZE_N": 128,
|
77
|
+
"BLOCK_SIZE_K": 128,
|
78
|
+
"GROUP_SIZE_M": 16,
|
79
|
+
"num_warps": 4,
|
80
|
+
"num_stages": 3
|
81
|
+
},
|
82
|
+
"128": {
|
83
|
+
"BLOCK_SIZE_M": 64,
|
84
|
+
"BLOCK_SIZE_N": 128,
|
85
|
+
"BLOCK_SIZE_K": 128,
|
86
|
+
"GROUP_SIZE_M": 16,
|
87
|
+
"num_warps": 4,
|
88
|
+
"num_stages": 3
|
89
|
+
},
|
90
|
+
"256": {
|
91
|
+
"BLOCK_SIZE_M": 64,
|
92
|
+
"BLOCK_SIZE_N": 128,
|
93
|
+
"BLOCK_SIZE_K": 128,
|
94
|
+
"GROUP_SIZE_M": 32,
|
95
|
+
"num_warps": 4,
|
96
|
+
"num_stages": 3
|
97
|
+
},
|
98
|
+
"512": {
|
99
|
+
"BLOCK_SIZE_M": 64,
|
100
|
+
"BLOCK_SIZE_N": 128,
|
101
|
+
"BLOCK_SIZE_K": 128,
|
102
|
+
"GROUP_SIZE_M": 16,
|
103
|
+
"num_warps": 4,
|
104
|
+
"num_stages": 3
|
105
|
+
},
|
106
|
+
"1024": {
|
107
|
+
"BLOCK_SIZE_M": 64,
|
108
|
+
"BLOCK_SIZE_N": 128,
|
109
|
+
"BLOCK_SIZE_K": 128,
|
110
|
+
"GROUP_SIZE_M": 64,
|
111
|
+
"num_warps": 4,
|
112
|
+
"num_stages": 3
|
113
|
+
},
|
114
|
+
"1536": {
|
115
|
+
"BLOCK_SIZE_M": 64,
|
116
|
+
"BLOCK_SIZE_N": 128,
|
117
|
+
"BLOCK_SIZE_K": 128,
|
118
|
+
"GROUP_SIZE_M": 32,
|
119
|
+
"num_warps": 4,
|
120
|
+
"num_stages": 3
|
121
|
+
},
|
122
|
+
"2048": {
|
123
|
+
"BLOCK_SIZE_M": 64,
|
124
|
+
"BLOCK_SIZE_N": 128,
|
125
|
+
"BLOCK_SIZE_K": 128,
|
126
|
+
"GROUP_SIZE_M": 64,
|
127
|
+
"num_warps": 4,
|
128
|
+
"num_stages": 3
|
129
|
+
},
|
130
|
+
"3072": {
|
131
|
+
"BLOCK_SIZE_M": 128,
|
132
|
+
"BLOCK_SIZE_N": 64,
|
133
|
+
"BLOCK_SIZE_K": 128,
|
134
|
+
"GROUP_SIZE_M": 16,
|
135
|
+
"num_warps": 4,
|
136
|
+
"num_stages": 3
|
137
|
+
},
|
138
|
+
"4096": {
|
139
|
+
"BLOCK_SIZE_M": 64,
|
140
|
+
"BLOCK_SIZE_N": 128,
|
141
|
+
"BLOCK_SIZE_K": 128,
|
142
|
+
"GROUP_SIZE_M": 64,
|
143
|
+
"num_warps": 4,
|
144
|
+
"num_stages": 3
|
145
|
+
}
|
146
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 64,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 64,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 64,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 32,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 32,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 64,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 32,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 64,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 1,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 64,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 128,
|
4
|
+
"BLOCK_SIZE_N": 64,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 1,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 128,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 64,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 2
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 64,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 64,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 32,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 64,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 1,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 16,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 128,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 32,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 128,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 32,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 32,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 64,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 64,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 4
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 64,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 1,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 1,
|
23
|
+
"num_warps": 8,
|
24
|
+
"num_stages": 5
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 128,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 1,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 128,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 1,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 1,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 64,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 64,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 2
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 128,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 1,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 64,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 64,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 3
|
25
|
+
}
|
26
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"2048": {
|
3
|
+
"BLOCK_SIZE_M": 64,
|
4
|
+
"BLOCK_SIZE_N": 128,
|
5
|
+
"BLOCK_SIZE_K": 128,
|
6
|
+
"GROUP_SIZE_M": 16,
|
7
|
+
"num_warps": 4,
|
8
|
+
"num_stages": 3
|
9
|
+
},
|
10
|
+
"3072": {
|
11
|
+
"BLOCK_SIZE_M": 64,
|
12
|
+
"BLOCK_SIZE_N": 128,
|
13
|
+
"BLOCK_SIZE_K": 128,
|
14
|
+
"GROUP_SIZE_M": 1,
|
15
|
+
"num_warps": 4,
|
16
|
+
"num_stages": 3
|
17
|
+
},
|
18
|
+
"4096": {
|
19
|
+
"BLOCK_SIZE_M": 64,
|
20
|
+
"BLOCK_SIZE_N": 128,
|
21
|
+
"BLOCK_SIZE_K": 128,
|
22
|
+
"GROUP_SIZE_M": 1,
|
23
|
+
"num_warps": 4,
|
24
|
+
"num_stages": 2
|
25
|
+
}
|
26
|
+
}
|